diff --git a/.gitattributes b/.gitattributes index 2b65f6fe3cc80..e2211a2af515e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,7 @@ *.bat text eol=crlf *.cmd text eol=crlf +*.java text eol=lf +*.scala text eol=lf +*.xml text eol=lf +*.py text eol=lf +*.R text eol=lf diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index a9f757c3e2413..d53119ad75599 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -4,6 +4,9 @@ on: push: branches: - master + pull_request: + branches: + - master jobs: build: @@ -12,16 +15,105 @@ jobs: strategy: matrix: java: [ '1.8', '11' ] - name: Build Spark with JDK ${{ matrix.java }} + hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ] + hive: [ 'hive-1.2', 'hive-2.3' ] + exclude: + - java: '11' + hive: 'hive-1.2' + - hadoop: 'hadoop-3.2' + hive: 'hive-1.2' + name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }} steps: - uses: actions/checkout@master + # We split caches because GitHub Action Cache has a 400MB-size limit. 
+ - uses: actions/cache@v1 + with: + path: build + key: build-${{ hashFiles('**/pom.xml') }} + restore-keys: | + build- + - uses: actions/cache@v1 + with: + path: ~/.m2/repository/com + key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com- + - uses: actions/cache@v1 + with: + path: ~/.m2/repository/org + key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org- + - uses: actions/cache@v1 + with: + path: ~/.m2/repository/net + key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net- + - uses: actions/cache@v1 + with: + path: ~/.m2/repository/io + key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io- - name: Set up JDK ${{ matrix.java }} uses: actions/setup-java@v1 with: - version: ${{ matrix.java }} + java-version: ${{ matrix.java }} - name: Build with Maven run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" export MAVEN_CLI_OPTS="--no-transfer-progress" - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-3.2 -Phadoop-cloud -Djava.version=${{ matrix.java }} package + mkdir -p ~/.m2 + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install + rm -rf ~/.m2/repository/org/apache/spark + + + lint: + runs-on: ubuntu-latest + name: Linters (Java/Scala/Python), licenses, dependencies + steps: + - uses: actions/checkout@master 
+ - uses: actions/setup-java@v1 + with: + java-version: '11' + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + architecture: 'x64' + - name: Scala + run: ./dev/lint-scala + - name: Java + run: ./dev/lint-java + - name: Python + run: | + pip install flake8 sphinx numpy + ./dev/lint-python + - name: License + run: ./dev/check-license + - name: Dependencies + run: ./dev/test-dependencies.sh + + lintr: + runs-on: ubuntu-latest + name: Linter (R) + steps: + - uses: actions/checkout@master + - uses: actions/setup-java@v1 + with: + java-version: '11' + - name: install R + run: | + echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran35/' | sudo tee -a /etc/apt/sources.list + curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add + sudo apt-get update + sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev + - name: install R packages + run: | + sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" + sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" + - name: package and install SparkR + run: ./R/install-dev.sh + - name: lint-r + run: ./dev/lint-r diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 0000000000000..8e2f5bf3b0818 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,24 @@ +name: Close stale PRs + +on: + schedule: + - cron: "0 0 * * *" + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v1.1.0 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-pr-message: > + We're closing this PR because it hasn't been updated in a while. + This isn't a judgement on the merit of the PR in any way. It's just + a way of keeping the PR queue manageable. 
+ + If you'd like to revive this PR, please reopen it and ask a + committer to remove the Stale tag! + days-before-stale: 100 + # Setting this to 0 is the same as setting it to 1. + # See: https://github.com/actions/stale/issues/28 + days-before-close: 0 diff --git a/.gitignore b/.gitignore index ae20c85ebe351..798e8acc4d43b 100644 --- a/.gitignore +++ b/.gitignore @@ -45,7 +45,7 @@ dev/create-release/*final dev/create-release/*txt dev/pr-deps/ dist/ -docs/_site +docs/_site/ docs/api sql/docs sql/site @@ -63,6 +63,7 @@ project/plugins/target/ python/lib/pyspark.zip python/.eggs/ python/deps +python/docs/_site/ python/test_coverage/coverage_data python/test_coverage/htmlcov python/pyspark/python diff --git a/LICENSE b/LICENSE index 150ccc54ec6c2..6b169b1447f14 100644 --- a/LICENSE +++ b/LICENSE @@ -216,6 +216,7 @@ core/src/main/resources/org/apache/spark/ui/static/bootstrap* core/src/main/resources/org/apache/spark/ui/static/jsonFormatter* core/src/main/resources/org/apache/spark/ui/static/vis* docs/js/vendor/bootstrap.js +external/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java Python Software Foundation License @@ -243,7 +244,7 @@ MIT License core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js core/src/main/resources/org/apache/spark/ui/static/*dataTables* core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js -ore/src/main/resources/org/apache/spark/ui/static/jquery* +core/src/main/resources/org/apache/spark/ui/static/jquery* core/src/main/resources/org/apache/spark/ui/static/sorttable.js docs/js/vendor/anchor.min.js docs/js/vendor/jquery* diff --git a/LICENSE-binary b/LICENSE-binary index ba20eea118687..b50da6be4e697 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -218,13 +218,14 @@ javax.jdo:jdo-api joda-time:joda-time net.sf.opencsv:opencsv org.apache.derby:derby +org.ehcache:ehcache org.objenesis:objenesis org.roaringbitmap:RoaringBitmap org.scalanlp:breeze-macros_2.12 
org.scalanlp:breeze_2.12 org.typelevel:macro-compat_2.12 org.yaml:snakeyaml -org.apache.xbean:xbean-asm5-shaded +org.apache.xbean:xbean-asm7-shaded com.squareup.okhttp3:logging-interceptor com.squareup.okhttp3:okhttp com.squareup.okio:okio @@ -242,10 +243,10 @@ com.vlkan:flatbuffers com.ning:compress-lzf io.airlift:aircompressor io.dropwizard.metrics:metrics-core -io.dropwizard.metrics:metrics-ganglia io.dropwizard.metrics:metrics-graphite io.dropwizard.metrics:metrics-json io.dropwizard.metrics:metrics-jvm +io.dropwizard.metrics:metrics-jmx org.iq80.snappy:snappy com.clearspring.analytics:stream com.jamesmurty.utils:java-xmlbuilder @@ -253,12 +254,14 @@ commons-codec:commons-codec commons-collections:commons-collections io.fabric8:kubernetes-client io.fabric8:kubernetes-model +io.fabric8:kubernetes-model-common io.netty:netty-all net.hydromatic:eigenbase-properties net.sf.supercsv:super-csv org.apache.arrow:arrow-format org.apache.arrow:arrow-memory org.apache.arrow:arrow-vector +org.apache.commons:commons-configuration2 org.apache.commons:commons-crypto org.apache.commons:commons-lang3 org.apache.hadoop:hadoop-annotations @@ -266,6 +269,7 @@ org.apache.hadoop:hadoop-auth org.apache.hadoop:hadoop-client org.apache.hadoop:hadoop-common org.apache.hadoop:hadoop-hdfs +org.apache.hadoop:hadoop-hdfs-client org.apache.hadoop:hadoop-mapreduce-client-app org.apache.hadoop:hadoop-mapreduce-client-common org.apache.hadoop:hadoop-mapreduce-client-core @@ -278,6 +282,21 @@ org.apache.hadoop:hadoop-yarn-server-common org.apache.hadoop:hadoop-yarn-server-web-proxy org.apache.httpcomponents:httpclient org.apache.httpcomponents:httpcore +org.apache.kerby:kerb-admin +org.apache.kerby:kerb-client +org.apache.kerby:kerb-common +org.apache.kerby:kerb-core +org.apache.kerby:kerb-crypto +org.apache.kerby:kerb-identity +org.apache.kerby:kerb-server +org.apache.kerby:kerb-simplekdc +org.apache.kerby:kerb-util +org.apache.kerby:kerby-asn1 +org.apache.kerby:kerby-config 
+org.apache.kerby:kerby-pkix +org.apache.kerby:kerby-util +org.apache.kerby:kerby-xdr +org.apache.kerby:token-provider org.apache.orc:orc-core org.apache.orc:orc-mapreduce org.mortbay.jetty:jetty @@ -292,16 +311,24 @@ com.fasterxml.jackson.core:jackson-annotations com.fasterxml.jackson.core:jackson-core com.fasterxml.jackson.core:jackson-databind com.fasterxml.jackson.dataformat:jackson-dataformat-yaml +com.fasterxml.jackson.jaxrs:jackson-jaxrs-base +com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider com.fasterxml.jackson.module:jackson-module-jaxb-annotations com.fasterxml.jackson.module:jackson-module-paranamer com.fasterxml.jackson.module:jackson-module-scala_2.12 +com.fasterxml.woodstox:woodstox-core com.github.mifmif:generex +com.github.stephenc.jcip:jcip-annotations com.google.code.findbugs:jsr305 com.google.code.gson:gson +com.google.flatbuffers:flatbuffers-java +com.google.guava:guava com.google.inject:guice com.google.inject.extensions:guice-servlet +com.nimbusds:nimbus-jose-jwt com.twitter:parquet-hadoop-bundle commons-cli:commons-cli +commons-daemon:commons-daemon commons-dbcp:commons-dbcp commons-io:commons-io commons-lang:commons-lang @@ -313,6 +340,8 @@ javax.inject:javax.inject javax.validation:validation-api log4j:apache-log4j-extras log4j:log4j +net.minidev:accessors-smart +net.minidev:json-smart net.sf.jpam:jpam org.apache.avro:avro org.apache.avro:avro-ipc @@ -328,6 +357,7 @@ org.apache.directory.server:apacheds-i18n org.apache.directory.server:apacheds-kerberos-codec org.apache.htrace:htrace-core org.apache.ivy:ivy +org.apache.geronimo.specs:geronimo-jcache_1.0_spec org.apache.mesos:mesos org.apache.parquet:parquet-column org.apache.parquet:parquet-common @@ -343,11 +373,6 @@ org.datanucleus:datanucleus-api-jdo org.datanucleus:datanucleus-core org.datanucleus:datanucleus-rdbms org.lz4:lz4-java -org.spark-project.hive:hive-beeline -org.spark-project.hive:hive-cli -org.spark-project.hive:hive-exec -org.spark-project.hive:hive-jdbc 
-org.spark-project.hive:hive-metastore org.xerial.snappy:snappy-java stax:stax-api xerces:xercesImpl @@ -368,7 +393,27 @@ org.eclipse.jetty:jetty-util org.eclipse.jetty:jetty-webapp org.eclipse.jetty:jetty-xml org.scala-lang.modules:scala-xml_2.12 -org.opencypher:okapi-shade +com.github.joshelser:dropwizard-metrics-hadoop-metrics2-reporter +com.zaxxer.HikariCP +org.apache.hive:hive-beeline +org.apache.hive:hive-cli +org.apache.hive:hive-common +org.apache.hive:hive-exec +org.apache.hive:hive-jdbc +org.apache.hive:hive-llap-common +org.apache.hive:hive-metastore +org.apache.hive:hive-serde +org.apache.hive:hive-service-rpc +org.apache.hive:hive-shims-0.23 +org.apache.hive:hive-shims +org.apache.hive:hive-common +org.apache.hive:hive-shims-scheduler +org.apache.hive:hive-storage-api +org.apache.hive:hive-vector-code-gen +org.datanucleus:javax.jdo +com.tdunning:json +org.apache.velocity:velocity +org.apache.yetus:audience-annotations core/src/main/java/org/apache/spark/util/collection/TimSort.java core/src/main/resources/org/apache/spark/ui/static/bootstrap* @@ -387,6 +432,7 @@ BSD 2-Clause ------------ com.github.luben:zstd-jni +dnsjava:dnsjava javolution:javolution com.esotericsoftware:kryo-shaded com.esotericsoftware:minlog @@ -394,8 +440,11 @@ com.esotericsoftware:reflectasm com.google.protobuf:protobuf-java org.codehaus.janino:commons-compiler org.codehaus.janino:janino +org.codehaus.woodstox:stax2-api jline:jline org.jodd:jodd-core +com.github.wendykierp:JTransforms +pl.edu.icm:JLargeArrays BSD 3-Clause @@ -408,6 +457,7 @@ org.antlr:stringtemplate org.antlr:antlr4-runtime antlr:antlr com.github.fommil.netlib:core +com.google.re2j:re2j com.thoughtworks.paranamer:paranamer org.scala-lang:scala-compiler org.scala-lang:scala-library @@ -419,6 +469,7 @@ xmlenc:xmlenc net.sf.py4j:py4j org.jpmml:pmml-model org.jpmml:pmml-schema +org.threeten:threeten-extra python/lib/py4j-*-src.zip python/pyspark/cloudpickle.py @@ -433,8 +484,13 @@ is distributed under the 3-Clause BSD 
license. MIT License ----------- -org.spire-math:spire-macros_2.12 -org.spire-math:spire_2.12 +com.microsoft.sqlserver:mssql-jdbc +org.typelevel:spire_2.12 +org.typelevel:spire-macros_2.12 +org.typelevel:spire-platform_2.12 +org.typelevel:spire-util_2.12 +org.typelevel:algebra_2.12:jar +org.typelevel:cats-kernel_2.12 org.typelevel:machinist_2.12 net.razorvine:pyrolite org.slf4j:jcl-over-slf4j @@ -446,7 +502,7 @@ com.github.scopt:scopt_2.12 core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js core/src/main/resources/org/apache/spark/ui/static/*dataTables* core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js -ore/src/main/resources/org/apache/spark/ui/static/jquery* +core/src/main/resources/org/apache/spark/ui/static/jquery* core/src/main/resources/org/apache/spark/ui/static/sorttable.js docs/js/vendor/anchor.min.js docs/js/vendor/jquery* @@ -458,6 +514,7 @@ Common Development and Distribution License (CDDL) 1.0 javax.activation:activation http://www.oracle.com/technetwork/java/javase/tech/index-jsp-138795.html javax.xml.stream:stax-api https://jcp.org/en/jsr/detail?id=173 +javax.transaction:javax.transaction-api Common Development and Distribution License (CDDL) 1.1 @@ -465,6 +522,7 @@ Common Development and Distribution License (CDDL) 1.1 javax.el:javax.el-api https://javaee.github.io/uel-ri/ javax.servlet:javax.servlet-api https://javaee.github.io/servlet-spec/ +javax.servlet.jsp:jsp-api javax.transaction:jta http://www.oracle.com/technetwork/java/index.html javax.xml.bind:jaxb-api https://github.com/javaee/jaxb-v2 org.glassfish.hk2:hk2-api https://github.com/javaee/glassfish @@ -486,6 +544,7 @@ Eclipse Distribution License (EDL) 1.0 -------------------------------------- org.glassfish.jaxb:jaxb-runtime +jakarta.activation:jakarta.activation-api jakarta.xml.bind:jakarta.xml.bind-api com.sun.istack:istack-commons-runtime @@ -495,11 +554,7 @@ Eclipse Public License (EPL) 2.0 jakarta.annotation:jakarta-annotation-api 
https://projects.eclipse.org/projects/ee4j.ca jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api - -Mozilla Public License (MPL) 1.1 --------------------------------- - -com.github.rwl:jtransforms https://sourceforge.net/projects/jtransforms/ +org.glassfish.hk2.external:jakarta.inject Python Software Foundation License diff --git a/NOTICE b/NOTICE index fefe08b38afc5..d5ea8dedb311b 100644 --- a/NOTICE +++ b/NOTICE @@ -26,3 +26,16 @@ The following provides more details on the included cryptographic software: This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to support authentication, and encryption and decryption of data sent across the network between services. + + +Metrics +Copyright 2010-2013 Coda Hale and Yammer, Inc. + +This product includes software developed by Coda Hale and Yammer, Inc. + +This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, +LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ \ No newline at end of file diff --git a/NOTICE-binary b/NOTICE-binary index f93e088a9a731..4ce8bf2f86b2a 100644 --- a/NOTICE-binary +++ b/NOTICE-binary @@ -65,8 +65,8 @@ Copyright 2009-2014 The Apache Software Foundation Objenesis Copyright 2006-2013 Joe Walnes, Henri Tremblay, Leonardo Mesquita -Apache XBean :: ASM 5 shaded (repackaged) -Copyright 2005-2015 The Apache Software Foundation +Apache XBean :: ASM shaded (repackaged) +Copyright 2005-2019 The Apache Software Foundation -------------------------------------- @@ -661,6 +661,9 @@ Copyright 2017 The Apache Software Foundation Apache Commons CLI Copyright 2001-2009 The Apache Software Foundation +Apache Commons Daemon +Copyright 1999-2019 The Apache Software Foundation + Google Guice - Extensions - Servlet Copyright 
2006-2011 Google, Inc. @@ -1135,4 +1138,393 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and -limitations under the License. \ No newline at end of file +limitations under the License. + +dropwizard-metrics-hadoop-metrics2-reporter +Copyright 2016 Josh Elser + +Hive Beeline +Copyright 2019 The Apache Software Foundation + +Hive CLI +Copyright 2019 The Apache Software Foundation + +Hive Common +Copyright 2019 The Apache Software Foundation + +Hive JDBC +Copyright 2019 The Apache Software Foundation + +Hive Query Language +Copyright 2019 The Apache Software Foundation + +Hive Llap Common +Copyright 2019 The Apache Software Foundation + +Hive Metastore +Copyright 2019 The Apache Software Foundation + +Hive Serde +Copyright 2019 The Apache Software Foundation + +Hive Service RPC +Copyright 2019 The Apache Software Foundation + +Hive Shims +Copyright 2019 The Apache Software Foundation + +Hive Shims 0.23 +Copyright 2019 The Apache Software Foundation + +Hive Shims Common +Copyright 2019 The Apache Software Foundation + +Hive Shims Scheduler +Copyright 2019 The Apache Software Foundation + +Hive Storage API +Copyright 2018 The Apache Software Foundation + +Hive Vector-Code-Gen Utilities +Copyright 2019 The Apache Software Foundation + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2015-2015 DataNucleus + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Android JSON library +Copyright (C) 2010 The Android Open Source Project + +This product includes software developed by +The Android Open Source Project + +Apache Velocity + +Copyright (C) 2000-2007 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Apache Yetus - Audience Annotations +Copyright 2015-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Ehcache V3 +Copyright 2014-2016 Terracotta, Inc. + +The product includes software from the Apache Commons Lang project, +under the Apache License 2.0 (see: org.ehcache.impl.internal.classes.commonslang) + +Apache Geronimo JCache Spec 1.0 +Copyright 2003-2014 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Admin +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Client +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ + +Kerby-kerb Common +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb core +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Crypto +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Identity +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Server +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerb Simple Kdc +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Util +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby ASN1 Project +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby Config +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby PKIX Project +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby Util +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ + +Kerby XDR Project +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Token provider +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Metrics +Copyright 2010-2013 Coda Hale and Yammer, Inc. + +This product includes software developed by Coda Hale and Yammer, Inc. + +This product includes code derived from the JSR-166 project (ThreadLocalRandom, Striped64, +LongAdder), which was released with the following comments: + + Written by Doug Lea with assistance from members of JCP JSR-166 + Expert Group and released to the public domain, as explained at + http://creativecommons.org/publicdomain/zero/1.0/ \ No newline at end of file diff --git a/R/check-cran.sh b/R/check-cran.sh index 22cc9c6b601fc..22c8f423cfd12 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -65,6 +65,10 @@ fi echo "Running CRAN check with $CRAN_CHECK_OPTIONS options" +# Remove this environment variable to allow to check suggested packages once +# Jenkins installs arrow. See SPARK-29339. 
+export _R_CHECK_FORCE_SUGGESTS_=FALSE + if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ] then "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz" diff --git a/R/pkg/.lintr b/R/pkg/.lintr index c83ad2adfe0ef..67dc1218ea551 100644 --- a/R/pkg/.lintr +++ b/R/pkg/.lintr @@ -1,2 +1,2 @@ -linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE)) +linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, object_name_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE), object_usage_linter = NULL, cyclocomp_linter = NULL) exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R") diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index f4780862099d3..c8cb1c3a992ad 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -22,7 +22,8 @@ Suggests: rmarkdown, testthat, e1071, - survival + survival, + arrow Collate: 'schema.R' 'generics.R' @@ -61,3 +62,4 @@ Collate: RoxygenNote: 5.0.1 VignetteBuilder: knitr NeedsCompilation: no +Encoding: UTF-8 diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index f9d9494ca6fa1..7ed2e36d59531 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -335,6 +335,7 @@ exportMethods("%<=>%", "ntile", "otherwise", "over", + "overlay", "percent_rank", "pmod", "posexplode", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 6f3c7c120ba3c..593d3ca16220d 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2252,7 +2252,7 @@ setMethod("mutate", # The last column of the same name in the specific columns takes effect deDupCols <- list() - for (i in 1:length(cols)) { + for (i in seq_len(length(cols))) { deDupCols[[ns[[i]]]] <- alias(cols[[i]], ns[[i]]) } @@ -2416,7 +2416,7 @@ setMethod("arrange", # builds a list of columns of type Column # example: 
[[1]] Column Species ASC # [[2]] Column Petal_Length DESC - jcols <- lapply(seq_len(length(decreasing)), function(i){ + jcols <- lapply(seq_len(length(decreasing)), function(i) { if (decreasing[[i]]) { desc(getColumn(x, by[[i]])) } else { @@ -2749,7 +2749,7 @@ genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) { col <- getColumn(x, colName) if (colName %in% intersectedColNames) { newJoin <- paste(colName, suffix, sep = "") - if (newJoin %in% allColNames){ + if (newJoin %in% allColNames) { stop("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.", "Please use different suffixes for the intersected columns.") } @@ -3475,7 +3475,7 @@ setMethod("str", cat(paste0("'", class(object), "': ", length(names), " variables:\n")) if (nrow(localDF) > 0) { - for (i in 1 : ncol(localDF)) { + for (i in seq_len(ncol(localDF))) { # Get the first elements for each column firstElements <- if (types[i] == "character") { diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 43ea27b359a9c..c6842912706af 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -148,19 +148,7 @@ getDefaultSqlSource <- function() { } writeToFileInArrow <- function(fileName, rdf, numPartitions) { - requireNamespace1 <- requireNamespace - - # R API in Arrow is not yet released in CRAN. CRAN requires to add the - # package in requireNamespace at DESCRIPTION. Later, CRAN checks if the package is available - # or not. Therefore, it works around by avoiding direct requireNamespace. - # Currently, as of Arrow 0.12.0, it can be installed by install_github. See ARROW-3204. 
- if (requireNamespace1("arrow", quietly = TRUE)) { - record_batch <- get("record_batch", envir = asNamespace("arrow"), inherits = FALSE) - RecordBatchStreamWriter <- get( - "RecordBatchStreamWriter", envir = asNamespace("arrow"), inherits = FALSE) - FileOutputStream <- get( - "FileOutputStream", envir = asNamespace("arrow"), inherits = FALSE) - + if (requireNamespace("arrow", quietly = TRUE)) { numPartitions <- if (!is.null(numPartitions)) { numToInt(numPartitions) } else { @@ -176,11 +164,11 @@ writeToFileInArrow <- function(fileName, rdf, numPartitions) { stream_writer <- NULL tryCatch({ for (rdf_slice in rdf_slices) { - batch <- record_batch(rdf_slice) + batch <- arrow::record_batch(rdf_slice) if (is.null(stream_writer)) { - stream <- FileOutputStream(fileName) + stream <- arrow::FileOutputStream$create(fileName) schema <- batch$schema - stream_writer <- RecordBatchStreamWriter(stream, schema) + stream_writer <- arrow::RecordBatchStreamWriter$create(stream, schema) } stream_writer$write_batch(batch) @@ -209,7 +197,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) { as.list(schema) } if (is.null(names)) { - names <- lapply(1:length(firstRow), function(x) { + names <- lapply(seq_len(length(firstRow)), function(x) { paste0("_", as.character(x)) }) } @@ -225,7 +213,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) { }) types <- lapply(firstRow, infer_type) - fields <- lapply(1:length(firstRow), function(i) { + fields <- lapply(seq_len(length(firstRow)), function(i) { structField(names[[i]], types[[i]], TRUE) }) schema <- do.call(structType, fields) @@ -568,7 +556,6 @@ tableToDF <- function(tableName) { #' stringSchema <- "name STRING, info MAP" #' df4 <- read.df(mapTypeJsonPath, "json", stringSchema, multiLine = TRUE) #' } -#' @name read.df #' @note read.df since 1.4.0 read.df <- function(path = NULL, source = NULL, schema = NULL, na.strings = "NA", ...) 
{ if (!is.null(path) && !is.character(path)) { @@ -699,7 +686,6 @@ read.jdbc <- function(url, tableName, #' stringSchema <- "name STRING, info MAP" #' df1 <- read.stream("json", path = jsonDir, schema = stringSchema, maxFilesPerTrigger = 1) #' } -#' @name read.stream #' @note read.stream since 2.2.0 #' @note experimental read.stream <- function(source = NULL, schema = NULL, ...) { diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 51ae2d2954a9a..d96a287f818a2 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -301,7 +301,7 @@ broadcastRDD <- function(sc, object) { #' Set the checkpoint directory #' #' Set the directory under which RDDs are going to be checkpointed. The -#' directory must be a HDFS path if running on a cluster. +#' directory must be an HDFS path if running on a cluster. #' #' @param sc Spark Context to use #' @param dirName Directory path @@ -416,7 +416,7 @@ spark.getSparkFiles <- function(fileName) { #' @examples #'\dontrun{ #' sparkR.session() -#' doubled <- spark.lapply(1:10, function(x){2 * x}) +#' doubled <- spark.lapply(1:10, function(x) {2 * x}) #'} #' @note spark.lapply since 2.0.0 spark.lapply <- function(list, func) { @@ -446,7 +446,7 @@ setLogLevel <- function(level) { #' Set checkpoint directory #' #' Set the directory under which SparkDataFrame are going to be checkpointed. The directory must be -#' a HDFS path if running on a cluster. +#' an HDFS path if running on a cluster. #' #' @rdname setCheckpointDir #' @param directory Directory path to checkpoint to diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index b38d245a0cca7..ca4a6e342d772 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -232,11 +232,7 @@ readMultipleObjectsWithKeys <- function(inputCon) { } readDeserializeInArrow <- function(inputCon) { - # This is a hack to avoid CRAN check. Arrow is not uploaded into CRAN now. See ARROW-3204. 
- requireNamespace1 <- requireNamespace - if (requireNamespace1("arrow", quietly = TRUE)) { - RecordBatchStreamReader <- get( - "RecordBatchStreamReader", envir = asNamespace("arrow"), inherits = FALSE) + if (requireNamespace("arrow", quietly = TRUE)) { # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190. useAsTibble <- exists("as_tibble", envir = asNamespace("arrow")) @@ -246,7 +242,7 @@ readDeserializeInArrow <- function(inputCon) { # for now. dataLen <- readInt(inputCon) arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big") - batches <- RecordBatchStreamReader(arrowData)$batches() + batches <- arrow::RecordBatchStreamReader$create(arrowData)$batches() if (useAsTibble) { as_tibble <- get("as_tibble", envir = asNamespace("arrow")) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index eecb84572a30b..48f69d5769620 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -136,6 +136,14 @@ NULL #' format to. See 'Details'. #' } #' @param y Column to compute on. +#' @param pos In \itemize{ +#' \item \code{locate}: a start position of search. +#' \item \code{overlay}: a start position for replacement. +#' } +#' @param len In \itemize{ +#' \item \code{lpad} the maximum length of each output result. +#' \item \code{overlay} a number of bytes to replace. +#' } #' @param ... additional Columns. #' @name column_string_functions #' @rdname column_string_functions @@ -879,8 +887,8 @@ setMethod("factorial", #' #' The function by default returns the first values it sees. It will return the first non-missing #' value it sees when na.rm is set to true. If all values are missing, then NA is returned. -#' Note: the function is non-deterministic because its results depends on order of rows which -#' may be non-deterministic after a shuffle. +#' Note: the function is non-deterministic because its results depends on the order of the rows +#' which may be non-deterministic after a shuffle. 
#' #' @param na.rm a logical value indicating whether NA values should be stripped #' before the computation proceeds. @@ -1024,8 +1032,8 @@ setMethod("kurtosis", #' #' The function by default returns the last values it sees. It will return the last non-missing #' value it sees when na.rm is set to true. If all values are missing, then NA is returned. -#' Note: the function is non-deterministic because its results depends on order of rows which -#' may be non-deterministic after a shuffle. +#' Note: the function is non-deterministic because its results depends on the order of the rows +#' which may be non-deterministic after a shuffle. #' #' @param x column to compute on. #' @param na.rm a logical value indicating whether NA values should be stripped @@ -1319,6 +1327,35 @@ setMethod("negate", column(jc) }) +#' @details +#' \code{overlay}: Overlay the specified portion of \code{x} with \code{replace}, +#' starting from byte position \code{pos} of \code{src} and proceeding for +#' \code{len} bytes. +#' +#' @param replace a Column with replacement. +#' +#' @rdname column_string_functions +#' @aliases overlay overlay,Column-method,numericOrColumn-method +#' @note overlay since 3.0.0 +setMethod("overlay", + signature(x = "Column", replace = "Column", pos = "numericOrColumn"), + function(x, replace, pos, len = -1) { + if (is.numeric(pos)) { + pos <- lit(as.integer(pos)) + } + + if (is.numeric(len)) { + len <- lit(as.integer(len)) + } + + jc <- callJStatic( + "org.apache.spark.sql.functions", "overlay", + x@jc, replace@jc, pos@jc, len@jc + ) + + column(jc) + }) + #' @details #' \code{quarter}: Extracts the quarter as an integer from a given date/timestamp/string. #' @@ -2459,7 +2496,6 @@ setMethod("schema_of_csv", signature(x = "characterOrColumn"), #' @note from_utc_timestamp since 1.5.0 setMethod("from_utc_timestamp", signature(y = "Column", x = "character"), function(y, x) { - .Deprecated(msg = "from_utc_timestamp is deprecated. 
See SPARK-25496.") jc <- callJStatic("org.apache.spark.sql.functions", "from_utc_timestamp", y@jc, x) column(jc) }) @@ -2518,7 +2554,6 @@ setMethod("next_day", signature(y = "Column", x = "character"), #' @note to_utc_timestamp since 1.5.0 setMethod("to_utc_timestamp", signature(y = "Column", x = "character"), function(y, x) { - .Deprecated(msg = "to_utc_timestamp is deprecated. See SPARK-25496.") jc <- callJStatic("org.apache.spark.sql.functions", "to_utc_timestamp", y@jc, x) column(jc) }) @@ -2819,7 +2854,6 @@ setMethod("window", signature(x = "Column"), #' #' @param substr a character string to be matched. #' @param str a Column where matches are sought for each entry. -#' @param pos start position of search. #' @rdname column_string_functions #' @aliases locate locate,character,Column-method #' @note locate since 1.5.0 @@ -2834,7 +2868,6 @@ setMethod("locate", signature(substr = "character", str = "Column"), #' @details #' \code{lpad}: Left-padded with pad to a length of len. #' -#' @param len maximum length of each output result. #' @param pad a character string to be padded with. #' @rdname column_string_functions #' @aliases lpad lpad,Column,numeric,character-method @@ -3617,11 +3650,11 @@ setMethod("size", #' @details #' \code{slice}: Returns an array containing all the elements in x from the index start -#' (or starting from the end if start is negative) with the specified length. +#' (array indices start at 1, or from the end if start is negative) with the specified length. #' #' @rdname column_collection_functions -#' @param start an index indicating the first element occurring in the result. -#' @param length a number of consecutive elements chosen to the result. +#' @param start the starting index +#' @param length the length of the slice #' @aliases slice slice,Column-method #' @note slice since 2.4.0 setMethod("slice", @@ -3706,7 +3739,7 @@ setMethod("create_map", #' @details #' \code{collect_list}: Creates a list of objects with duplicates. 
#' Note: the function is non-deterministic because the order of collected results depends -#' on order of rows which may be non-deterministic after a shuffle. +#' on the order of the rows which may be non-deterministic after a shuffle. #' #' @rdname column_aggregate_functions #' @aliases collect_list collect_list,Column-method @@ -3727,7 +3760,7 @@ setMethod("collect_list", #' @details #' \code{collect_set}: Creates a list of objects with duplicate elements eliminated. #' Note: the function is non-deterministic because the order of collected results depends -#' on order of rows which may be non-deterministic after a shuffle. +#' on the order of the rows which may be non-deterministic after a shuffle. #' #' @rdname column_aggregate_functions #' @aliases collect_set collect_set,Column-method diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index f849dd172247c..4134d5cecc888 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1149,6 +1149,10 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") }) #' @name NULL setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") }) +#' @rdname column_string_functions +#' @name NULL +setGeneric("overlay", function(x, replace, pos, ...) 
{ standardGeneric("overlay") }) + #' @rdname column_window_functions #' @name NULL setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") }) diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 6e8f4dc3a7907..2b7995e1e37f6 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -162,7 +162,7 @@ methods <- c("avg", "max", "mean", "min", "sum") #' @note pivot since 2.0.0 setMethod("pivot", signature(x = "GroupedData", colname = "character"), - function(x, colname, values = list()){ + function(x, colname, values = list()) { stopifnot(length(colname) == 1) if (length(values) == 0) { result <- callJMethod(x@sgd, "pivot", colname) diff --git a/R/pkg/R/mllib_recommendation.R b/R/pkg/R/mllib_recommendation.R index 9a77b07462585..d238ff93ed245 100644 --- a/R/pkg/R/mllib_recommendation.R +++ b/R/pkg/R/mllib_recommendation.R @@ -82,6 +82,12 @@ setClass("ALSModel", representation(jobj = "jobj")) #' statsS <- summary(modelS) #' } #' @note spark.als since 2.1.0 +#' @note the input rating dataframe to the ALS implementation should be deterministic. +#' Nondeterministic data can cause failure during fitting ALS model. For example, +#' an order-sensitive operation like sampling after a repartition makes dataframe output +#' nondeterministic, like \code{sample(repartition(df, 2L), FALSE, 0.5, 1618L)}. +#' Checkpointing sampled dataframe or adding a sort before sampling can help make the +#' dataframe deterministic. 
setMethod("spark.als", signature(data = "SparkDataFrame"), function(data, ratingCol = "rating", userCol = "user", itemCol = "item", rank = 10, regParam = 0.1, maxIter = 10, nonnegative = FALSE, diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R index ff16b436217dc..f6aa48f5fa04a 100644 --- a/R/pkg/R/mllib_tree.R +++ b/R/pkg/R/mllib_tree.R @@ -393,6 +393,7 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara #' "error" (throw an error), "keep" (put invalid data in #' a special additional bucket, at index numLabels). Default #' is "error". +#' @param bootstrap Whether bootstrap samples are used when building trees. #' @param ... additional arguments passed to the method. #' @aliases spark.randomForest,SparkDataFrame,formula-method #' @return \code{spark.randomForest} returns a fitted Random Forest model. @@ -428,7 +429,8 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo featureSubsetStrategy = "auto", seed = NULL, subsamplingRate = 1.0, minInstancesPerNode = 1, minInfoGain = 0.0, checkpointInterval = 10, maxMemoryInMB = 256, cacheNodeIds = FALSE, - handleInvalid = c("error", "keep", "skip")) { + handleInvalid = c("error", "keep", "skip"), + bootstrap = TRUE) { type <- match.arg(type) formula <- paste(deparse(formula), collapse = "") if (!is.null(seed)) { @@ -445,7 +447,8 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo as.numeric(minInfoGain), as.integer(checkpointInterval), as.character(featureSubsetStrategy), seed, as.numeric(subsamplingRate), - as.integer(maxMemoryInMB), as.logical(cacheNodeIds)) + as.integer(maxMemoryInMB), as.logical(cacheNodeIds), + as.logical(bootstrap)) new("RandomForestRegressionModel", jobj = jobj) }, classification = { @@ -460,7 +463,7 @@ setMethod("spark.randomForest", signature(data = "SparkDataFrame", formula = "fo as.character(featureSubsetStrategy), seed, as.numeric(subsamplingRate), as.integer(maxMemoryInMB), 
as.logical(cacheNodeIds), - handleInvalid) + handleInvalid, as.logical(bootstrap)) new("RandomForestClassificationModel", jobj = jobj) } ) diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R index 0d6f32c8f7e1f..cb3c1c59d12ed 100644 --- a/R/pkg/R/serialize.R +++ b/R/pkg/R/serialize.R @@ -222,15 +222,11 @@ writeArgs <- function(con, args) { } writeSerializeInArrow <- function(conn, df) { - # This is a hack to avoid CRAN check. Arrow is not uploaded into CRAN now. See ARROW-3204. - requireNamespace1 <- requireNamespace - if (requireNamespace1("arrow", quietly = TRUE)) { - write_arrow <- get("write_arrow", envir = asNamespace("arrow"), inherits = FALSE) - + if (requireNamespace("arrow", quietly = TRUE)) { # There looks no way to send each batch in streaming format via socket # connection. See ARROW-4512. # So, it writes the whole Arrow streaming-formatted binary at once for now. - writeRaw(conn, write_arrow(df, raw())) + writeRaw(conn, arrow::write_arrow(df, raw())) } else { stop("'arrow' package should be installed.") } diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 31b986c326d0c..cdb59093781fb 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -266,11 +266,12 @@ sparkR.sparkContext <- function( #' df <- read.json(path) #' #' sparkR.session("local[2]", "SparkR", "/home/spark") -#' sparkR.session("yarn-client", "SparkR", "/home/spark", -#' list(spark.executor.memory="4g"), +#' sparkR.session("yarn", "SparkR", "/home/spark", +#' list(spark.executor.memory="4g", spark.submit.deployMode="client"), #' c("one.jar", "two.jar", "three.jar"), #' c("com.databricks:spark-avro_2.12:2.0.1")) -#' sparkR.session(spark.master = "yarn-client", spark.executor.memory = "4g") +#' sparkR.session(spark.master = "yarn", spark.submit.deployMode = "client", +# spark.executor.memory = "4g") #'} #' @note sparkR.session since 2.0.0 sparkR.session <- function( diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index c3501977e64bc..a8c1ddb3dd20b 100644 --- a/R/pkg/R/utils.R +++ 
b/R/pkg/R/utils.R @@ -131,7 +131,7 @@ hashCode <- function(key) { } else { asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) hashC <- 0 - for (k in 1:length(asciiVals)) { + for (k in seq_len(length(asciiVals))) { hashC <- mult31AndAdd(hashC, asciiVals[k]) } as.integer(hashC) @@ -543,10 +543,14 @@ processClosure <- function(node, oldEnv, defVars, checkedFuncs, newEnv) { funcList <- mget(nodeChar, envir = checkedFuncs, inherits = F, ifnotfound = list(list(NULL)))[[1]] found <- sapply(funcList, function(func) { - ifelse(identical(func, obj), TRUE, FALSE) + ifelse( + identical(func, obj) && + # Also check if the parent environment is identical to current parent + identical(parent.env(environment(func)), func.env), + TRUE, FALSE) }) if (sum(found) > 0) { - # If function has been examined, ignore. + # If function has been examined ignore break } # Function has not been examined, record it and recursively clean its closure. @@ -724,7 +728,7 @@ assignNewEnv <- function(data) { stopifnot(length(cols) > 0) env <- new.env() - for (i in 1:length(cols)) { + for (i in seq_len(length(cols))) { assign(x = cols[i], value = data[, cols[i], drop = F], envir = env) } env @@ -750,7 +754,7 @@ launchScript <- function(script, combinedArgs, wait = FALSE, stdout = "", stderr if (.Platform$OS.type == "windows") { scriptWithArgs <- paste(script, combinedArgs, sep = " ") # on Windows, intern = F seems to mean output to the console. 
(documentation on this is missing) - shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait) # nolint + shell(scriptWithArgs, translate = TRUE, wait = wait, intern = wait) } else { # http://stat.ethz.ch/R-manual/R-devel/library/base/html/system2.html # stdout = F means discard output diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index 80dc4ee634512..1ef05ea621e83 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -50,7 +50,7 @@ compute <- function(mode, partition, serializer, deserializer, key, } else { # Check to see if inputData is a valid data.frame stopifnot(deserializer == "byte" || deserializer == "arrow") - stopifnot(class(inputData) == "data.frame") + stopifnot(is.data.frame(inputData)) } if (mode == 2) { @@ -194,7 +194,7 @@ if (isEmpty != 0) { } else { # gapply mode outputs <- list() - for (i in 1:length(data)) { + for (i in seq_len(length(data))) { # Timing reading input data for execution inputElap <- elapsedSecs() output <- compute(mode, partition, serializer, deserializer, keys[[i]], diff --git a/R/pkg/tests/fulltests/data/test_utils_utf.json b/R/pkg/tests/fulltests/data/test_utils_utf.json new file mode 100644 index 0000000000000..b78352ee52ef1 --- /dev/null +++ b/R/pkg/tests/fulltests/data/test_utils_utf.json @@ -0,0 +1,4 @@ +{"name": "안녕하세요"} +{"name": "您好", "age": 30} +{"name": "こんにちは", "age": 19} +{"name": "Xin chào"} diff --git a/R/pkg/tests/fulltests/test_context.R b/R/pkg/tests/fulltests/test_context.R index eb8d2a700e1ea..6be04b321e985 100644 --- a/R/pkg/tests/fulltests/test_context.R +++ b/R/pkg/tests/fulltests/test_context.R @@ -25,7 +25,8 @@ test_that("Check masked functions", { namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var", "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset", "summary", "transform", "drop", "window", "as.data.frame", "union", "not") - if (as.numeric(R.version$major) >= 3 && 
as.numeric(R.version$minor) >= 3) { + version <- packageVersion("base") + if (as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3) { namesOfMasked <- c("endsWith", "startsWith", namesOfMasked) } masked <- conflicts(detail = TRUE)$`package:SparkR` @@ -84,6 +85,7 @@ test_that("rdd GC across sparkR.stop", { countRDD(rdd3) countRDD(rdd4) sparkR.session.stop() + expect_true(TRUE) }) test_that("job group functions can be called", { @@ -93,6 +95,7 @@ test_that("job group functions can be called", { clearJobGroup() sparkR.session.stop() + expect_true(TRUE) }) test_that("job description and local properties can be set and got", { @@ -131,6 +134,7 @@ test_that("utility function can be called", { sparkR.sparkContext(master = sparkRTestMaster) setLogLevel("ERROR") sparkR.session.stop() + expect_true(TRUE) }) test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", { @@ -234,4 +238,5 @@ test_that("SPARK-25234: parallelize should not have integer overflow", { # 47000 * 47000 exceeds integer range parallelize(sc, 1:47000, 47000) sparkR.session.stop() + expect_true(TRUE) }) diff --git a/R/pkg/tests/fulltests/test_includePackage.R b/R/pkg/tests/fulltests/test_includePackage.R index f4ea0d1b5cb27..1d16b260c4c52 100644 --- a/R/pkg/tests/fulltests/test_includePackage.R +++ b/R/pkg/tests/fulltests/test_includePackage.R @@ -27,8 +27,8 @@ rdd <- parallelize(sc, nums, 2L) test_that("include inside function", { # Only run the test if plyr is installed. 
- if ("plyr" %in% rownames(installed.packages())) { - suppressPackageStartupMessages(library(plyr)) + if ("plyr" %in% rownames(installed.packages()) && + suppressPackageStartupMessages(suppressWarnings(library(plyr, logical.return = TRUE)))) { generateData <- function(x) { suppressPackageStartupMessages(library(plyr)) attach(airquality) @@ -39,12 +39,13 @@ test_that("include inside function", { data <- lapplyPartition(rdd, generateData) actual <- collectRDD(data) } + expect_true(TRUE) }) test_that("use include package", { # Only run the test if plyr is installed. - if ("plyr" %in% rownames(installed.packages())) { - suppressPackageStartupMessages(library(plyr)) + if ("plyr" %in% rownames(installed.packages()) && + suppressPackageStartupMessages(suppressWarnings(library(plyr, logical.return = TRUE)))) { generateData <- function(x) { attach(airquality) result <- transform(Ozone, logOzone = log(Ozone)) @@ -55,6 +56,7 @@ test_that("use include package", { data <- lapplyPartition(rdd, generateData) actual <- collectRDD(data) } + expect_true(TRUE) }) sparkR.session.stop() diff --git a/R/pkg/tests/fulltests/test_mllib_recommendation.R b/R/pkg/tests/fulltests/test_mllib_recommendation.R index d50de4123aeb0..73f6cfd67cee9 100644 --- a/R/pkg/tests/fulltests/test_mllib_recommendation.R +++ b/R/pkg/tests/fulltests/test_mllib_recommendation.R @@ -31,7 +31,8 @@ test_that("spark.als", { stats <- summary(model) expect_equal(stats$rank, 10) test <- createDataFrame(list(list(0, 2), list(1, 0), list(2, 0)), c("user", "item")) - predictions <- collect(predict(model, test)) + result <- predict(model, test) + predictions <- collect(arrange(result, desc(result$item), result$user)) expect_equal(predictions$prediction, c(0.6324540, 3.6218479, -0.4568263), tolerance = 1e-4) diff --git a/R/pkg/tests/fulltests/test_mllib_tree.R b/R/pkg/tests/fulltests/test_mllib_tree.R index ad68700c7ff4e..ee5043a744bba 100644 --- a/R/pkg/tests/fulltests/test_mllib_tree.R +++ 
b/R/pkg/tests/fulltests/test_mllib_tree.R @@ -130,7 +130,7 @@ test_that("spark.randomForest", { # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.randomForest(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, - numTrees = 1, seed = 1) + numTrees = 1, seed = 1, bootstrap = FALSE) predictions <- collect(predict(model, data)) expect_equal(predictions$prediction, c(60.323, 61.122, 60.171, 61.187, diff --git a/R/pkg/tests/fulltests/test_sparkR.R b/R/pkg/tests/fulltests/test_sparkR.R index f73fc6baeccef..4232f5ec430f6 100644 --- a/R/pkg/tests/fulltests/test_sparkR.R +++ b/R/pkg/tests/fulltests/test_sparkR.R @@ -36,8 +36,8 @@ test_that("sparkCheckInstall", { # "yarn-client, mesos-client" mode, SPARK_HOME was not set sparkHome <- "" - master <- "yarn-client" - deployMode <- "" + master <- "yarn" + deployMode <- "client" expect_error(sparkCheckInstall(sparkHome, master, deployMode)) sparkHome <- "" master <- "" diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 035525a7a849b..c1d277ac84be1 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -172,7 +172,7 @@ test_that("structField type strings", { typeList <- c(primitiveTypes, complexTypes) typeStrings <- names(typeList) - for (i in seq_along(typeStrings)){ + for (i in seq_along(typeStrings)) { typeString <- typeStrings[i] expected <- typeList[[i]] testField <- structField("_col", typeString) @@ -203,7 +203,7 @@ test_that("structField type strings", { errorList <- c(primitiveErrors, complexErrors) typeStrings <- names(errorList) - for (i in seq_along(typeStrings)){ + for (i in seq_along(typeStrings)) { typeString <- typeStrings[i] expected <- paste0("Unsupported type for SparkDataframe: ", errorList[[i]]) expect_error(structField("_col", typeString), expected) @@ -848,24 +848,31 @@ test_that("collect() and take() on a DataFrame return the same number of rows an }) test_that("collect() support 
Unicode characters", { - lines <- c("{\"name\":\"안녕하세요\"}", - "{\"name\":\"您好\", \"age\":30}", - "{\"name\":\"こんにちは\", \"age\":19}", - "{\"name\":\"Xin chào\"}") + jsonPath <- file.path( + Sys.getenv("SPARK_HOME"), + "R", "pkg", "tests", "fulltests", "data", + "test_utils_utf.json" + ) + + lines <- readLines(jsonPath, encoding = "UTF-8") - jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") - writeLines(lines, jsonPath) + expected <- regmatches(lines, gregexpr('(?<="name": ").*?(?=")', lines, perl = TRUE)) df <- read.df(jsonPath, "json") rdf <- collect(df) expect_true(is.data.frame(rdf)) - expect_equal(rdf$name[1], markUtf8("안녕하세요")) - expect_equal(rdf$name[2], markUtf8("您好")) - expect_equal(rdf$name[3], markUtf8("こんにちは")) - expect_equal(rdf$name[4], markUtf8("Xin chào")) + expect_equal(rdf$name[1], expected[[1]]) + expect_equal(rdf$name[2], expected[[2]]) + expect_equal(rdf$name[3], expected[[3]]) + expect_equal(rdf$name[4], expected[[4]]) df1 <- createDataFrame(rdf) - expect_equal(collect(where(df1, df1$name == markUtf8("您好")))$name, markUtf8("您好")) + expect_equal( + collect( + where(df1, df1$name == expected[[2]]) + )$name, + expected[[2]] + ) }) test_that("multiple pipeline transformations result in an RDD with the correct values", { @@ -1375,6 +1382,7 @@ test_that("column operators", { c5 <- c2 ^ c3 ^ c4 c6 <- c2 %<=>% c3 c7 <- !c6 + expect_true(TRUE) }) test_that("column functions", { @@ -1405,6 +1413,8 @@ test_that("column functions", { trunc(c, "month") + trunc(c, "mon") + trunc(c, "mm") c24 <- date_trunc("hour", c) + date_trunc("minute", c) + date_trunc("week", c) + date_trunc("quarter", c) + current_date() + current_timestamp() + c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + + overlay(c1, c2, 3, 4) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) @@ -1800,7 +1810,8 @@ test_that("string operators", { expect_true(first(select(df, endsWith(df$name, "el")))[[1]]) 
expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi") expect_equal(first(select(df, substr(df$name, 4, 6)))[[1]], "hae") - if (as.numeric(R.version$major) >= 3 && as.numeric(R.version$minor) >= 3) { + version <- packageVersion("base") + if (as.numeric(version$major) >= 3 && as.numeric(version$minor) >= 3) { expect_true(startsWith("Hello World", "Hello")) expect_false(endsWith("Hello World", "a")) } @@ -1905,20 +1916,10 @@ test_that("date functions on a DataFrame", { df2 <- createDataFrame(l2) expect_equal(collect(select(df2, minute(df2$b)))[, 1], c(34, 24)) expect_equal(collect(select(df2, second(df2$b)))[, 1], c(0, 34)) - conf <- callJMethod(sparkSession, "conf") - isUtcTimestampFuncEnabled <- callJMethod(conf, "get", "spark.sql.legacy.utcTimestampFunc.enabled") - callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", "true") - tryCatch({ - # Both from_utc_timestamp and to_utc_timestamp are deprecated as of SPARK-25496 - expect_equal(suppressWarnings(collect(select(df2, from_utc_timestamp(df2$b, "JST"))))[, 1], - c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC"))) - expect_equal(suppressWarnings(collect(select(df2, to_utc_timestamp(df2$b, "JST"))))[, 1], - c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC"))) - }, - finally = { - # Reverting the conf back - callJMethod(conf, "set", "spark.sql.legacy.utcTimestampFunc.enabled", isUtcTimestampFuncEnabled) - }) + expect_equal(collect(select(df2, from_utc_timestamp(df2$b, "JST")))[, 1], + c(as.POSIXct("2012-12-13 21:34:00 UTC"), as.POSIXct("2014-12-15 10:24:34 UTC"))) + expect_equal(collect(select(df2, to_utc_timestamp(df2$b, "JST")))[, 1], + c(as.POSIXct("2012-12-13 03:34:00 UTC"), as.POSIXct("2014-12-14 16:24:34 UTC"))) expect_gt(collect(select(df2, unix_timestamp()))[1, 1], 0) expect_gt(collect(select(df2, unix_timestamp(df2$b)))[1, 1], 0) expect_gt(collect(select(df2, unix_timestamp(lit("2015-01-01"), "yyyy-MM-dd")))[1, 1], 0) @@ 
-3238,6 +3239,13 @@ test_that("Histogram", { expect_equal(histogram(df, "x")$counts, c(4, 0, 0, 0, 0, 0, 0, 0, 0, 1)) }) +test_that("dapply() should show error message from R worker", { + df <- createDataFrame(list(list(n = 1))) + expect_error({ + collect(dapply(df, function(x) stop("custom error message"), structType("a double"))) + }, "custom error message") +}) + test_that("dapply() and dapplyCollect() on a DataFrame", { df <- createDataFrame( list(list(1L, 1, "1"), list(2L, 2, "2"), list(3L, 3, "3")), diff --git a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R index 825c7423e1579..97972753a78fa 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R +++ b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R @@ -101,7 +101,7 @@ test_that("dapply() Arrow optimization", { tryCatch({ ret <- dapply(df, function(rdf) { - stopifnot(class(rdf) == "data.frame") + stopifnot(is.data.frame(rdf)) rdf }, schema(df)) @@ -115,7 +115,7 @@ test_that("dapply() Arrow optimization", { tryCatch({ ret <- dapply(df, function(rdf) { - stopifnot(class(rdf) == "data.frame") + stopifnot(is.data.frame(rdf)) # mtcars' hp is more then 50. stopifnot(all(rdf$hp > 50)) rdf @@ -199,7 +199,7 @@ test_that("gapply() Arrow optimization", { if (length(key) > 0) { stopifnot(is.numeric(key[[1]])) } - stopifnot(class(grouped) == "data.frame") + stopifnot(is.data.frame(grouped)) grouped }, schema(df)) @@ -217,7 +217,7 @@ test_that("gapply() Arrow optimization", { if (length(key) > 0) { stopifnot(is.numeric(key[[1]])) } - stopifnot(class(grouped) == "data.frame") + stopifnot(is.data.frame(grouped)) stopifnot(length(colnames(grouped)) == 11) # mtcars' hp is more then 50. 
stopifnot(all(grouped$hp > 50)) diff --git a/R/pkg/tests/fulltests/test_textFile.R b/R/pkg/tests/fulltests/test_textFile.R index be2d2711ff88e..046018c7c2a2d 100644 --- a/R/pkg/tests/fulltests/test_textFile.R +++ b/R/pkg/tests/fulltests/test_textFile.R @@ -75,6 +75,7 @@ test_that("several transformations on RDD created by textFile()", { collectRDD(rdd) unlink(fileName) + expect_true(TRUE) }) test_that("textFile() followed by a saveAsTextFile() returns the same content", { diff --git a/R/pkg/tests/fulltests/test_utils.R b/R/pkg/tests/fulltests/test_utils.R index b2b6f34aaa085..c3fb9046fcda4 100644 --- a/R/pkg/tests/fulltests/test_utils.R +++ b/R/pkg/tests/fulltests/test_utils.R @@ -89,7 +89,10 @@ test_that("cleanClosure on R functions", { lapply(x, g) + 1 # Test for capturing function call "g"'s closure as a argument of lapply. l$field[1, 1] <- 3 # Test for access operators `$`. res <- defUse + l$field[1, ] # Test for def-use chain of "defUse", and "" symbol. - f(res) # Test for recursive calls. + # Enable once SPARK-30629 is fixed + # nolint start + # f(res) # Test for recursive calls. + # nolint end } newF <- cleanClosure(f) env <- environment(newF) @@ -101,7 +104,10 @@ test_that("cleanClosure on R functions", { # nolint end expect_true("g" %in% ls(env)) expect_true("l" %in% ls(env)) - expect_true("f" %in% ls(env)) + # Enable once SPARK-30629 is fixed + # nolint start + # expect_true("f" %in% ls(env)) + # nolint end expect_equal(get("l", envir = env, inherits = FALSE), l) # "y" should be in the environment of g. 
newG <- get("g", envir = env, inherits = FALSE) @@ -110,6 +116,15 @@ test_that("cleanClosure on R functions", { actual <- get("y", envir = env, inherits = FALSE) expect_equal(actual, y) + # Test for combination for nested and sequenctial functions in a closure + f1 <- function(x) x + 1 + f2 <- function(x) f1(x) + 2 + userFunc <- function(x) { f1(x); f2(x) } + cUserFuncEnv <- environment(cleanClosure(userFunc)) + expect_equal(length(cUserFuncEnv), 2) + innerCUserFuncEnv <- environment(cUserFuncEnv$f2) + expect_equal(length(innerCUserFuncEnv), 1) + # Test for function (and variable) definitions. f <- function(x) { g <- function(y) { y * 2 } diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 1e96418558883..bf02ecdad66ff 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -20,7 +20,6 @@ library(SparkR) # SPARK-25572 if (identical(Sys.getenv("NOT_CRAN"), "true")) { - # Turn all warnings into errors options("warn" = 2) @@ -60,11 +59,23 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) { if (identical(Sys.getenv("NOT_CRAN"), "true")) { # set random seed for predictable results. 
mostly for base's sample() in tree and classification set.seed(42) - # for testthat 1.0.2 later, change reporter from "summary" to default_reporter() - testthat:::run_tests("SparkR", - file.path(sparkRDir, "pkg", "tests", "fulltests"), - NULL, - "summary") + + # TODO (SPARK-30663) To be removed once testthat 1.x is removed from all builds + if (grepl("^1\\..*", packageVersion("testthat"))) { + # testthat 1.x + test_runner <- testthat:::run_tests + reporter <- "summary" + + } else { + # testthat >= 2.0.0 + test_runner <- testthat:::test_package_dir + reporter <- testthat::default_reporter() + } + + test_runner("SparkR", + file.path(sparkRDir, "pkg", "tests", "fulltests"), + NULL, + reporter) } SparkR:::uninstallDownloadedSpark() diff --git a/R/run-tests.sh b/R/run-tests.sh index 86bd8aad5f113..51ca7d600caf0 100755 --- a/R/run-tests.sh +++ b/R/run-tests.sh @@ -23,7 +23,7 @@ FAILED=0 LOGFILE=$FWDIR/unit-tests.out rm -f $LOGFILE -SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.defaultFS="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE +SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE FAILED=$((PIPESTATUS[0]||$FAILED)) NUM_TEST_WARNING="$(grep -c -e 'Warnings ----------------' $LOGFILE)" diff --git a/README.md b/README.md index 29777a5962bc2..d7931263b0fc7 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ and Structured Streaming for stream processing. 
-[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7) +[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3) [![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark) [![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site) @@ -29,7 +29,6 @@ To build Spark and its example programs, run: (You do not need to do this if you downloaded a pre-built package.) -You can build Spark using more than one thread by using the -T option with Maven, see ["Parallel builds in Maven 3"](https://cwiki.apache.org/confluence/display/MAVEN/Parallel+builds+in+Maven+3). More detailed documentation is available from the project site, at ["Building Spark"](https://spark.apache.org/docs/latest/building-spark.html). diff --git a/appveyor.yml b/appveyor.yml index a61436c5d2e68..5d98260265b1a 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -42,13 +42,9 @@ install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 # Required package for R unit tests - - cmd: R -e "install.packages(c('knitr', 'rmarkdown', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" - # Here, we use the fixed version of testthat. For more details, please see SPARK-22817. - # As of devtools 2.1.0, it requires testthat higher then 2.1.1 as a dependency. SparkR test requires testthat 1.0.2. 
- # Therefore, we don't use devtools but installs it directly from the archive including its dependencies. - - cmd: R -e "install.packages(c('crayon', 'praise', 'R6'), repos='https://cloud.r-project.org/')" - - cmd: R -e "install.packages('https://cloud.r-project.org/src/contrib/Archive/testthat/testthat_1.0.2.tar.gz', repos=NULL, type='source')" - - cmd: R -e "packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival')" + - cmd: R -e "install.packages(c('knitr', 'rmarkdown', 'e1071', 'survival', 'arrow'), repos='https://cloud.r-project.org/')" + - cmd: R -e "install.packages(c('crayon', 'praise', 'R6', 'testthat'), repos='https://cloud.r-project.org/')" + - cmd: R -e "packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival'); packageVersion('arrow')" build_script: # '-Djna.nosys=true' is required to avoid kernel32.dll load failure. diff --git a/assembly/pom.xml b/assembly/pom.xml index ef916fb99a04c..193ad3d671bcf 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -64,11 +64,6 @@ spark-graphx_${scala.binary.version} ${project.version} - - org.apache.spark - spark-graph_${scala.binary.version} - ${project.version} - org.apache.spark spark-sql_${scala.binary.version} @@ -122,7 +117,7 @@ - + diff --git a/build/mvn b/build/mvn index f68377b3ddc71..3628be9880253 100755 --- a/build/mvn +++ b/build/mvn @@ -22,7 +22,7 @@ _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Preserve the calling directory _CALLING_DIR="$(pwd)" # Options used during compilation -_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m" +_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g" # Installs any application tarball given a URL, the expected tarball name, # and, optionally, a checkable binary path to determine if the binary has diff --git a/build/sbt b/build/sbt index 7d8d0993e57d8..475dfd3b20b43 100755 --- a/build/sbt +++ 
b/build/sbt @@ -66,7 +66,7 @@ Usage: $script_name [options] -sbt-dir path to global settings/plugins directory (default: ~/.sbt) -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) -ivy path to local Ivy repository (default: ~/.ivy2) - -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) + -mem set memory options (default: $sbt_default_mem, which is $(get_mem_opts $sbt_default_mem)) -no-share use all local caches; no sharing -no-global uses global caches, but does not use global ~/.sbt directory. -jvm-debug Turn on JVM debugging, open at the given port. diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash index 0ed6f8b6d737b..162bfbf2257c7 100755 --- a/build/sbt-launch-lib.bash +++ b/build/sbt-launch-lib.bash @@ -17,6 +17,7 @@ declare -a java_args declare -a scalac_args declare -a sbt_commands declare -a maven_profiles +declare sbt_default_mem=2048 if test -x "$JAVA_HOME/bin/java"; then echo -e "Using $JAVA_HOME as default JAVA_HOME." @@ -111,11 +112,10 @@ addDebugger () { # a ham-fisted attempt to move some memory settings in concert # so they need not be dicked around with individually. 
get_mem_opts () { - local mem=${1:-2048} - local perm=$(( $mem / 4 )) - (( $perm > 256 )) || perm=256 - (( $perm < 4096 )) || perm=4096 - local codecache=$(( $perm / 2 )) + local mem=${1:-$sbt_default_mem} + local codecache=$(( $mem / 8 )) + (( $codecache > 128 )) || codecache=128 + (( $codecache < 2048 )) || codecache=2048 echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m" } diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index f042a12fda3d2..a1c8a8e6582eb 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -45,7 +45,7 @@ guava - org.fusesource.leveldbjni + ${leveldbjni.group} leveldbjni-all diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java index 6af45aec3c7b2..b33c53871c32f 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java @@ -252,7 +252,7 @@ private static Predicate getPredicate( return (value) -> set.contains(indexValueForEntity(getter, value)); } else { - HashSet set = new HashSet<>(values.size()); + HashSet> set = new HashSet<>(values.size()); for (Object key : values) { set.add(asKey(key)); } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java index b8c5fab8709ed..d2a26982d8703 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java @@ -124,7 +124,7 @@ interface Accessor { Object get(Object instance) throws ReflectiveOperationException; - Class getType(); + Class getType(); } private class FieldAccessor implements Accessor { @@ -141,7 +141,7 @@ public Object get(Object instance) throws ReflectiveOperationException { } @Override - public Class 
getType() { + public Class getType() { return field.getType(); } } @@ -160,7 +160,7 @@ public Object get(Object instance) throws ReflectiveOperationException { } @Override - public Class getType() { + public Class getType() { return method.getReturnType(); } } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/ArrayKeyIndexType.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/ArrayKeyIndexType.java index 32030fb4115c3..dd53fdf0b1b4c 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/ArrayKeyIndexType.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/ArrayKeyIndexType.java @@ -38,7 +38,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return key.hashCode(); + return Arrays.hashCode(key) ^ Arrays.hashCode(id); } } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/CustomType1.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/CustomType1.java index 92b643b0cb928..ebb5c2c5ed55c 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/CustomType1.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/CustomType1.java @@ -17,8 +17,6 @@ package org.apache.spark.util.kvstore; -import com.google.common.base.Objects; - public class CustomType1 { @KVIndex @@ -52,12 +50,7 @@ public int hashCode() { @Override public String toString() { - return Objects.toStringHelper(this) - .add("key", key) - .add("id", id) - .add("name", name) - .add("num", num) - .toString(); + return "CustomType1[key=" + key + ",id=" + id + ",name=" + name + ",num=" + num; } } diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index c107af9ceb415..163c250054e4d 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -35,6 +35,12 @@ + + + org.scala-lang + scala-library + + io.netty @@ -46,7 +52,7 @@ - org.fusesource.leveldbjni + ${leveldbjni.group} leveldbjni-all 1.8 @@ -87,13 +93,6 @@ - - - 
org.scala-lang - scala-library - ${scala.version} - test - log4j log4j diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java index 45fee541a4f5d..66566b67870f3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java @@ -26,9 +26,10 @@ import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; -import com.google.common.base.Objects; import com.google.common.io.ByteStreams; import io.netty.channel.DefaultFileRegion; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.LimitedInputStream; @@ -144,10 +145,10 @@ public Object convertToNetty() throws IOException { @Override public String toString() { - return Objects.toStringHelper(this) - .add("file", file) - .add("offset", offset) - .add("length", length) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("file", file) + .append("offset", offset) + .append("length", length) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java index acc49d968c186..b42977c7cb7f6 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java @@ -21,9 +21,10 @@ import java.io.InputStream; import java.nio.ByteBuffer; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufInputStream; +import 
org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * A {@link ManagedBuffer} backed by a Netty {@link ByteBuf}. @@ -69,8 +70,8 @@ public Object convertToNetty() throws IOException { @Override public String toString() { - return Objects.toStringHelper(this) - .add("buf", buf) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("buf", buf) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java index 631d767715256..084f89d2611cf 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java @@ -21,9 +21,10 @@ import java.io.InputStream; import java.nio.ByteBuffer; -import com.google.common.base.Objects; import io.netty.buffer.ByteBufInputStream; import io.netty.buffer.Unpooled; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * A {@link ManagedBuffer} backed by {@link ByteBuffer}. 
@@ -67,8 +68,8 @@ public Object convertToNetty() throws IOException { @Override public String toString() { - return Objects.toStringHelper(this) - .add("buf", buf) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("buf", buf) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index b018197deaf2e..6dcc703e92669 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -27,13 +27,14 @@ import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.util.concurrent.SettableFuture; import io.netty.channel.Channel; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -301,10 +302,10 @@ public void close() { @Override public String toString() { - return Objects.toStringHelper(this) - .add("remoteAdress", channel.remoteAddress()) - .add("clientId", clientId) - .add("isActive", isActive()) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("remoteAdress", channel.remoteAddress()) + .append("clientId", clientId) + .append("isActive", isActive()) .toString(); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java index 53835d8304866..c9ef9f918ffd1 100644 --- 
a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java @@ -293,9 +293,8 @@ public void close() { } connectionPool.clear(); - if (workerGroup != null) { + if (workerGroup != null && !workerGroup.isShuttingDown()) { workerGroup.shutdownGracefully(); - workerGroup = null; } } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java index 77b167d15e911..4428f0f295d6e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java @@ -78,6 +78,7 @@ public void doBootstrap(TransportClient client, Channel channel) { try { doSparkAuth(client, channel); + client.setClientId(appId); } catch (GeneralSecurityException | IOException e) { throw Throwables.propagate(e); } catch (RuntimeException e) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java index fb44dbbb0953b..821cc7a849504 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java @@ -125,6 +125,7 @@ public void receive(TransportClient client, ByteBuffer message, RpcResponseCallb response.encode(responseData); callback.onSuccess(responseData.nioBuffer()); engine.sessionCipher().addToChannel(channel); + client.setClientId(challenge.appId); } catch (Exception e) { // This is a fatal error: authentication has failed. Close the channel explicitly. 
LOG.debug("Authentication failed for client {}, closing channel.", channel.remoteAddress()); diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java index 8995bbc940f63..36ca73f6ac0f0 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java @@ -90,7 +90,8 @@ CryptoOutputStream createOutputStream(WritableByteChannel ch) throws IOException return new CryptoOutputStream(cipher, conf, ch, key, new IvParameterSpec(outIv)); } - private CryptoInputStream createInputStream(ReadableByteChannel ch) throws IOException { + @VisibleForTesting + CryptoInputStream createInputStream(ReadableByteChannel ch) throws IOException { return new CryptoInputStream(cipher, conf, ch, key, new IvParameterSpec(inIv)); } @@ -166,34 +167,45 @@ private static class DecryptionHandler extends ChannelInboundHandlerAdapter { @Override public void channelRead(ChannelHandlerContext ctx, Object data) throws Exception { - if (!isCipherValid) { - throw new IOException("Cipher is in invalid state."); - } - byteChannel.feedData((ByteBuf) data); - - byte[] decryptedData = new byte[byteChannel.readableBytes()]; - int offset = 0; - while (offset < decryptedData.length) { - // SPARK-25535: workaround for CRYPTO-141. - try { - offset += cis.read(decryptedData, offset, decryptedData.length - offset); - } catch (InternalError ie) { - isCipherValid = false; - throw ie; + ByteBuf buffer = (ByteBuf) data; + + try { + if (!isCipherValid) { + throw new IOException("Cipher is in invalid state."); + } + byte[] decryptedData = new byte[buffer.readableBytes()]; + byteChannel.feedData(buffer); + + int offset = 0; + while (offset < decryptedData.length) { + // SPARK-25535: workaround for CRYPTO-141. 
+ try { + offset += cis.read(decryptedData, offset, decryptedData.length - offset); + } catch (InternalError ie) { + isCipherValid = false; + throw ie; + } } - } - ctx.fireChannelRead(Unpooled.wrappedBuffer(decryptedData, 0, decryptedData.length)); + ctx.fireChannelRead(Unpooled.wrappedBuffer(decryptedData, 0, decryptedData.length)); + } finally { + buffer.release(); + } } @Override - public void channelInactive(ChannelHandlerContext ctx) throws Exception { + public void handlerRemoved(ChannelHandlerContext ctx) throws Exception { + // We do the closing of the stream / channel in handlerRemoved(...) as + // this method will be called in all cases: + // + // - when the Channel becomes inactive + // - when the handler is removed from the ChannelPipeline try { if (isCipherValid) { cis.close(); } } finally { - super.channelInactive(ctx); + super.handlerRemoved(ctx); } } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java index a7afbfa8621c8..0f1781cbf1f2c 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * Response to {@link ChunkFetchRequest} when there is an error fetching the chunk. 
@@ -54,7 +57,7 @@ public static ChunkFetchFailure decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(streamChunkId, errorString); + return Objects.hash(streamChunkId, errorString); } @Override @@ -68,9 +71,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamChunkId", streamChunkId) - .add("errorString", errorString) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamChunkId", streamChunkId) + .append("errorString", errorString) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java index fe54fcc50dc86..7b034d5c2f595 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java @@ -17,8 +17,9 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * Request to fetch a sequence of a single chunk of a stream. 
This will correspond to a single @@ -64,8 +65,8 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamChunkId", streamChunkId) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamChunkId", streamChunkId) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java index d5c9a9b3202fb..eaad143fc3f5f 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -67,7 +70,7 @@ public static ChunkFetchSuccess decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(streamChunkId, body()); + return Objects.hash(streamChunkId, body()); } @Override @@ -81,9 +84,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamChunkId", streamChunkId) - .add("buffer", body()) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamChunkId", streamChunkId) + .append("buffer", body()) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java index 736059fdd1f57..490915f6de4b3 100644 --- 
a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java @@ -112,4 +112,27 @@ public static int[] decode(ByteBuf buf) { return ints; } } + + /** Long integer arrays are encoded with their length followed by long integers. */ + public static class LongArrays { + public static int encodedLength(long[] longs) { + return 4 + 8 * longs.length; + } + + public static void encode(ByteBuf buf, long[] longs) { + buf.writeInt(longs.length); + for (long i : longs) { + buf.writeLong(i); + } + } + + public static long[] decode(ByteBuf buf) { + int numLongs = buf.readInt(); + long[] longs = new long[numLongs]; + for (int i = 0; i < longs.length; i ++) { + longs[i] = buf.readLong(); + } + return longs; + } + } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java index 1632fb9e03687..719f6c64c5dee 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -72,8 +75,8 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("body", body()) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("body", body()) .toString(); } } diff --git 
a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java index 61061903de23f..6e4f5687d16cd 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** Response to {@link RpcRequest} for a failed RPC. */ public final class RpcFailure extends AbstractMessage implements ResponseMessage { @@ -52,7 +55,7 @@ public static RpcFailure decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(requestId, errorString); + return Objects.hash(requestId, errorString); } @Override @@ -66,9 +69,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("requestId", requestId) - .add("errorString", errorString) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("requestId", requestId) + .append("errorString", errorString) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java index cc1bb95d2d566..f2609ce2dbdb3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import 
org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -64,7 +67,7 @@ public static RpcRequest decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(requestId, body()); + return Objects.hash(requestId, body()); } @Override @@ -78,9 +81,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("requestId", requestId) - .add("body", body()) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("requestId", requestId) + .append("body", body()) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java index c03291e9c0b23..51b36ea183362 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -64,7 +67,7 @@ public static RpcResponse decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(requestId, body()); + return Objects.hash(requestId, body()); } @Override @@ -78,9 +81,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("requestId", requestId) - .add("body", body()) + return new ToStringBuilder(this, 
ToStringStyle.SHORT_PREFIX_STYLE) + .append("requestId", requestId) + .append("body", body()) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java index d46a263884807..75c6d630b9c33 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * Encapsulates a request for a particular chunk of a stream. @@ -51,7 +54,7 @@ public static StreamChunkId decode(ByteBuf buffer) { @Override public int hashCode() { - return Objects.hashCode(streamId, chunkIndex); + return Objects.hash(streamId, chunkIndex); } @Override @@ -65,9 +68,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamId", streamId) - .add("chunkIndex", chunkIndex) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamId", streamId) + .append("chunkIndex", chunkIndex) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java index 68fcfa7748611..06836f5eea390 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import 
io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * Message indicating an error when transferring a stream. @@ -54,7 +57,7 @@ public static StreamFailure decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(streamId, error); + return Objects.hash(streamId, error); } @Override @@ -68,9 +71,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamId", streamId) - .add("error", error) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamId", streamId) + .append("error", error) .toString(); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java index 1b135af752bd8..3d035e5c94f23 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; /** * Request to stream data from the remote end. 
@@ -67,8 +70,8 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamId", streamId) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamId", streamId) .toString(); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java index 568108c4fe5e8..f30605ce836fc 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java @@ -17,8 +17,11 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; @@ -67,7 +70,7 @@ public static StreamResponse decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(byteCount, streamId); + return Objects.hash(byteCount, streamId); } @Override @@ -81,10 +84,10 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamId", streamId) - .add("byteCount", byteCount) - .add("body", body()) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamId", streamId) + .append("byteCount", byteCount) + .append("body", body()) .toString(); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java index 7d21151e01074..fb50801a51ba3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java +++ 
b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java @@ -20,8 +20,9 @@ import java.io.IOException; import java.nio.ByteBuffer; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -99,9 +100,9 @@ public boolean equals(Object other) { @Override public String toString() { - return Objects.toStringHelper(this) - .add("requestId", requestId) - .add("body", body()) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("requestId", requestId) + .append("body", body()) .toString(); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java index 67f64d7962035..1a902a937a176 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java @@ -57,7 +57,7 @@ private static class StreamState { int curChunk = 0; // Used to keep track of the number of chunks being transferred and not finished yet. - volatile long chunksBeingTransferred = 0L; + final AtomicLong chunksBeingTransferred = new AtomicLong(0L); StreamState(String appId, Iterator buffers, Channel channel) { this.appId = appId; @@ -117,21 +117,35 @@ public static Pair parseStreamChunkId(String streamChunkId) { @Override public void connectionTerminated(Channel channel) { + RuntimeException failedToReleaseBufferException = null; + // Close all streams which have been associated with the channel. 
for (Map.Entry entry: streams.entrySet()) { StreamState state = entry.getValue(); if (state.associatedChannel == channel) { streams.remove(entry.getKey()); - // Release all remaining buffers. - while (state.buffers.hasNext()) { - ManagedBuffer buffer = state.buffers.next(); - if (buffer != null) { - buffer.release(); + try { + // Release all remaining buffers. + while (state.buffers.hasNext()) { + ManagedBuffer buffer = state.buffers.next(); + if (buffer != null) { + buffer.release(); + } + } + } catch (RuntimeException e) { + if (failedToReleaseBufferException == null) { + failedToReleaseBufferException = e; + } else { + logger.error("Exception trying to release remaining StreamState buffers", e); } } } } + + if (failedToReleaseBufferException != null) { + throw failedToReleaseBufferException; + } } @Override @@ -153,7 +167,7 @@ public void checkAuthorization(TransportClient client, long streamId) { public void chunkBeingSent(long streamId) { StreamState streamState = streams.get(streamId); if (streamState != null) { - streamState.chunksBeingTransferred++; + streamState.chunksBeingTransferred.incrementAndGet(); } } @@ -167,7 +181,7 @@ public void streamBeingSent(String streamId) { public void chunkSent(long streamId) { StreamState streamState = streams.get(streamId); if (streamState != null) { - streamState.chunksBeingTransferred--; + streamState.chunksBeingTransferred.decrementAndGet(); } } @@ -180,7 +194,7 @@ public void streamSent(String streamId) { public long chunksBeingTransferred() { long sum = 0L; for (StreamState streamState: streams.values()) { - sum += streamState.chunksBeingTransferred; + sum += streamState.chunksBeingTransferred.get(); } return sum; } diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java index 8396e691e9db1..f0ff9f57e7be5 100644 --- 
a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java @@ -100,9 +100,10 @@ public int getPort() { private void init(String hostToBind, int portToBind) { IOMode ioMode = IOMode.valueOf(conf.ioMode()); - EventLoopGroup bossGroup = - NettyUtils.createEventLoop(ioMode, conf.serverThreads(), conf.getModuleName() + "-server"); - EventLoopGroup workerGroup = bossGroup; + EventLoopGroup bossGroup = NettyUtils.createEventLoop(ioMode, 1, + conf.getModuleName() + "-boss"); + EventLoopGroup workerGroup = NettyUtils.createEventLoop(ioMode, conf.serverThreads(), + conf.getModuleName() + "-server"); bootstrap = new ServerBootstrap() .group(bossGroup, workerGroup) diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/ByteArrayReadableChannel.java b/common/network-common/src/main/java/org/apache/spark/network/util/ByteArrayReadableChannel.java index 25d103d0e316f..fe461d0b39862 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/ByteArrayReadableChannel.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/ByteArrayReadableChannel.java @@ -19,23 +19,27 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; import java.nio.channels.ReadableByteChannel; import io.netty.buffer.ByteBuf; public class ByteArrayReadableChannel implements ReadableByteChannel { private ByteBuf data; + private boolean closed; - public int readableBytes() { - return data.readableBytes(); - } - - public void feedData(ByteBuf buf) { + public void feedData(ByteBuf buf) throws ClosedChannelException { + if (closed) { + throw new ClosedChannelException(); + } data = buf; } @Override public int read(ByteBuffer dst) throws IOException { + if (closed) { + throw new ClosedChannelException(); + } int totalRead = 0; while (data.readableBytes() > 0 && 
dst.remaining() > 0) { int bytesToRead = Math.min(data.readableBytes(), dst.remaining()); @@ -43,20 +47,16 @@ public int read(ByteBuffer dst) throws IOException { totalRead += bytesToRead; } - if (data.readableBytes() == 0) { - data.release(); - } - return totalRead; } @Override - public void close() throws IOException { + public void close() { + closed = true; } @Override public boolean isOpen() { - return true; + return !closed; } - } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index 589dfcbefb6ea..cc0f2919568ac 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -108,8 +108,12 @@ public int numConnectionsPerPeer() { return conf.getInt(SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY, 1); } - /** Requested maximum length of the queue of incoming connections. Default is 64. */ - public int backLog() { return conf.getInt(SPARK_NETWORK_IO_BACKLOG_KEY, 64); } + /** + * Requested maximum length of the queue of incoming connections. If < 1, + * the default Netty value of {@link io.netty.util.NetUtil#SOMAXCONN} will be used. + * Default to -1. + */ + public int backLog() { return conf.getInt(SPARK_NETWORK_IO_BACKLOG_KEY, -1); } /** Number of threads used in the server thread pool. Default to 0, which is 2x#cores. 
*/ public int serverThreads() { return conf.getInt(SPARK_NETWORK_IO_SERVERTHREADS_KEY, 0); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java index 1980361a15523..cef0e415aa40a 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java @@ -184,8 +184,12 @@ private ByteBuf decodeNext() { return null; } - // Reset buf and size for next frame. + return consumeCurrentFrameBuf(); + } + + private ByteBuf consumeCurrentFrameBuf() { ByteBuf frame = frameBuf; + // Reset buf and size for next frame. frameBuf = null; consolidatedFrameBufSize = 0; consolidatedNumComponents = 0; @@ -215,13 +219,9 @@ private ByteBuf nextBufferForFrame(int bytesToRead) { @Override public void channelInactive(ChannelHandlerContext ctx) throws Exception { - for (ByteBuf b : buffers) { - b.release(); - } if (interceptor != null) { interceptor.channelInactive(); } - frameLenBuf.release(); super.channelInactive(ctx); } @@ -233,6 +233,24 @@ public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws E super.exceptionCaught(ctx, cause); } + @Override + public void handlerRemoved(ChannelHandlerContext ctx) throws Exception { + // Release all buffers that are still in our ownership. + // Doing this in handlerRemoved(...) 
guarantees that this will happen in all cases: + // - When the Channel becomes inactive + // - When the decoder is removed from the ChannelPipeline + for (ByteBuf b : buffers) { + b.release(); + } + buffers.clear(); + frameLenBuf.release(); + ByteBuf frame = consumeCurrentFrameBuf(); + if (frame != null) { + frame.release(); + } + super.handlerRemoved(ctx); + } + public void setInterceptor(Interceptor interceptor) { Preconditions.checkState(this.interceptor == null, "Already have an interceptor."); this.interceptor = interceptor; diff --git a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java index 498dc51cdc81a..916c140621671 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java @@ -260,14 +260,14 @@ public void onFailure(Throwable e) { @Test public void singleRPC() throws Exception { RpcResult res = sendRPC("hello/Aaron"); - assertEquals(res.successMessages, Sets.newHashSet("Hello, Aaron!")); + assertEquals(Sets.newHashSet("Hello, Aaron!"), res.successMessages); assertTrue(res.errorMessages.isEmpty()); } @Test public void doubleRPC() throws Exception { RpcResult res = sendRPC("hello/Aaron", "hello/Reynold"); - assertEquals(res.successMessages, Sets.newHashSet("Hello, Aaron!", "Hello, Reynold!")); + assertEquals(Sets.newHashSet("Hello, Aaron!", "Hello, Reynold!"), res.successMessages); assertTrue(res.errorMessages.isEmpty()); } @@ -295,7 +295,7 @@ public void doubleTrouble() throws Exception { @Test public void sendSuccessAndFailure() throws Exception { RpcResult res = sendRPC("hello/Bob", "throw error/the", "hello/Builder", "return error/!"); - assertEquals(res.successMessages, Sets.newHashSet("Hello, Bob!", "Hello, Builder!")); + assertEquals(Sets.newHashSet("Hello, Bob!", "Hello, Builder!"), 
res.successMessages); assertErrorsContain(res.errorMessages, Sets.newHashSet("Thrown: the", "Returned: !")); } diff --git a/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java b/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java index 2aec4a33bbe43..9b76981c31c57 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java @@ -217,4 +217,11 @@ public Iterable> getAll() { assertFalse(c1.isActive()); } } + + @Test(expected = IOException.class) + public void closeFactoryBeforeCreateClient() throws IOException, InterruptedException { + TransportClientFactory factory = context.createClientFactory(); + factory.close(); + factory.createClient(TestUtils.getLocalHost(), server1.getPort()); + } } diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java new file mode 100644 index 0000000000000..6b2186f73cd0c --- /dev/null +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.network.crypto; + +import javax.crypto.spec.SecretKeySpec; +import java.io.IOException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.embedded.EmbeddedChannel; +import org.apache.commons.crypto.stream.CryptoInputStream; +import org.apache.commons.crypto.stream.CryptoOutputStream; +import org.apache.spark.network.util.MapConfigProvider; +import org.apache.spark.network.util.TransportConf; +import org.hamcrest.CoreMatchers; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TransportCipherSuite { + + @Test + public void testBufferNotLeaksOnInternalError() throws IOException { + String algorithm = "TestAlgorithm"; + TransportConf conf = new TransportConf("Test", MapConfigProvider.EMPTY); + TransportCipher cipher = new TransportCipher(conf.cryptoConf(), conf.cipherTransformation(), + new SecretKeySpec(new byte[256], algorithm), new byte[0], new byte[0]) { + + @Override + CryptoOutputStream createOutputStream(WritableByteChannel ch) { + return null; + } + + @Override + CryptoInputStream createInputStream(ReadableByteChannel ch) throws IOException 
{ + CryptoInputStream mockInputStream = mock(CryptoInputStream.class); + when(mockInputStream.read(any(byte[].class), anyInt(), anyInt())) + .thenThrow(new InternalError()); + return mockInputStream; + } + }; + + EmbeddedChannel channel = new EmbeddedChannel(); + cipher.addToChannel(channel); + + ByteBuf buffer = Unpooled.wrappedBuffer(new byte[] { 1, 2 }); + ByteBuf buffer2 = Unpooled.wrappedBuffer(new byte[] { 1, 2 }); + + try { + channel.writeInbound(buffer); + fail("Should have raised InternalError"); + } catch (InternalError expected) { + // expected + assertEquals(0, buffer.refCnt()); + } + + try { + channel.writeInbound(buffer2); + fail("Should have raised an exception"); + } catch (Throwable expected) { + assertThat(expected, CoreMatchers.instanceOf(IOException.class)); + assertEquals(0, buffer2.refCnt()); + } + + // Simulate closing the connection + assertFalse(channel.finish()); + } +} diff --git a/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java index fb3503b783e54..45e1836da641f 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java @@ -18,6 +18,7 @@ package org.apache.spark.network.server; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import io.netty.channel.Channel; @@ -96,4 +97,42 @@ public void managedBuffersAreFreedWhenConnectionIsClosed() { Mockito.verify(buffer2, Mockito.times(1)).release(); Assert.assertEquals(0, manager.numStreamStates()); } + + @Test + public void streamStatesAreFreedWhenConnectionIsClosedEvenIfBufferIteratorThrowsException() { + OneForOneStreamManager manager = new OneForOneStreamManager(); + + Iterator buffers = Mockito.mock(Iterator.class); + 
Mockito.when(buffers.hasNext()).thenReturn(true); + Mockito.when(buffers.next()).thenThrow(RuntimeException.class); + + ManagedBuffer mockManagedBuffer = Mockito.mock(ManagedBuffer.class); + + Iterator buffers2 = Mockito.mock(Iterator.class); + Mockito.when(buffers2.hasNext()).thenReturn(true).thenReturn(true); + Mockito.when(buffers2.next()).thenReturn(mockManagedBuffer).thenThrow(RuntimeException.class); + + Channel dummyChannel = Mockito.mock(Channel.class, Mockito.RETURNS_SMART_NULLS); + manager.registerStream("appId", buffers, dummyChannel); + manager.registerStream("appId", buffers2, dummyChannel); + + Assert.assertEquals(2, manager.numStreamStates()); + + try { + manager.connectionTerminated(dummyChannel); + Assert.fail("connectionTerminated should throw exception when fails to release all buffers"); + + } catch (RuntimeException e) { + + Mockito.verify(buffers, Mockito.times(1)).hasNext(); + Mockito.verify(buffers, Mockito.times(1)).next(); + + Mockito.verify(buffers2, Mockito.times(2)).hasNext(); + Mockito.verify(buffers2, Mockito.times(2)).next(); + + Mockito.verify(mockManagedBuffer, Mockito.times(1)).release(); + + Assert.assertEquals(0, manager.numStreamStates()); + } + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java index 037e5cf7e5222..8c05288fb4111 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java @@ -102,11 +102,15 @@ protected void handleMessage( FetchShuffleBlocks msg = (FetchShuffleBlocks) msgObj; checkAuth(client, msg.appId); numBlockIds = 0; - for (int[] ids: msg.reduceIds) { - numBlockIds += ids.length; + if (msg.batchFetchEnabled) { + numBlockIds = msg.mapIds.length; + } else { + for (int[] ids: msg.reduceIds) { + 
numBlockIds += ids.length; + } } streamId = streamManager.registerStream(client.getClientId(), - new ManagedBufferIterator(msg, numBlockIds), client.getChannel()); + new ShuffleManagedBufferIterator(msg), client.getChannel()); } else { // For the compatibility with the old version, still keep the support for OpenBlocks. OpenBlocks msg = (OpenBlocks) msgObj; @@ -146,6 +150,12 @@ protected void handleMessage( int numRemovedBlocks = blockManager.removeBlocks(msg.appId, msg.execId, msg.blockIds); callback.onSuccess(new BlocksRemoved(numRemovedBlocks).toByteBuffer()); + } else if (msgObj instanceof GetLocalDirsForExecutors) { + GetLocalDirsForExecutors msg = (GetLocalDirsForExecutors) msgObj; + checkAuth(client, msg.appId); + Map localDirs = blockManager.getLocalDirs(msg.appId, msg.execIds); + callback.onSuccess(new LocalDirsForExecutors(localDirs).toByteBuffer()); + } else { throw new UnsupportedOperationException("Unexpected message: " + msgObj); } @@ -299,21 +309,6 @@ private int[] shuffleMapIdAndReduceIds(String[] blockIds, int shuffleId) { return mapIdAndReduceIds; } - ManagedBufferIterator(FetchShuffleBlocks msg, int numBlockIds) { - final int[] mapIdAndReduceIds = new int[2 * numBlockIds]; - int idx = 0; - for (int i = 0; i < msg.mapIds.length; i++) { - for (int reduceId : msg.reduceIds[i]) { - mapIdAndReduceIds[idx++] = msg.mapIds[i]; - mapIdAndReduceIds[idx++] = reduceId; - } - } - assert(idx == 2 * numBlockIds); - size = mapIdAndReduceIds.length; - blockDataForIndexFn = index -> blockManager.getBlockData(msg.appId, msg.execId, - msg.shuffleId, mapIdAndReduceIds[index], mapIdAndReduceIds[index + 1]); - } - @Override public boolean hasNext() { return index < size; @@ -328,6 +323,59 @@ public ManagedBuffer next() { } } + private class ShuffleManagedBufferIterator implements Iterator { + + private int mapIdx = 0; + private int reduceIdx = 0; + + private final String appId; + private final String execId; + private final int shuffleId; + private final long[] mapIds; 
+ private final int[][] reduceIds; + private final boolean batchFetchEnabled; + + ShuffleManagedBufferIterator(FetchShuffleBlocks msg) { + appId = msg.appId; + execId = msg.execId; + shuffleId = msg.shuffleId; + mapIds = msg.mapIds; + reduceIds = msg.reduceIds; + batchFetchEnabled = msg.batchFetchEnabled; + } + + @Override + public boolean hasNext() { + // mapIds.length must equal to reduceIds.length, and the passed in FetchShuffleBlocks + // must have non-empty mapIds and reduceIds, see the checking logic in + // OneForOneBlockFetcher. + assert(mapIds.length != 0 && mapIds.length == reduceIds.length); + return mapIdx < mapIds.length && reduceIdx < reduceIds[mapIdx].length; + } + + @Override + public ManagedBuffer next() { + ManagedBuffer block; + if (!batchFetchEnabled) { + block = blockManager.getBlockData( + appId, execId, shuffleId, mapIds[mapIdx], reduceIds[mapIdx][reduceIdx]); + if (reduceIdx < reduceIds[mapIdx].length - 1) { + reduceIdx += 1; + } else { + reduceIdx = 0; + mapIdx += 1; + } + } else { + assert(reduceIds[mapIdx].length == 2); + block = blockManager.getContinuousBlocksData(appId, execId, shuffleId, mapIds[mapIdx], + reduceIds[mapIdx][0], reduceIds[mapIdx][1]); + mapIdx += 1; + } + metrics.blockTransferRateBytes.mark(block != null ? 
block.size() : 0); + return block; + } + } + @Override public void channelActive(TransportClient client) { metrics.activeConnections.inc(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index b8e52c8621fb6..d6185f089d3c0 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -21,20 +21,21 @@ import java.nio.ByteBuffer; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; import com.codahale.metrics.MetricSet; import com.google.common.collect.Lists; import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.client.TransportClientBootstrap; +import org.apache.spark.network.client.TransportClientFactory; import org.apache.spark.network.shuffle.protocol.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.spark.network.TransportContext; -import org.apache.spark.network.client.TransportClient; -import org.apache.spark.network.client.TransportClientBootstrap; -import org.apache.spark.network.client.TransportClientFactory; import org.apache.spark.network.crypto.AuthClientBootstrap; import org.apache.spark.network.sasl.SecretKeyHolder; import org.apache.spark.network.server.NoOpRpcHandler; @@ -53,7 +54,7 @@ public class ExternalBlockStoreClient extends BlockStoreClient { private final SecretKeyHolder secretKeyHolder; private final long registrationTimeoutMs; - protected TransportClientFactory clientFactory; + protected volatile TransportClientFactory clientFactory; protected String appId; /** @@ -102,9 +103,14 @@ public void 
fetchBlocks( try { RetryingBlockFetcher.BlockFetchStarter blockFetchStarter = (blockIds1, listener1) -> { - TransportClient client = clientFactory.createClient(host, port); - new OneForOneBlockFetcher(client, appId, execId, - blockIds1, listener1, conf, downloadFileManager).start(); + // Unless this client is closed. + if (clientFactory != null) { + TransportClient client = clientFactory.createClient(host, port); + new OneForOneBlockFetcher(client, appId, execId, + blockIds1, listener1, conf, downloadFileManager).start(); + } else { + logger.info("This clientFactory was closed. Skipping further block fetch retries."); + } }; int maxRetries = conf.maxIORetries(); @@ -177,7 +183,7 @@ public void onSuccess(ByteBuffer response) { @Override public void onFailure(Throwable e) { logger.warn("Error trying to remove RDD blocks " + Arrays.toString(blockIds) + - " via external shuffle service from executor: " + execId, e); + " via external shuffle service from executor: " + execId, e); numRemovedBlocksFuture.complete(0); client.close(); } @@ -185,6 +191,46 @@ public void onFailure(Throwable e) { return numRemovedBlocksFuture; } + public void getHostLocalDirs( + String host, + int port, + String[] execIds, + CompletableFuture> hostLocalDirsCompletable) { + checkInit(); + GetLocalDirsForExecutors getLocalDirsMessage = new GetLocalDirsForExecutors(appId, execIds); + try { + TransportClient client = clientFactory.createClient(host, port); + client.sendRpc(getLocalDirsMessage.toByteBuffer(), new RpcResponseCallback() { + @Override + public void onSuccess(ByteBuffer response) { + try { + BlockTransferMessage msgObj = BlockTransferMessage.Decoder.fromByteBuffer(response); + hostLocalDirsCompletable.complete( + ((LocalDirsForExecutors) msgObj).getLocalDirsByExec()); + } catch (Throwable t) { + logger.warn("Error trying to get the host local dirs for " + + Arrays.toString(getLocalDirsMessage.execIds) + " via external shuffle service", + t.getCause()); + 
hostLocalDirsCompletable.completeExceptionally(t); + } finally { + client.close(); + } + } + + @Override + public void onFailure(Throwable t) { + logger.warn("Error trying to get the host local dirs for " + + Arrays.toString(getLocalDirsMessage.execIds) + " via external shuffle service", + t.getCause()); + hostLocalDirsCompletable.completeExceptionally(t); + client.close(); + } + }); + } catch (IOException | InterruptedException e) { + hostLocalDirsCompletable.completeExceptionally(e); + } + } + @Override public void close() { checkInit(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index 50f16fc700f12..ba1a17bf7e5ea 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -25,12 +25,15 @@ import java.util.concurrent.Executor; import java.util.concurrent.Executors; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; +import org.apache.commons.lang3.tuple.Pair; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Objects; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -165,21 +168,34 @@ public void registerExecutor( } /** - * Obtains a FileSegmentManagedBuffer from (shuffleId, mapId, reduceId). We make assumptions - * about how the hash and sort based shuffles store their data. 
+ * Obtains a FileSegmentManagedBuffer from a single block (shuffleId, mapId, reduceId). */ public ManagedBuffer getBlockData( String appId, String execId, int shuffleId, - int mapId, + long mapId, int reduceId) { + return getContinuousBlocksData(appId, execId, shuffleId, mapId, reduceId, reduceId + 1); + } + + /** + * Obtains a FileSegmentManagedBuffer from (shuffleId, mapId, [startReduceId, endReduceId)). + * We make assumptions about how the hash and sort based shuffles store their data. + */ + public ManagedBuffer getContinuousBlocksData( + String appId, + String execId, + int shuffleId, + long mapId, + int startReduceId, + int endReduceId) { ExecutorShuffleInfo executor = executors.get(new AppExecId(appId, execId)); if (executor == null) { throw new RuntimeException( String.format("Executor is not registered (appId=%s, execId=%s)", appId, execId)); } - return getSortBasedShuffleBlockData(executor, shuffleId, mapId, reduceId); + return getSortBasedShuffleBlockData(executor, shuffleId, mapId, startReduceId, endReduceId); } public ManagedBuffer getRddBlockData( @@ -296,13 +312,14 @@ private void deleteNonShuffleServiceServedFiles(String[] dirs) { * and the block id format is from ShuffleDataBlockId and ShuffleIndexBlockId. 
*/ private ManagedBuffer getSortBasedShuffleBlockData( - ExecutorShuffleInfo executor, int shuffleId, int mapId, int reduceId) { + ExecutorShuffleInfo executor, int shuffleId, long mapId, int startReduceId, int endReduceId) { File indexFile = ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, "shuffle_" + shuffleId + "_" + mapId + "_0.index"); try { ShuffleIndexInformation shuffleIndexInformation = shuffleIndexCache.get(indexFile); - ShuffleIndexRecord shuffleIndexRecord = shuffleIndexInformation.getIndex(reduceId); + ShuffleIndexRecord shuffleIndexRecord = shuffleIndexInformation.getIndex( + startReduceId, endReduceId); return new FileSegmentManagedBuffer( conf, ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, @@ -355,6 +372,19 @@ public int removeBlocks(String appId, String execId, String[] blockIds) { return numRemovedBlocks; } + public Map getLocalDirs(String appId, String[] execIds) { + return Arrays.stream(execIds) + .map(exec -> { + ExecutorShuffleInfo info = executors.get(new AppExecId(appId, exec)); + if (info == null) { + throw new RuntimeException( + String.format("Executor is not registered (appId=%s, execId=%s)", appId, exec)); + } + return Pair.of(exec, info.localDirs); + }) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + /** Simply encodes an executor's full ID, which is appId + execId. 
*/ public static class AppExecId { public final String appId; @@ -372,19 +402,19 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; AppExecId appExecId = (AppExecId) o; - return Objects.equal(appId, appExecId.appId) && Objects.equal(execId, appExecId.execId); + return Objects.equals(appId, appExecId.appId) && Objects.equals(execId, appExecId.execId); } @Override public int hashCode() { - return Objects.hashCode(appId, execId); + return Objects.hash(appId, execId); } @Override public String toString() { - return Objects.toStringHelper(this) - .add("appId", appId) - .add("execId", execId) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) + .append("execId", execId) .toString(); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java index cc11e92067375..ec2e3dce661d9 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java @@ -24,6 +24,7 @@ import java.util.HashMap; import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,40 +112,49 @@ private boolean isShuffleBlocks(String[] blockIds) { */ private FetchShuffleBlocks createFetchShuffleBlocksMsg( String appId, String execId, String[] blockIds) { - int shuffleId = splitBlockId(blockIds[0])[0]; - HashMap> mapIdToReduceIds = new HashMap<>(); + String[] firstBlock = splitBlockId(blockIds[0]); + int shuffleId = Integer.parseInt(firstBlock[1]); + boolean batchFetchEnabled = firstBlock.length == 5; + + HashMap> mapIdToReduceIds = new HashMap<>(); for (String blockId : blockIds) { - int[] blockIdParts = splitBlockId(blockId); - if 
(blockIdParts[0] != shuffleId) { + String[] blockIdParts = splitBlockId(blockId); + if (Integer.parseInt(blockIdParts[1]) != shuffleId) { throw new IllegalArgumentException("Expected shuffleId=" + shuffleId + ", got:" + blockId); } - int mapId = blockIdParts[1]; + long mapId = Long.parseLong(blockIdParts[2]); + if (!mapIdToReduceIds.containsKey(mapId)) { + mapIdToReduceIds.put(mapId, new ArrayList<>()); + } - mapIdToReduceIds.get(mapId).add(blockIdParts[2]); + mapIdToReduceIds.get(mapId).add(Integer.parseInt(blockIdParts[3])); + if (batchFetchEnabled) { + // When we read continuous shuffle blocks in batch, we will reuse reduceIds in + // FetchShuffleBlocks to store the start and end reduce id for range + // [startReduceId, endReduceId). + assert(blockIdParts.length == 5); + mapIdToReduceIds.get(mapId).add(Integer.parseInt(blockIdParts[4])); + } } - int[] mapIds = Ints.toArray(mapIdToReduceIds.keySet()); + long[] mapIds = Longs.toArray(mapIdToReduceIds.keySet()); int[][] reduceIdArr = new int[mapIds.length][]; for (int i = 0; i < mapIds.length; i++) { reduceIdArr[i] = Ints.toArray(mapIdToReduceIds.get(mapIds[i])); } - return new FetchShuffleBlocks(appId, execId, shuffleId, mapIds, reduceIdArr); + return new FetchShuffleBlocks( + appId, execId, shuffleId, mapIds, reduceIdArr, batchFetchEnabled); } - /** Split the shuffleBlockId and return shuffleId, mapId and reduceId. */ - private int[] splitBlockId(String blockId) { + /** Split the shuffleBlockId and return shuffleId, mapId and reduceIds. */ + private String[] splitBlockId(String blockId) { String[] blockIdParts = blockId.split("_"); - if (blockIdParts.length != 4 || !blockIdParts[0].equals("shuffle")) { + // For batch block id, the format contains shuffleId, mapId, begin reduceId, end reduceId. + // For single block id, the format contains shuffleId, mapId, reduceId. 
+ if (blockIdParts.length < 4 || blockIdParts.length > 5 || !blockIdParts[0].equals("shuffle")) { throw new IllegalArgumentException( "Unexpected shuffle block id format: " + blockId); } - return new int[] { - Integer.parseInt(blockIdParts[1]), - Integer.parseInt(blockIdParts[2]), - Integer.parseInt(blockIdParts[3]) - }; + return blockIdParts; } /** Callback invoked on receipt of each chunk. We equate a single chunk to a single block. */ diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java index 371149bef3974..b65aacfcc4b9e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleIndexInformation.java @@ -54,8 +54,15 @@ public int getSize() { * Get index offset for a particular reducer. */ public ShuffleIndexRecord getIndex(int reduceId) { - long offset = offsets.get(reduceId); - long nextOffset = offsets.get(reduceId + 1); + return getIndex(reduceId, reduceId + 1); + } + + /** + * Get index offset for the reducer range of [startReduceId, endReduceId). 
+ */ + public ShuffleIndexRecord getIndex(int startReduceId, int endReduceId) { + long offset = offsets.get(startReduceId); + long nextOffset = offsets.get(endReduceId); return new ShuffleIndexRecord(offset, nextOffset - offset); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java index 41dd55847ebdb..89d8dfe8716b8 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java @@ -47,7 +47,7 @@ public abstract class BlockTransferMessage implements Encodable { public enum Type { OPEN_BLOCKS(0), UPLOAD_BLOCK(1), REGISTER_EXECUTOR(2), STREAM_HANDLE(3), REGISTER_DRIVER(4), HEARTBEAT(5), UPLOAD_BLOCK_STREAM(6), REMOVE_BLOCKS(7), BLOCKS_REMOVED(8), - FETCH_SHUFFLE_BLOCKS(9); + FETCH_SHUFFLE_BLOCKS(9), GET_LOCAL_DIRS_FOR_EXECUTORS(10), LOCAL_DIRS_FOR_EXECUTORS(11); private final byte id; @@ -76,6 +76,8 @@ public static BlockTransferMessage fromByteBuffer(ByteBuffer msg) { case 7: return RemoveBlocks.decode(buf); case 8: return BlocksRemoved.decode(buf); case 9: return FetchShuffleBlocks.decode(buf); + case 10: return GetLocalDirsForExecutors.decode(buf); + case 11: return LocalDirsForExecutors.decode(buf); default: throw new IllegalArgumentException("Unknown message type: " + type); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java index 3f04443871b68..a4d6035df807c 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java +++ 
b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java @@ -17,8 +17,11 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; // Needed by ScalaDoc. See SPARK-7726 import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; @@ -41,8 +44,8 @@ public int hashCode() { @Override public String toString() { - return Objects.toStringHelper(this) - .add("numRemovedBlocks", numRemovedBlocks) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("numRemovedBlocks", numRemovedBlocks) .toString(); } @@ -50,7 +53,7 @@ public String toString() { public boolean equals(Object other) { if (other != null && other instanceof BlocksRemoved) { BlocksRemoved o = (BlocksRemoved) other; - return Objects.equal(numRemovedBlocks, o.numRemovedBlocks); + return numRemovedBlocks == o.numRemovedBlocks; } return false; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java index 93758bdc58fb0..b4e7bc409d3b8 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java @@ -18,11 +18,13 @@ package org.apache.spark.network.shuffle.protocol; import java.util.Arrays; +import java.util.Objects; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import 
org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encodable; import org.apache.spark.network.protocol.Encoders; @@ -48,15 +50,15 @@ public ExecutorShuffleInfo( @Override public int hashCode() { - return Objects.hashCode(subDirsPerLocalDir, shuffleManager) * 41 + Arrays.hashCode(localDirs); + return Objects.hash(subDirsPerLocalDir, shuffleManager) * 41 + Arrays.hashCode(localDirs); } @Override public String toString() { - return Objects.toStringHelper(this) - .add("localDirs", Arrays.toString(localDirs)) - .add("subDirsPerLocalDir", subDirsPerLocalDir) - .add("shuffleManager", shuffleManager) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("localDirs", Arrays.toString(localDirs)) + .append("subDirsPerLocalDir", subDirsPerLocalDir) + .append("shuffleManager", shuffleManager) .toString(); } @@ -65,8 +67,8 @@ public boolean equals(Object other) { if (other != null && other instanceof ExecutorShuffleInfo) { ExecutorShuffleInfo o = (ExecutorShuffleInfo) other; return Arrays.equals(localDirs, o.localDirs) - && Objects.equal(subDirsPerLocalDir, o.subDirsPerLocalDir) - && Objects.equal(shuffleManager, o.shuffleManager); + && subDirsPerLocalDir == o.subDirsPerLocalDir + && Objects.equals(shuffleManager, o.shuffleManager); } return false; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java index 466eeb3e048a8..98057d58f7ab5 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java @@ -19,8 +19,9 @@ import java.util.Arrays; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import 
org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -34,21 +35,33 @@ public class FetchShuffleBlocks extends BlockTransferMessage { public final int shuffleId; // The length of mapIds must equal to reduceIds.size(), for the i-th mapId in mapIds, // it corresponds to the i-th int[] in reduceIds, which contains all reduce id for this map id. - public final int[] mapIds; + public final long[] mapIds; + // When batchFetchEnabled=true, reduceIds[i] contains 2 elements: startReduceId (inclusive) and + // endReduceId (exclusive) for the mapper mapIds[i]. + // When batchFetchEnabled=false, reduceIds[i] contains all the reduce IDs that mapper mapIds[i] + // needs to fetch. public final int[][] reduceIds; + public final boolean batchFetchEnabled; public FetchShuffleBlocks( String appId, String execId, int shuffleId, - int[] mapIds, - int[][] reduceIds) { + long[] mapIds, + int[][] reduceIds, + boolean batchFetchEnabled) { this.appId = appId; this.execId = execId; this.shuffleId = shuffleId; this.mapIds = mapIds; this.reduceIds = reduceIds; assert(mapIds.length == reduceIds.length); + this.batchFetchEnabled = batchFetchEnabled; + if (batchFetchEnabled) { + for (int[] ids: reduceIds) { + assert(ids.length == 2); + } + } } @Override @@ -56,12 +69,13 @@ public FetchShuffleBlocks( @Override public String toString() { - return Objects.toStringHelper(this) - .add("appId", appId) - .add("execId", execId) - .add("shuffleId", shuffleId) - .add("mapIds", Arrays.toString(mapIds)) - .add("reduceIds", Arrays.deepToString(reduceIds)) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) + .append("execId", execId) + .append("shuffleId", shuffleId) + .append("mapIds", Arrays.toString(mapIds)) + .append("reduceIds", Arrays.deepToString(reduceIds)) + .append("batchFetchEnabled", batchFetchEnabled) .toString(); } @@ -73,6 +87,7 @@ public boolean equals(Object o) { FetchShuffleBlocks that = 
(FetchShuffleBlocks) o; if (shuffleId != that.shuffleId) return false; + if (batchFetchEnabled != that.batchFetchEnabled) return false; if (!appId.equals(that.appId)) return false; if (!execId.equals(that.execId)) return false; if (!Arrays.equals(mapIds, that.mapIds)) return false; @@ -86,6 +101,7 @@ public int hashCode() { result = 31 * result + shuffleId; result = 31 * result + Arrays.hashCode(mapIds); result = 31 * result + Arrays.deepHashCode(reduceIds); + result = 31 * result + (batchFetchEnabled ? 1 : 0); return result; } @@ -98,9 +114,10 @@ public int encodedLength() { return Encoders.Strings.encodedLength(appId) + Encoders.Strings.encodedLength(execId) + 4 /* encoded length of shuffleId */ - + Encoders.IntArrays.encodedLength(mapIds) + + Encoders.LongArrays.encodedLength(mapIds) + 4 /* encoded length of reduceIds.size() */ - + encodedLengthOfReduceIds; + + encodedLengthOfReduceIds + + 1; /* encoded length of batchFetchEnabled */ } @Override @@ -108,23 +125,25 @@ public void encode(ByteBuf buf) { Encoders.Strings.encode(buf, appId); Encoders.Strings.encode(buf, execId); buf.writeInt(shuffleId); - Encoders.IntArrays.encode(buf, mapIds); + Encoders.LongArrays.encode(buf, mapIds); buf.writeInt(reduceIds.length); for (int[] ids: reduceIds) { Encoders.IntArrays.encode(buf, ids); } + buf.writeBoolean(batchFetchEnabled); } public static FetchShuffleBlocks decode(ByteBuf buf) { String appId = Encoders.Strings.decode(buf); String execId = Encoders.Strings.decode(buf); int shuffleId = buf.readInt(); - int[] mapIds = Encoders.IntArrays.decode(buf); + long[] mapIds = Encoders.LongArrays.decode(buf); int reduceIdsSize = buf.readInt(); int[][] reduceIds = new int[reduceIdsSize][]; for (int i = 0; i < reduceIdsSize; i++) { reduceIds[i] = Encoders.IntArrays.decode(buf); } - return new FetchShuffleBlocks(appId, execId, shuffleId, mapIds, reduceIds); + boolean batchFetchEnabled = buf.readBoolean(); + return new FetchShuffleBlocks(appId, execId, shuffleId, mapIds, reduceIds, 
batchFetchEnabled); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java new file mode 100644 index 0000000000000..47f617c5e0a0a --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.protocol; + +import java.util.Arrays; +import java.util.Objects; + +import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +import org.apache.spark.network.protocol.Encoders; + +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + +/** Request to get the local dirs for the given executors. 
*/ +public class GetLocalDirsForExecutors extends BlockTransferMessage { + public final String appId; + public final String[] execIds; + + public GetLocalDirsForExecutors(String appId, String[] execIds) { + this.appId = appId; + this.execIds = execIds; + } + + @Override + protected Type type() { return Type.GET_LOCAL_DIRS_FOR_EXECUTORS; } + + @Override + public int hashCode() { + return Objects.hashCode(appId) * 41 + Arrays.hashCode(execIds); + } + + @Override + public String toString() { + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) + .append("execIds", Arrays.toString(execIds)) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other instanceof GetLocalDirsForExecutors) { + GetLocalDirsForExecutors o = (GetLocalDirsForExecutors) other; + return appId.equals(o.appId) && Arrays.equals(execIds, o.execIds); + } + return false; + } + + @Override + public int encodedLength() { + return Encoders.Strings.encodedLength(appId) + Encoders.StringArrays.encodedLength(execIds); + } + + @Override + public void encode(ByteBuf buf) { + Encoders.Strings.encode(buf, appId); + Encoders.StringArrays.encode(buf, execIds); + } + + public static GetLocalDirsForExecutors decode(ByteBuf buf) { + String appId = Encoders.Strings.decode(buf); + String[] execIds = Encoders.StringArrays.decode(buf); + return new GetLocalDirsForExecutors(appId, execIds); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java new file mode 100644 index 0000000000000..9e2f0668cbd24 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.protocol; + +import java.util.*; + +import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; + +import org.apache.spark.network.protocol.Encoders; + +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + +/** The reply to get local dirs giving back the dirs for each of the requested executors. 
*/ +public class LocalDirsForExecutors extends BlockTransferMessage { + private final String[] execIds; + private final int[] numLocalDirsByExec; + private final String[] allLocalDirs; + + public LocalDirsForExecutors(Map localDirsByExec) { + this.execIds = new String[localDirsByExec.size()]; + this.numLocalDirsByExec = new int[localDirsByExec.size()]; + ArrayList localDirs = new ArrayList<>(); + int index = 0; + for (Map.Entry e: localDirsByExec.entrySet()) { + execIds[index] = e.getKey(); + numLocalDirsByExec[index] = e.getValue().length; + Collections.addAll(localDirs, e.getValue()); + index++; + } + this.allLocalDirs = localDirs.toArray(new String[0]); + } + + private LocalDirsForExecutors(String[] execIds, int[] numLocalDirsByExec, String[] allLocalDirs) { + this.execIds = execIds; + this.numLocalDirsByExec = numLocalDirsByExec; + this.allLocalDirs = allLocalDirs; + } + + @Override + protected Type type() { return Type.LOCAL_DIRS_FOR_EXECUTORS; } + + @Override + public int hashCode() { + return Arrays.hashCode(execIds); + } + + @Override + public String toString() { + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("execIds", Arrays.toString(execIds)) + .append("numLocalDirsByExec", Arrays.toString(numLocalDirsByExec)) + .append("allLocalDirs", Arrays.toString(allLocalDirs)) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other instanceof LocalDirsForExecutors) { + LocalDirsForExecutors o = (LocalDirsForExecutors) other; + return Arrays.equals(execIds, o.execIds) + && Arrays.equals(numLocalDirsByExec, o.numLocalDirsByExec) + && Arrays.equals(allLocalDirs, o.allLocalDirs); + } + return false; + } + + @Override + public int encodedLength() { + return Encoders.StringArrays.encodedLength(execIds) + + Encoders.IntArrays.encodedLength(numLocalDirsByExec) + + Encoders.StringArrays.encodedLength(allLocalDirs); + } + + @Override + public void encode(ByteBuf buf) { + Encoders.StringArrays.encode(buf, execIds); 
+ Encoders.IntArrays.encode(buf, numLocalDirsByExec); + Encoders.StringArrays.encode(buf, allLocalDirs); + } + + public static LocalDirsForExecutors decode(ByteBuf buf) { + String[] execIds = Encoders.StringArrays.decode(buf); + int[] numLocalDirsByExec = Encoders.IntArrays.decode(buf); + String[] allLocalDirs = Encoders.StringArrays.decode(buf); + return new LocalDirsForExecutors(execIds, numLocalDirsByExec, allLocalDirs); + } + + public Map getLocalDirsByExec() { + Map localDirsByExec = new HashMap<>(); + int index = 0; + int localDirsIndex = 0; + for (int length: numLocalDirsByExec) { + localDirsByExec.put(execIds[index], + Arrays.copyOfRange(allLocalDirs, localDirsIndex, localDirsIndex + length)); + localDirsIndex += length; + index++; + } + return localDirsByExec; + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java index ce954b8a289e4..771e17b3233ec 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java @@ -18,9 +18,11 @@ package org.apache.spark.network.shuffle.protocol; import java.util.Arrays; +import java.util.Objects; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -44,15 +46,15 @@ public OpenBlocks(String appId, String execId, String[] blockIds) { @Override public int hashCode() { - return Objects.hashCode(appId, execId) * 41 + Arrays.hashCode(blockIds); + return Objects.hash(appId, execId) * 41 + Arrays.hashCode(blockIds); } @Override public String toString() { - return Objects.toStringHelper(this) - .add("appId", appId) - .add("execId", execId) - 
.add("blockIds", Arrays.toString(blockIds)) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) + .append("execId", execId) + .append("blockIds", Arrays.toString(blockIds)) .toString(); } @@ -60,8 +62,8 @@ public String toString() { public boolean equals(Object other) { if (other != null && other instanceof OpenBlocks) { OpenBlocks o = (OpenBlocks) other; - return Objects.equal(appId, o.appId) - && Objects.equal(execId, o.execId) + return Objects.equals(appId, o.appId) + && Objects.equals(execId, o.execId) && Arrays.equals(blockIds, o.blockIds); } return false; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java index 167ef33104227..f6af755cd9cd5 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java @@ -17,8 +17,11 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -48,15 +51,15 @@ public RegisterExecutor( @Override public int hashCode() { - return Objects.hashCode(appId, execId, executorInfo); + return Objects.hash(appId, execId, executorInfo); } @Override public String toString() { - return Objects.toStringHelper(this) - .add("appId", appId) - .add("execId", execId) - .add("executorInfo", executorInfo) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) + .append("execId", execId) + .append("executorInfo", executorInfo) .toString(); } @@ -64,9 +67,9 @@ public String toString() { public 
boolean equals(Object other) { if (other != null && other instanceof RegisterExecutor) { RegisterExecutor o = (RegisterExecutor) other; - return Objects.equal(appId, o.appId) - && Objects.equal(execId, o.execId) - && Objects.equal(executorInfo, o.executorInfo); + return Objects.equals(appId, o.appId) + && Objects.equals(execId, o.execId) + && Objects.equals(executorInfo, o.executorInfo); } return false; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java index 1c718d307753f..ade838bd4286c 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java @@ -17,11 +17,14 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; +import java.util.Arrays; +import java.util.Objects; + import io.netty.buffer.ByteBuf; -import org.apache.spark.network.protocol.Encoders; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; -import java.util.Arrays; +import org.apache.spark.network.protocol.Encoders; // Needed by ScalaDoc. 
See SPARK-7726 import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; @@ -43,15 +46,15 @@ public RemoveBlocks(String appId, String execId, String[] blockIds) { @Override public int hashCode() { - return Objects.hashCode(appId, execId) * 41 + Arrays.hashCode(blockIds); + return Objects.hash(appId, execId) * 41 + Arrays.hashCode(blockIds); } @Override public String toString() { - return Objects.toStringHelper(this) - .add("appId", appId) - .add("execId", execId) - .add("blockIds", Arrays.toString(blockIds)) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) + .append("execId", execId) + .append("blockIds", Arrays.toString(blockIds)) .toString(); } @@ -59,8 +62,8 @@ public String toString() { public boolean equals(Object other) { if (other != null && other instanceof RemoveBlocks) { RemoveBlocks o = (RemoveBlocks) other; - return Objects.equal(appId, o.appId) - && Objects.equal(execId, o.execId) + return Objects.equals(appId, o.appId) + && Objects.equals(execId, o.execId) && Arrays.equals(blockIds, o.blockIds); } return false; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java index 1915295aa6cc2..dd7715a4e82d4 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java @@ -17,8 +17,11 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; // Needed by ScalaDoc. 
See SPARK-7726 import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; @@ -41,14 +44,14 @@ public StreamHandle(long streamId, int numChunks) { @Override public int hashCode() { - return Objects.hashCode(streamId, numChunks); + return Objects.hash(streamId, numChunks); } @Override public String toString() { - return Objects.toStringHelper(this) - .add("streamId", streamId) - .add("numChunks", numChunks) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("streamId", streamId) + .append("numChunks", numChunks) .toString(); } @@ -56,8 +59,8 @@ public String toString() { public boolean equals(Object other) { if (other != null && other instanceof StreamHandle) { StreamHandle o = (StreamHandle) other; - return Objects.equal(streamId, o.streamId) - && Objects.equal(numChunks, o.numChunks); + return Objects.equals(streamId, o.streamId) + && Objects.equals(numChunks, o.numChunks); } return false; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java index 3caed59d508fd..a5bc3f7009b46 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java @@ -18,9 +18,11 @@ package org.apache.spark.network.shuffle.protocol; import java.util.Arrays; +import java.util.Objects; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -60,18 +62,18 @@ public UploadBlock( @Override public int hashCode() { - int objectsHashCode = Objects.hashCode(appId, execId, blockId); + int objectsHashCode = Objects.hash(appId, execId, blockId); return (objectsHashCode * 41 
+ Arrays.hashCode(metadata)) * 41 + Arrays.hashCode(blockData); } @Override public String toString() { - return Objects.toStringHelper(this) - .add("appId", appId) - .add("execId", execId) - .add("blockId", blockId) - .add("metadata size", metadata.length) - .add("block size", blockData.length) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) + .append("execId", execId) + .append("blockId", blockId) + .append("metadata size", metadata.length) + .append("block size", blockData.length) .toString(); } @@ -79,9 +81,9 @@ public String toString() { public boolean equals(Object other) { if (other != null && other instanceof UploadBlock) { UploadBlock o = (UploadBlock) other; - return Objects.equal(appId, o.appId) - && Objects.equal(execId, o.execId) - && Objects.equal(blockId, o.blockId) + return Objects.equals(appId, o.appId) + && Objects.equals(execId, o.execId) + && Objects.equals(blockId, o.blockId) && Arrays.equals(metadata, o.metadata) && Arrays.equals(blockData, o.blockData); } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java index 9df30967d5bb2..958a84e516c81 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java @@ -18,9 +18,11 @@ package org.apache.spark.network.shuffle.protocol; import java.util.Arrays; +import java.util.Objects; -import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -53,9 +55,9 @@ public int hashCode() { @Override public String toString() { - return Objects.toStringHelper(this) - 
.add("blockId", blockId) - .add("metadata size", metadata.length) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("blockId", blockId) + .append("metadata size", metadata.length) .toString(); } @@ -63,7 +65,7 @@ public String toString() { public boolean equals(Object other) { if (other != null && other instanceof UploadBlockStream) { UploadBlockStream o = (UploadBlockStream) other; - return Objects.equal(blockId, o.blockId) + return Objects.equals(blockId, o.blockId) && Arrays.equals(metadata, o.metadata); } return false; diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java index e8e766d3fb3ab..96dfc3b7cae61 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/sasl/SaslIntegrationSuite.java @@ -21,8 +21,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicReference; import org.junit.After; import org.junit.AfterClass; @@ -34,8 +32,6 @@ import org.apache.spark.network.TestUtils; import org.apache.spark.network.TransportContext; -import org.apache.spark.network.buffer.ManagedBuffer; -import org.apache.spark.network.client.ChunkReceivedCallback; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientFactory; @@ -44,15 +40,6 @@ import org.apache.spark.network.server.StreamManager; import org.apache.spark.network.server.TransportServer; import org.apache.spark.network.server.TransportServerBootstrap; -import org.apache.spark.network.shuffle.BlockFetchingListener; -import org.apache.spark.network.shuffle.ExternalBlockHandler; -import 
org.apache.spark.network.shuffle.ExternalShuffleBlockResolver; -import org.apache.spark.network.shuffle.OneForOneBlockFetcher; -import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; -import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; -import org.apache.spark.network.shuffle.protocol.OpenBlocks; -import org.apache.spark.network.shuffle.protocol.RegisterExecutor; -import org.apache.spark.network.shuffle.protocol.StreamHandle; import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.MapConfigProvider; import org.apache.spark.network.util.TransportConf; @@ -165,93 +152,6 @@ public void testNoSaslServer() { } } - /** - * This test is not actually testing SASL behavior, but testing that the shuffle service - * performs correct authorization checks based on the SASL authentication data. - */ - @Test - public void testAppIsolation() throws Exception { - // Start a new server with the correct RPC handler to serve block data. - ExternalShuffleBlockResolver blockResolver = mock(ExternalShuffleBlockResolver.class); - ExternalBlockHandler blockHandler = new ExternalBlockHandler( - new OneForOneStreamManager(), blockResolver); - TransportServerBootstrap bootstrap = new SaslServerBootstrap(conf, secretKeyHolder); - - try ( - TransportContext blockServerContext = new TransportContext(conf, blockHandler); - TransportServer blockServer = blockServerContext.createServer(Arrays.asList(bootstrap)); - // Create a client, and make a request to fetch blocks from a different app. 
- TransportClientFactory clientFactory1 = blockServerContext.createClientFactory( - Arrays.asList(new SaslClientBootstrap(conf, "app-1", secretKeyHolder))); - TransportClient client1 = clientFactory1.createClient( - TestUtils.getLocalHost(), blockServer.getPort())) { - - AtomicReference exception = new AtomicReference<>(); - - CountDownLatch blockFetchLatch = new CountDownLatch(1); - BlockFetchingListener listener = new BlockFetchingListener() { - @Override - public void onBlockFetchSuccess(String blockId, ManagedBuffer data) { - blockFetchLatch.countDown(); - } - @Override - public void onBlockFetchFailure(String blockId, Throwable t) { - exception.set(t); - blockFetchLatch.countDown(); - } - }; - - String[] blockIds = { "shuffle_0_1_2", "shuffle_0_3_4" }; - OneForOneBlockFetcher fetcher = - new OneForOneBlockFetcher(client1, "app-2", "0", blockIds, listener, conf); - fetcher.start(); - blockFetchLatch.await(); - checkSecurityException(exception.get()); - - // Register an executor so that the next steps work. - ExecutorShuffleInfo executorInfo = new ExecutorShuffleInfo( - new String[] { System.getProperty("java.io.tmpdir") }, 1, - "org.apache.spark.shuffle.sort.SortShuffleManager"); - RegisterExecutor regmsg = new RegisterExecutor("app-1", "0", executorInfo); - client1.sendRpcSync(regmsg.toByteBuffer(), TIMEOUT_MS); - - // Make a successful request to fetch blocks, which creates a new stream. But do not actually - // fetch any blocks, to keep the stream open. - OpenBlocks openMessage = new OpenBlocks("app-1", "0", blockIds); - ByteBuffer response = client1.sendRpcSync(openMessage.toByteBuffer(), TIMEOUT_MS); - StreamHandle stream = (StreamHandle) BlockTransferMessage.Decoder.fromByteBuffer(response); - long streamId = stream.streamId; - - try ( - // Create a second client, authenticated with a different app ID, and try to read from - // the stream created for the previous app. 
- TransportClientFactory clientFactory2 = blockServerContext.createClientFactory( - Arrays.asList(new SaslClientBootstrap(conf, "app-2", secretKeyHolder))); - TransportClient client2 = clientFactory2.createClient( - TestUtils.getLocalHost(), blockServer.getPort()) - ) { - CountDownLatch chunkReceivedLatch = new CountDownLatch(1); - ChunkReceivedCallback callback = new ChunkReceivedCallback() { - @Override - public void onSuccess(int chunkIndex, ManagedBuffer buffer) { - chunkReceivedLatch.countDown(); - } - - @Override - public void onFailure(int chunkIndex, Throwable t) { - exception.set(t); - chunkReceivedLatch.countDown(); - } - }; - - exception.set(null); - client2.fetchChunk(streamId, 0, callback); - chunkReceivedLatch.await(); - checkSecurityException(exception.get()); - } - } - } - /** RPC handler which simply responds with the message it received. */ public static class TestRpcHandler extends RpcHandler { @Override @@ -264,10 +164,4 @@ public StreamManager getStreamManager() { return new OneForOneStreamManager(); } } - - private static void checkSecurityException(Throwable t) { - assertNotNull("No exception was caught.", t); - assertTrue("Expected SecurityException.", - t.getMessage().contains(SecurityException.class.getName())); - } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/AppIsolationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/AppIsolationSuite.java new file mode 100644 index 0000000000000..92e75222d0391 --- /dev/null +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/AppIsolationSuite.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Function; +import java.util.function.Supplier; + +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; + +import org.apache.spark.network.TestUtils; +import org.apache.spark.network.TransportContext; +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.client.ChunkReceivedCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.client.TransportClientBootstrap; +import org.apache.spark.network.client.TransportClientFactory; +import org.apache.spark.network.crypto.AuthClientBootstrap; +import org.apache.spark.network.crypto.AuthServerBootstrap; +import org.apache.spark.network.sasl.SaslClientBootstrap; +import org.apache.spark.network.sasl.SaslServerBootstrap; +import org.apache.spark.network.sasl.SecretKeyHolder; +import org.apache.spark.network.server.OneForOneStreamManager; +import org.apache.spark.network.server.TransportServer; +import org.apache.spark.network.server.TransportServerBootstrap; +import 
org.apache.spark.network.shuffle.protocol.BlockTransferMessage; +import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; +import org.apache.spark.network.shuffle.protocol.OpenBlocks; +import org.apache.spark.network.shuffle.protocol.RegisterExecutor; +import org.apache.spark.network.shuffle.protocol.StreamHandle; +import org.apache.spark.network.util.MapConfigProvider; +import org.apache.spark.network.util.TransportConf; + +public class AppIsolationSuite { + + // Use a long timeout to account for slow / overloaded build machines. In the normal case, + // tests should finish way before the timeout expires. + private static final long TIMEOUT_MS = 10_000; + + private static SecretKeyHolder secretKeyHolder; + private static TransportConf conf; + + @BeforeClass + public static void beforeAll() { + Map confMap = new HashMap<>(); + confMap.put("spark.network.crypto.enabled", "true"); + confMap.put("spark.network.crypto.saslFallback", "false"); + conf = new TransportConf("shuffle", new MapConfigProvider(confMap)); + + secretKeyHolder = mock(SecretKeyHolder.class); + when(secretKeyHolder.getSaslUser(eq("app-1"))).thenReturn("app-1"); + when(secretKeyHolder.getSecretKey(eq("app-1"))).thenReturn("app-1"); + when(secretKeyHolder.getSaslUser(eq("app-2"))).thenReturn("app-2"); + when(secretKeyHolder.getSecretKey(eq("app-2"))).thenReturn("app-2"); + } + + @Test + public void testSaslAppIsolation() throws Exception { + testAppIsolation( + () -> new SaslServerBootstrap(conf, secretKeyHolder), + appId -> new SaslClientBootstrap(conf, appId, secretKeyHolder)); + } + + @Test + public void testAuthEngineAppIsolation() throws Exception { + testAppIsolation( + () -> new AuthServerBootstrap(conf, secretKeyHolder), + appId -> new AuthClientBootstrap(conf, appId, secretKeyHolder)); + } + + private void testAppIsolation( + Supplier serverBootstrap, + Function clientBootstrapFactory) throws Exception { + // Start a new server with the correct RPC handler to serve block data. 
+ ExternalShuffleBlockResolver blockResolver = mock(ExternalShuffleBlockResolver.class); + ExternalBlockHandler blockHandler = new ExternalBlockHandler( + new OneForOneStreamManager(), blockResolver); + TransportServerBootstrap bootstrap = serverBootstrap.get(); + + try ( + TransportContext blockServerContext = new TransportContext(conf, blockHandler); + TransportServer blockServer = blockServerContext.createServer(Arrays.asList(bootstrap)); + // Create a client, and make a request to fetch blocks from a different app. + TransportClientFactory clientFactory1 = blockServerContext.createClientFactory( + Arrays.asList(clientBootstrapFactory.apply("app-1"))); + TransportClient client1 = clientFactory1.createClient( + TestUtils.getLocalHost(), blockServer.getPort())) { + + AtomicReference exception = new AtomicReference<>(); + + CountDownLatch blockFetchLatch = new CountDownLatch(1); + BlockFetchingListener listener = new BlockFetchingListener() { + @Override + public void onBlockFetchSuccess(String blockId, ManagedBuffer data) { + blockFetchLatch.countDown(); + } + @Override + public void onBlockFetchFailure(String blockId, Throwable t) { + exception.set(t); + blockFetchLatch.countDown(); + } + }; + + String[] blockIds = { "shuffle_0_1_2", "shuffle_0_3_4" }; + OneForOneBlockFetcher fetcher = + new OneForOneBlockFetcher(client1, "app-2", "0", blockIds, listener, conf); + fetcher.start(); + blockFetchLatch.await(); + checkSecurityException(exception.get()); + + // Register an executor so that the next steps work. + ExecutorShuffleInfo executorInfo = new ExecutorShuffleInfo( + new String[] { System.getProperty("java.io.tmpdir") }, 1, + "org.apache.spark.shuffle.sort.SortShuffleManager"); + RegisterExecutor regmsg = new RegisterExecutor("app-1", "0", executorInfo); + client1.sendRpcSync(regmsg.toByteBuffer(), TIMEOUT_MS); + + // Make a successful request to fetch blocks, which creates a new stream. But do not actually + // fetch any blocks, to keep the stream open. 
+ OpenBlocks openMessage = new OpenBlocks("app-1", "0", blockIds); + ByteBuffer response = client1.sendRpcSync(openMessage.toByteBuffer(), TIMEOUT_MS); + StreamHandle stream = (StreamHandle) BlockTransferMessage.Decoder.fromByteBuffer(response); + long streamId = stream.streamId; + + try ( + // Create a second client, authenticated with a different app ID, and try to read from + // the stream created for the previous app. + TransportClientFactory clientFactory2 = blockServerContext.createClientFactory( + Arrays.asList(clientBootstrapFactory.apply("app-2"))); + TransportClient client2 = clientFactory2.createClient( + TestUtils.getLocalHost(), blockServer.getPort()) + ) { + CountDownLatch chunkReceivedLatch = new CountDownLatch(1); + ChunkReceivedCallback callback = new ChunkReceivedCallback() { + @Override + public void onSuccess(int chunkIndex, ManagedBuffer buffer) { + chunkReceivedLatch.countDown(); + } + + @Override + public void onFailure(int chunkIndex, Throwable t) { + exception.set(t); + chunkReceivedLatch.countDown(); + } + }; + + exception.set(null); + client2.fetchChunk(streamId, 0, callback); + chunkReceivedLatch.await(); + checkSecurityException(exception.get()); + } + } + } + + private static void checkSecurityException(Throwable t) { + assertNotNull("No exception was caught.", t); + assertTrue("Expected SecurityException.", + t.getMessage().contains(SecurityException.class.getName())); + } +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java index 649c471dc1679..67229371c3a4a 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java @@ -21,6 +21,9 @@ import static org.junit.Assert.*; +import java.util.HashMap; +import 
java.util.Map; + import org.apache.spark.network.shuffle.protocol.*; /** Verifies that all BlockTransferMessages can be serialized correctly. */ @@ -29,8 +32,11 @@ public class BlockTransferMessagesSuite { public void serializeOpenShuffleBlocks() { checkSerializeDeserialize(new OpenBlocks("app-1", "exec-2", new String[] { "b1", "b2" })); checkSerializeDeserialize(new FetchShuffleBlocks( - "app-1", "exec-2", 0, new int[] {0, 1}, - new int[][] {{ 0, 1 }, { 0, 1, 2 }})); + "app-1", "exec-2", 0, new long[] {0, 1}, + new int[][] {{ 0, 1 }, { 0, 1, 2 }}, false)); + checkSerializeDeserialize(new FetchShuffleBlocks( + "app-1", "exec-2", 0, new long[] {0, 1}, + new int[][] {{ 0, 1 }, { 0, 2 }}, true)); checkSerializeDeserialize(new RegisterExecutor("app-1", "exec-2", new ExecutorShuffleInfo( new String[] { "/local1", "/local2" }, 32, "MyShuffleManager"))); checkSerializeDeserialize(new UploadBlock("app-1", "exec-2", "block-3", new byte[] { 1, 2 }, @@ -38,10 +44,29 @@ public void serializeOpenShuffleBlocks() { checkSerializeDeserialize(new StreamHandle(12345, 16)); } - private void checkSerializeDeserialize(BlockTransferMessage msg) { + @Test + public void testLocalDirsMessages() { + checkSerializeDeserialize( + new GetLocalDirsForExecutors("app-1", new String[]{"exec-1", "exec-2"})); + + Map map = new HashMap<>(); + map.put("exec-1", new String[]{"loc1.1"}); + map.put("exec-22", new String[]{"loc2.1", "loc2.2"}); + LocalDirsForExecutors localDirsForExecs = new LocalDirsForExecutors(map); + Map resultMap = + ((LocalDirsForExecutors)checkSerializeDeserialize(localDirsForExecs)).getLocalDirsByExec(); + assertEquals(resultMap.size(), map.keySet().size()); + for (Map.Entry e: map.entrySet()) { + assertTrue(resultMap.containsKey(e.getKey())); + assertArrayEquals(e.getValue(), resultMap.get(e.getKey())); + } + } + + private BlockTransferMessage checkSerializeDeserialize(BlockTransferMessage msg) { BlockTransferMessage msg2 = 
BlockTransferMessage.Decoder.fromByteBuffer(msg.toByteBuffer()); assertEquals(msg, msg2); assertEquals(msg.hashCode(), msg2.hashCode()); assertEquals(msg.toString(), msg2.toString()); + return msg2; } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java index e38442327e22d..b37d8620a57f4 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java @@ -30,7 +30,6 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import com.google.common.util.concurrent.MoreExecutors; import org.junit.Test; import static org.junit.Assert.assertEquals; @@ -42,7 +41,7 @@ public class CleanupNonShuffleServiceServedFilesSuite { // Same-thread Executor used to ensure cleanup happens synchronously in test thread. 
- private Executor sameThreadExecutor = MoreExecutors.sameThreadExecutor(); + private Executor sameThreadExecutor = Runnable::run; private static final String SORT_MANAGER = "org.apache.spark.shuffle.sort.SortShuffleManager"; diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java index 9c623a70424b6..455351fcf767c 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java @@ -101,7 +101,7 @@ public void testFetchShuffleBlocks() { when(blockResolver.getBlockData("app0", "exec1", 0, 0, 1)).thenReturn(blockMarkers[1]); FetchShuffleBlocks fetchShuffleBlocks = new FetchShuffleBlocks( - "app0", "exec1", 0, new int[] { 0 }, new int[][] {{ 0, 1 }}); + "app0", "exec1", 0, new long[] { 0 }, new int[][] {{ 0, 1 }}, false); checkOpenBlocksReceive(fetchShuffleBlocks, blockMarkers); verify(blockResolver, times(1)).getBlockData("app0", "exec1", 0, 0, 0); @@ -109,6 +109,22 @@ public void testFetchShuffleBlocks() { verifyOpenBlockLatencyMetrics(); } + @Test + public void testFetchShuffleBlocksInBatch() { + ManagedBuffer[] batchBlockMarkers = { + new NioManagedBuffer(ByteBuffer.wrap(new byte[10])) + }; + when(blockResolver.getContinuousBlocksData( + "app0", "exec1", 0, 0, 0, 1)).thenReturn(batchBlockMarkers[0]); + + FetchShuffleBlocks fetchShuffleBlocks = new FetchShuffleBlocks( + "app0", "exec1", 0, new long[] { 0 }, new int[][] {{ 0, 1 }}, true); + checkOpenBlocksReceive(fetchShuffleBlocks, batchBlockMarkers); + + verify(blockResolver, times(1)).getContinuousBlocksData("app0", "exec1", 0, 0, 0, 1); + verifyOpenBlockLatencyMetrics(); + } + @Test public void testOpenDiskPersistedRDDBlocks() { when(blockResolver.getRddBlockData("app0", "exec1", 0, 
0)).thenReturn(blockMarkers[0]); @@ -154,7 +170,7 @@ private void checkOpenBlocksReceive(BlockTransferMessage msg, ManagedBuffer[] bl StreamHandle handle = (StreamHandle) BlockTransferMessage.Decoder.fromByteBuffer(response.getValue()); - assertEquals(2, handle.numChunks); + assertEquals(blockMarkers.length, handle.numChunks); @SuppressWarnings("unchecked") ArgumentCaptor> stream = (ArgumentCaptor>) @@ -162,8 +178,9 @@ private void checkOpenBlocksReceive(BlockTransferMessage msg, ManagedBuffer[] bl verify(streamManager, times(1)).registerStream(anyString(), stream.capture(), any()); Iterator buffers = stream.getValue(); - assertEquals(blockMarkers[0], buffers.next()); - assertEquals(blockMarkers[1], buffers.next()); + for (ManagedBuffer blockMarker : blockMarkers) { + assertEquals(blockMarker, buffers.next()); + } assertFalse(buffers.hasNext()); } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java index 09eb699be305a..09b31430b1eb9 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java @@ -111,6 +111,13 @@ public void testSortShuffleBlocks() throws IOException { CharStreams.toString(new InputStreamReader(block1Stream, StandardCharsets.UTF_8)); assertEquals(sortBlock1, block1); } + + try (InputStream blocksStream = resolver.getContinuousBlocksData( + "app0", "exec0", 0, 0, 0, 2).createInputStream()) { + String blocks = + CharStreams.toString(new InputStreamReader(blocksStream, StandardCharsets.UTF_8)); + assertEquals(sortBlock0 + sortBlock1, blocks); + } } @Test diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java 
b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java index 47c087088a8a2..48b73e32216ce 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java @@ -24,7 +24,6 @@ import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; -import com.google.common.util.concurrent.MoreExecutors; import org.junit.Test; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -35,7 +34,7 @@ public class ExternalShuffleCleanupSuite { // Same-thread Executor used to ensure cleanup happens synchronously in test thread. - private Executor sameThreadExecutor = MoreExecutors.sameThreadExecutor(); + private Executor sameThreadExecutor = Runnable::run; private TransportConf conf = new TransportConf("shuffle", MapConfigProvider.EMPTY); private static final String SORT_MANAGER = "org.apache.spark.shuffle.sort.SortShuffleManager"; diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java index 66633cc7a3595..285eedb39c65c 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java @@ -64,7 +64,7 @@ public void testFetchOne() { BlockFetchingListener listener = fetchBlocks( blocks, blockIds, - new FetchShuffleBlocks("app-id", "exec-id", 0, new int[] { 0 }, new int[][] {{ 0 }}), + new FetchShuffleBlocks("app-id", "exec-id", 0, new long[] { 0 }, new int[][] {{ 0 }}, false), conf); verify(listener).onBlockFetchSuccess("shuffle_0_0_0", blocks.get("shuffle_0_0_0")); @@ -100,7 +100,8 @@ public void 
testFetchThreeShuffleBlocks() { BlockFetchingListener listener = fetchBlocks( blocks, blockIds, - new FetchShuffleBlocks("app-id", "exec-id", 0, new int[] { 0 }, new int[][] {{ 0, 1, 2 }}), + new FetchShuffleBlocks( + "app-id", "exec-id", 0, new long[] { 0 }, new int[][] {{ 0, 1, 2 }}, false), conf); for (int i = 0; i < 3; i ++) { @@ -109,6 +110,23 @@ public void testFetchThreeShuffleBlocks() { } } + @Test + public void testBatchFetchThreeShuffleBlocks() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("shuffle_0_0_0_3", new NioManagedBuffer(ByteBuffer.wrap(new byte[58]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = fetchBlocks( + blocks, + blockIds, + new FetchShuffleBlocks( + "app-id", "exec-id", 0, new long[] { 0 }, new int[][] {{ 0, 3 }}, true), + conf); + + verify(listener, times(1)).onBlockFetchSuccess( + "shuffle_0_0_0_3", blocks.get("shuffle_0_0_0_3")); + } + @Test public void testFetchThree() { LinkedHashMap blocks = Maps.newLinkedHashMap(); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java index 457805feeac45..fb67d7220a0b4 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java @@ -28,6 +28,7 @@ import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; import org.apache.spark.network.util.JavaUtils; +import org.junit.Assert; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,7 +123,7 @@ private void insertFile(String filename) throws IOException { private void insertFile(String filename, byte[] block) throws IOException { OutputStream dataStream = null; File file = ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, filename); 
- assert(!file.exists()) : "this test file has been already generated"; + Assert.assertFalse("this test file has been already generated", file.exists()); try { dataStream = new FileOutputStream( ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, filename)); diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index c170f99b112c0..815a56d765b6a 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -23,14 +23,16 @@ import java.nio.ByteBuffer; import java.util.List; import java.util.Map; +import java.util.Objects; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -417,7 +419,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; AppId appExecId = (AppId) o; - return Objects.equal(appId, appExecId.appId); + return Objects.equals(appId, appExecId.appId); } @Override @@ -427,8 +429,8 @@ public int hashCode() { @Override public String toString() { - return Objects.toStringHelper(this) - .add("appId", appId) + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("appId", appId) .toString(); } } diff --git a/common/tags/src/test/java/org/apache/spark/tags/ExtendedSQLTest.java 
b/common/tags/src/test/java/org/apache/spark/tags/ExtendedSQLTest.java new file mode 100644 index 0000000000000..1c0fff1b4045d --- /dev/null +++ b/common/tags/src/test/java/org/apache/spark/tags/ExtendedSQLTest.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.tags; + +import org.scalatest.TagAnnotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface ExtendedSQLTest { } diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java new file mode 100644 index 0000000000000..84a0156ebfb66 --- /dev/null +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util; + +public class DateTimeConstants { + + public static final int YEARS_PER_DECADE = 10; + public static final int YEARS_PER_CENTURY = 100; + public static final int YEARS_PER_MILLENNIUM = 1000; + + public static final byte MONTHS_PER_QUARTER = 3; + public static final int MONTHS_PER_YEAR = 12; + + public static final byte DAYS_PER_WEEK = 7; + public static final long DAYS_PER_MONTH = 30L; + + public static final long HOURS_PER_DAY = 24L; + + public static final long MINUTES_PER_HOUR = 60L; + + public static final long SECONDS_PER_MINUTE = 60L; + public static final long SECONDS_PER_HOUR = MINUTES_PER_HOUR * SECONDS_PER_MINUTE; + public static final long SECONDS_PER_DAY = HOURS_PER_DAY * SECONDS_PER_HOUR; + + public static final long MILLIS_PER_SECOND = 1000L; + public static final long MILLIS_PER_MINUTE = SECONDS_PER_MINUTE * MILLIS_PER_SECOND; + public static final long MILLIS_PER_HOUR = MINUTES_PER_HOUR * MILLIS_PER_MINUTE; + public static final long MILLIS_PER_DAY = HOURS_PER_DAY * MILLIS_PER_HOUR; + + public static final long MICROS_PER_MILLIS = 1000L; + public static final long MICROS_PER_SECOND = MILLIS_PER_SECOND * MICROS_PER_MILLIS; + public static final long MICROS_PER_MINUTE = SECONDS_PER_MINUTE * MICROS_PER_SECOND; + 
public static final long MICROS_PER_HOUR = MINUTES_PER_HOUR * MICROS_PER_MINUTE; + public static final long MICROS_PER_DAY = HOURS_PER_DAY * MICROS_PER_HOUR; + public static final long MICROS_PER_MONTH = DAYS_PER_MONTH * MICROS_PER_DAY; + /* 365.25 days per year assumes leap year every four years */ + public static final long MICROS_PER_YEAR = (36525L * MICROS_PER_DAY) / 100; + + public static final long NANOS_PER_MICROS = 1000L; + public static final long NANOS_PER_MILLIS = MICROS_PER_MILLIS * NANOS_PER_MICROS; + public static final long NANOS_PER_SECOND = MILLIS_PER_SECOND * NANOS_PER_MILLIS; +} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java index 908ff1983e6be..f2d06e793f9dd 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java @@ -17,387 +17,114 @@ package org.apache.spark.unsafe.types; +import org.apache.spark.annotation.Unstable; + import java.io.Serializable; -import java.util.Locale; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.math.BigDecimal; +import java.time.Duration; +import java.time.Period; +import java.time.temporal.ChronoUnit; +import java.util.Objects; + +import static org.apache.spark.sql.catalyst.util.DateTimeConstants.*; /** - * The internal representation of interval type. + * The class representing calendar intervals. The calendar interval is stored internally in + * three components: + *
    + *
  • an integer value representing the number of `months` in this interval,
  • + *
  • an integer value representing the number of `days` in this interval,
  • + *
  • a long value representing the number of `microseconds` in this interval.
  • + *
+ * + * The `months` and `days` are not units of time with a constant length (unlike hours, seconds), so + * they are two separated fields from microseconds. One month may be equal to 28, 29, 30 or 31 days + * and one day may be equal to 23, 24 or 25 hours (daylight saving). + * + * @since 3.0.0 */ +@Unstable public final class CalendarInterval implements Serializable { - public static final long MICROS_PER_MILLI = 1000L; - public static final long MICROS_PER_SECOND = MICROS_PER_MILLI * 1000; - public static final long MICROS_PER_MINUTE = MICROS_PER_SECOND * 60; - public static final long MICROS_PER_HOUR = MICROS_PER_MINUTE * 60; - public static final long MICROS_PER_DAY = MICROS_PER_HOUR * 24; - public static final long MICROS_PER_WEEK = MICROS_PER_DAY * 7; - - /** - * A function to generate regex which matches interval string's unit part like "3 years". - * - * First, we can leave out some units in interval string, and we only care about the value of - * unit, so here we use non-capturing group to wrap the actual regex. - * At the beginning of the actual regex, we should match spaces before the unit part. - * Next is the number part, starts with an optional "-" to represent negative value. We use - * capturing group to wrap this part as we need the value later. - * Finally is the unit name, ends with an optional "s". 
- */ - private static String unitRegex(String unit) { - return "(?:\\s+(-?\\d+)\\s+" + unit + "s?)?"; - } - - private static Pattern p = Pattern.compile("interval" + unitRegex("year") + unitRegex("month") + - unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") + - unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond")); - - private static Pattern yearMonthPattern = - Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$"); - - private static Pattern dayTimePattern = Pattern.compile( - "^(?:['|\"])?([+|-])?((\\d+) )?((\\d+):)?(\\d+):(\\d+)(\\.(\\d+))?(?:['|\"])?$"); - - private static Pattern quoteTrimPattern = Pattern.compile("^(?:['|\"])?(.*?)(?:['|\"])?$"); - - private static long toLong(String s) { - if (s == null) { - return 0; - } else { - return Long.parseLong(s); - } - } - - /** - * Convert a string to CalendarInterval. Return null if the input string is not a valid interval. - * This method is case-sensitive and all characters in the input string should be in lower case. - */ - public static CalendarInterval fromString(String s) { - if (s == null) { - return null; - } - s = s.trim(); - Matcher m = p.matcher(s); - if (!m.matches() || s.equals("interval")) { - return null; - } else { - long months = toLong(m.group(1)) * 12 + toLong(m.group(2)); - long microseconds = toLong(m.group(3)) * MICROS_PER_WEEK; - microseconds += toLong(m.group(4)) * MICROS_PER_DAY; - microseconds += toLong(m.group(5)) * MICROS_PER_HOUR; - microseconds += toLong(m.group(6)) * MICROS_PER_MINUTE; - microseconds += toLong(m.group(7)) * MICROS_PER_SECOND; - microseconds += toLong(m.group(8)) * MICROS_PER_MILLI; - microseconds += toLong(m.group(9)); - return new CalendarInterval((int) months, microseconds); - } - } - - /** - * Convert a string to CalendarInterval. Unlike fromString, this method is case-insensitive and - * will throw IllegalArgumentException when the input string is not a valid interval. 
- * - * @throws IllegalArgumentException if the string is not a valid internal. - */ - public static CalendarInterval fromCaseInsensitiveString(String s) { - if (s == null || s.trim().isEmpty()) { - throw new IllegalArgumentException("Interval cannot be null or blank."); - } - String sInLowerCase = s.trim().toLowerCase(Locale.ROOT); - String interval = - sInLowerCase.startsWith("interval ") ? sInLowerCase : "interval " + sInLowerCase; - CalendarInterval cal = fromString(interval); - if (cal == null) { - throw new IllegalArgumentException("Invalid interval: " + s); - } - return cal; - } - - public static long toLongWithRange(String fieldName, - String s, long minValue, long maxValue) throws IllegalArgumentException { - long result = 0; - if (s != null) { - result = Long.parseLong(s); - if (result < minValue || result > maxValue) { - throw new IllegalArgumentException(String.format("%s %d outside range [%d, %d]", - fieldName, result, minValue, maxValue)); - } - } - return result; - } - - /** - * Parse YearMonth string in form: [-]YYYY-MM - * - * adapted from HiveIntervalYearMonth.valueOf - */ - public static CalendarInterval fromYearMonthString(String s) throws IllegalArgumentException { - CalendarInterval result = null; - if (s == null) { - throw new IllegalArgumentException("Interval year-month string was null"); - } - s = s.trim(); - Matcher m = yearMonthPattern.matcher(s); - if (!m.matches()) { - throw new IllegalArgumentException( - "Interval string does not match year-month format of 'y-m': " + s); - } else { - try { - int sign = m.group(1) != null && m.group(1).equals("-") ? 
-1 : 1; - int years = (int) toLongWithRange("year", m.group(2), 0, Integer.MAX_VALUE); - int months = (int) toLongWithRange("month", m.group(3), 0, 11); - result = new CalendarInterval(sign * (years * 12 + months), 0); - } catch (Exception e) { - throw new IllegalArgumentException( - "Error parsing interval year-month string: " + e.getMessage(), e); - } - } - return result; - } - - /** - * Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn - * - * adapted from HiveIntervalDayTime.valueOf - */ - public static CalendarInterval fromDayTimeString(String s) throws IllegalArgumentException { - return fromDayTimeString(s, "day", "second"); - } - - /** - * Parse dayTime string in form: [-]d HH:mm:ss.nnnnnnnnn and [-]HH:mm:ss.nnnnnnnnn - * - * adapted from HiveIntervalDayTime.valueOf. - * Below interval conversion patterns are supported: - * - DAY TO (HOUR|MINUTE|SECOND) - * - HOUR TO (MINUTE|SECOND) - * - MINUTE TO SECOND - */ - public static CalendarInterval fromDayTimeString(String s, String from, String to) - throws IllegalArgumentException { - CalendarInterval result = null; - if (s == null) { - throw new IllegalArgumentException("Interval day-time string was null"); - } - s = s.trim(); - Matcher m = dayTimePattern.matcher(s); - if (!m.matches()) { - throw new IllegalArgumentException( - "Interval string does not match day-time format of 'd h:m:s.n': " + s); - } else { - try { - int sign = m.group(1) != null && m.group(1).equals("-") ? -1 : 1; - long days = m.group(2) == null ? 
0 : toLongWithRange("day", m.group(3), - 0, Integer.MAX_VALUE); - long hours = 0; - long minutes; - long seconds = 0; - if (m.group(5) != null || from.equals("minute")) { // 'HH:mm:ss' or 'mm:ss minute' - hours = toLongWithRange("hour", m.group(5), 0, 23); - minutes = toLongWithRange("minute", m.group(6), 0, 59); - seconds = toLongWithRange("second", m.group(7), 0, 59); - } else if (m.group(8) != null){ // 'mm:ss.nn' - minutes = toLongWithRange("minute", m.group(6), 0, 59); - seconds = toLongWithRange("second", m.group(7), 0, 59); - } else { // 'HH:mm' - hours = toLongWithRange("hour", m.group(6), 0, 23); - minutes = toLongWithRange("second", m.group(7), 0, 59); - } - // Hive allow nanosecond precision interval - String nanoStr = m.group(9) == null ? null : (m.group(9) + "000000000").substring(0, 9); - long nanos = toLongWithRange("nanosecond", nanoStr, 0L, 999999999L); - switch (to) { - case "hour": - minutes = 0; - seconds = 0; - nanos = 0; - break; - case "minute": - seconds = 0; - nanos = 0; - break; - case "second": - // No-op - break; - default: - throw new IllegalArgumentException( - String.format("Cannot support (interval '%s' %s to %s) expression", s, from, to)); - } - result = new CalendarInterval(0, sign * ( - days * MICROS_PER_DAY + hours * MICROS_PER_HOUR + minutes * MICROS_PER_MINUTE + - seconds * MICROS_PER_SECOND + nanos / 1000L)); - } catch (Exception e) { - throw new IllegalArgumentException( - "Error parsing interval day-time string: " + e.getMessage(), e); - } - } - return result; - } - - public static CalendarInterval fromSingleUnitString(String unit, String s) - throws IllegalArgumentException { - - CalendarInterval result = null; - if (s == null) { - throw new IllegalArgumentException(String.format("Interval %s string was null", unit)); - } - s = s.trim(); - Matcher m = quoteTrimPattern.matcher(s); - if (!m.matches()) { - throw new IllegalArgumentException( - "Interval string does not match day-time format of 'd h:m:s.n': " + s); - } else { - 
try { - switch (unit) { - case "year": - int year = (int) toLongWithRange("year", m.group(1), - Integer.MIN_VALUE / 12, Integer.MAX_VALUE / 12); - result = new CalendarInterval(year * 12, 0L); - break; - case "month": - int month = (int) toLongWithRange("month", m.group(1), - Integer.MIN_VALUE, Integer.MAX_VALUE); - result = new CalendarInterval(month, 0L); - break; - case "week": - long week = toLongWithRange("week", m.group(1), - Long.MIN_VALUE / MICROS_PER_WEEK, Long.MAX_VALUE / MICROS_PER_WEEK); - result = new CalendarInterval(0, week * MICROS_PER_WEEK); - break; - case "day": - long day = toLongWithRange("day", m.group(1), - Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY); - result = new CalendarInterval(0, day * MICROS_PER_DAY); - break; - case "hour": - long hour = toLongWithRange("hour", m.group(1), - Long.MIN_VALUE / MICROS_PER_HOUR, Long.MAX_VALUE / MICROS_PER_HOUR); - result = new CalendarInterval(0, hour * MICROS_PER_HOUR); - break; - case "minute": - long minute = toLongWithRange("minute", m.group(1), - Long.MIN_VALUE / MICROS_PER_MINUTE, Long.MAX_VALUE / MICROS_PER_MINUTE); - result = new CalendarInterval(0, minute * MICROS_PER_MINUTE); - break; - case "second": { - long micros = parseSecondNano(m.group(1)); - result = new CalendarInterval(0, micros); - break; - } - case "millisecond": - long millisecond = toLongWithRange("millisecond", m.group(1), - Long.MIN_VALUE / MICROS_PER_MILLI, Long.MAX_VALUE / MICROS_PER_MILLI); - result = new CalendarInterval(0, millisecond * MICROS_PER_MILLI); - break; - case "microsecond": { - long micros = Long.parseLong(m.group(1)); - result = new CalendarInterval(0, micros); - break; - } - } - } catch (Exception e) { - throw new IllegalArgumentException("Error parsing interval string: " + e.getMessage(), e); - } - } - return result; - } - - /** - * Parse second_nano string in ss.nnnnnnnnn format to microseconds - */ - public static long parseSecondNano(String secondNano) throws IllegalArgumentException 
{ - String[] parts = secondNano.split("\\."); - if (parts.length == 1) { - return toLongWithRange("second", parts[0], Long.MIN_VALUE / MICROS_PER_SECOND, - Long.MAX_VALUE / MICROS_PER_SECOND) * MICROS_PER_SECOND; - - } else if (parts.length == 2) { - long seconds = parts[0].equals("") ? 0L : toLongWithRange("second", parts[0], - Long.MIN_VALUE / MICROS_PER_SECOND, Long.MAX_VALUE / MICROS_PER_SECOND); - long nanos = toLongWithRange("nanosecond", parts[1], 0L, 999999999L); - return seconds * MICROS_PER_SECOND + nanos / 1000L; - - } else { - throw new IllegalArgumentException( - "Interval string does not match second-nano format of ss.nnnnnnnnn"); - } - } - + // NOTE: If you're moving or renaming this file, you should also update Unidoc configuration + // specified in 'SparkBuild.scala'. public final int months; + public final int days; public final long microseconds; - public long milliseconds() { - return this.microseconds / MICROS_PER_MILLI; - } - - public CalendarInterval(int months, long microseconds) { + // CalendarInterval is represented by months, days and microseconds. Months and days are not + // units of time with a constant length (unlike hours, seconds), so they are two separated fields + // from microseconds. 
One month may be equal to 29, 30 or 31 days and one day may be equal to + // 23, 24 or 25 hours (daylight saving) + public CalendarInterval(int months, int days, long microseconds) { this.months = months; + this.days = days; this.microseconds = microseconds; } - public CalendarInterval add(CalendarInterval that) { - int months = this.months + that.months; - long microseconds = this.microseconds + that.microseconds; - return new CalendarInterval(months, microseconds); - } - - public CalendarInterval subtract(CalendarInterval that) { - int months = this.months - that.months; - long microseconds = this.microseconds - that.microseconds; - return new CalendarInterval(months, microseconds); - } - - public CalendarInterval negate() { - return new CalendarInterval(-this.months, -this.microseconds); - } - @Override - public boolean equals(Object other) { - if (this == other) return true; - if (other == null || !(other instanceof CalendarInterval)) return false; - - CalendarInterval o = (CalendarInterval) other; - return this.months == o.months && this.microseconds == o.microseconds; + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CalendarInterval that = (CalendarInterval) o; + return months == that.months && + days == that.days && + microseconds == that.microseconds; } @Override public int hashCode() { - return 31 * months + (int) microseconds; + return Objects.hash(months, days, microseconds); } @Override public String toString() { - StringBuilder sb = new StringBuilder("interval"); + if (months == 0 && days == 0 && microseconds == 0) { + return "0 seconds"; + } + + StringBuilder sb = new StringBuilder(); if (months != 0) { - appendUnit(sb, months / 12, "year"); - appendUnit(sb, months % 12, "month"); + appendUnit(sb, months / 12, "years"); + appendUnit(sb, months % 12, "months"); } + appendUnit(sb, days, "days"); + if (microseconds != 0) { long rest = microseconds; - appendUnit(sb, rest / 
MICROS_PER_WEEK, "week"); - rest %= MICROS_PER_WEEK; - appendUnit(sb, rest / MICROS_PER_DAY, "day"); - rest %= MICROS_PER_DAY; - appendUnit(sb, rest / MICROS_PER_HOUR, "hour"); + appendUnit(sb, rest / MICROS_PER_HOUR, "hours"); rest %= MICROS_PER_HOUR; - appendUnit(sb, rest / MICROS_PER_MINUTE, "minute"); + appendUnit(sb, rest / MICROS_PER_MINUTE, "minutes"); rest %= MICROS_PER_MINUTE; - appendUnit(sb, rest / MICROS_PER_SECOND, "second"); - rest %= MICROS_PER_SECOND; - appendUnit(sb, rest / MICROS_PER_MILLI, "millisecond"); - rest %= MICROS_PER_MILLI; - appendUnit(sb, rest, "microsecond"); - } else if (months == 0) { - sb.append(" 0 microseconds"); + if (rest != 0) { + String s = BigDecimal.valueOf(rest, 6).stripTrailingZeros().toPlainString(); + sb.append(s).append(" seconds "); + } } + sb.setLength(sb.length() - 1); return sb.toString(); } private void appendUnit(StringBuilder sb, long value, String unit) { if (value != 0) { - sb.append(' ').append(value).append(' ').append(unit).append('s'); + sb.append(value).append(' ').append(unit).append(' '); } } + + /** + * Extracts the date part of the interval. + * @return an instance of {@code java.time.Period} based on the months and days fields + * of the given interval, not null. + */ + public Period extractAsPeriod() { return Period.of(0, months, days); } + + /** + * Extracts the time part of the interval. + * @return an instance of {@code java.time.Duration} based on the microseconds field + * of the given interval, not null. 
+ * @throws ArithmeticException if a numeric overflow occurs + */ + public Duration extractAsDuration() { return Duration.of(microseconds, ChronoUnit.MICROS); } } diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 30b884c5fa9c6..c5384669eb922 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -370,7 +370,7 @@ private byte getByte(int i) { return Platform.getByte(base, offset + i); } - private boolean matchAt(final UTF8String s, int pos) { + public boolean matchAt(final UTF8String s, int pos) { if (s.numBytes + pos > numBytes || pos < 0) { return false; } @@ -538,14 +538,42 @@ private UTF8String copyUTF8String(int start, int end) { public UTF8String trim() { int s = 0; // skip all of the space (0x20) in the left side - while (s < this.numBytes && getByte(s) == 0x20) s++; + while (s < this.numBytes && getByte(s) == ' ') s++; if (s == this.numBytes) { // Everything trimmed return EMPTY_UTF8; } // skip all of the space (0x20) in the right side int e = this.numBytes - 1; - while (e > s && getByte(e) == 0x20) e--; + while (e > s && getByte(e) == ' ') e--; + if (s == 0 && e == numBytes - 1) { + // Nothing trimmed + return this; + } + return copyUTF8String(s, e); + } + + /** + * Trims whitespaces (<= ASCII 32) from both ends of this string. + * + * Note that, this method is the same as java's {@link String#trim}, and different from + * {@link UTF8String#trim()} which remove only spaces(= ASCII 32) from both ends. + * + * @return A UTF8String whose value is this UTF8String, with any leading and trailing white + * space removed, or this UTF8String if it has no leading or trailing whitespace. 
+ * + */ + public UTF8String trimAll() { + int s = 0; + // skip all of the whitespaces (<=0x20) in the left side + while (s < this.numBytes && getByte(s) <= ' ') s++; + if (s == this.numBytes) { + // Everything trimmed + return EMPTY_UTF8; + } + // skip all of the whitespaces (<=0x20) in the right side + int e = this.numBytes - 1; + while (e > s && getByte(e) <= ' ') e--; if (s == 0 && e == numBytes - 1) { // Nothing trimmed return this; @@ -1063,7 +1091,7 @@ public static class IntWrapper implements Serializable { } /** - * Parses this UTF8String to long. + * Parses this UTF8String(trimmed if needed) to long. * * Note that, in this method we accumulate the result in negative format, and convert it to * positive format at the end, if this string is not started with '-'. This is because min value @@ -1077,18 +1105,20 @@ public static class IntWrapper implements Serializable { * @return true if the parsing was successful else false */ public boolean toLong(LongWrapper toLongResult) { - if (numBytes == 0) { - return false; - } + int offset = 0; + while (offset < this.numBytes && getByte(offset) <= ' ') offset++; + if (offset == this.numBytes) return false; - byte b = getByte(0); + int end = this.numBytes - 1; + while (end > offset && getByte(end) <= ' ') end--; + + byte b = getByte(offset); final boolean negative = b == '-'; - int offset = 0; if (negative || b == '+') { - offset++; - if (numBytes == 1) { + if (end - offset == 0) { return false; } + offset++; } final byte separator = '.'; @@ -1096,7 +1126,7 @@ public boolean toLong(LongWrapper toLongResult) { final long stopValue = Long.MIN_VALUE / radix; long result = 0; - while (offset < numBytes) { + while (offset <= end) { b = getByte(offset); offset++; if (b == separator) { @@ -1131,7 +1161,7 @@ public boolean toLong(LongWrapper toLongResult) { // This is the case when we've encountered a decimal separator. 
The fractional // part will not change the number, but we will verify that the fractional part // is well formed. - while (offset < numBytes) { + while (offset <= end) { byte currentByte = getByte(offset); if (currentByte < '0' || currentByte > '9') { return false; @@ -1151,7 +1181,7 @@ public boolean toLong(LongWrapper toLongResult) { } /** - * Parses this UTF8String to int. + * Parses this UTF8String(trimmed if needed) to int. * * Note that, in this method we accumulate the result in negative format, and convert it to * positive format at the end, if this string is not started with '-'. This is because min value @@ -1168,18 +1198,20 @@ public boolean toLong(LongWrapper toLongResult) { * @return true if the parsing was successful else false */ public boolean toInt(IntWrapper intWrapper) { - if (numBytes == 0) { - return false; - } + int offset = 0; + while (offset < this.numBytes && getByte(offset) <= ' ') offset++; + if (offset == this.numBytes) return false; - byte b = getByte(0); + int end = this.numBytes - 1; + while (end > offset && getByte(end) <= ' ') end--; + + byte b = getByte(offset); final boolean negative = b == '-'; - int offset = 0; if (negative || b == '+') { - offset++; - if (numBytes == 1) { + if (end - offset == 0) { return false; } + offset++; } final byte separator = '.'; @@ -1187,7 +1219,7 @@ public boolean toInt(IntWrapper intWrapper) { final int stopValue = Integer.MIN_VALUE / radix; int result = 0; - while (offset < numBytes) { + while (offset <= end) { b = getByte(offset); offset++; if (b == separator) { @@ -1222,7 +1254,7 @@ public boolean toInt(IntWrapper intWrapper) { // This is the case when we've encountered a decimal separator. The fractional // part will not change the number, but we will verify that the fractional part // is well formed. 
- while (offset < numBytes) { + while (offset <= end) { byte currentByte = getByte(offset); if (currentByte < '0' || currentByte > '9') { return false; @@ -1262,6 +1294,52 @@ public boolean toByte(IntWrapper intWrapper) { return false; } + /** + * Parses UTF8String(trimmed if needed) to long. This method is used when ANSI is enabled. + * + * @return If string contains valid numeric value then it returns the long value otherwise a + * NumberFormatException is thrown. + */ + public long toLongExact() { + LongWrapper result = new LongWrapper(); + if (toLong(result)) { + return result.value; + } + throw new NumberFormatException("invalid input syntax for type numeric: " + this); + } + + /** + * Parses UTF8String(trimmed if needed) to int. This method is used when ANSI is enabled. + * + * @return If string contains valid numeric value then it returns the int value otherwise a + * NumberFormatException is thrown. + */ + public int toIntExact() { + IntWrapper result = new IntWrapper(); + if (toInt(result)) { + return result.value; + } + throw new NumberFormatException("invalid input syntax for type numeric: " + this); + } + + public short toShortExact() { + int value = this.toIntExact(); + short result = (short) value; + if (result == value) { + return result; + } + throw new NumberFormatException("invalid input syntax for type numeric: " + this); + } + + public byte toByteExact() { + int value = this.toIntExact(); + byte result = (byte) value; + if (result == value) { + return result; + } + throw new NumberFormatException("invalid input syntax for type numeric: " + this); + } + @Override public String toString() { return new String(getBytes(), StandardCharsets.UTF_8); diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java index 3ad9ac7b4de9c..19e4182b38a4e 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java +++ 
b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java @@ -114,25 +114,25 @@ public void memoryDebugFillEnabledInTest() { Assert.assertTrue(MemoryAllocator.MEMORY_DEBUG_FILL_ENABLED); MemoryBlock onheap = MemoryAllocator.HEAP.allocate(1); Assert.assertEquals( - Platform.getByte(onheap.getBaseObject(), onheap.getBaseOffset()), - MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE); + MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE, + Platform.getByte(onheap.getBaseObject(), onheap.getBaseOffset())); MemoryBlock onheap1 = MemoryAllocator.HEAP.allocate(1024 * 1024); Object onheap1BaseObject = onheap1.getBaseObject(); long onheap1BaseOffset = onheap1.getBaseOffset(); MemoryAllocator.HEAP.free(onheap1); Assert.assertEquals( - Platform.getByte(onheap1BaseObject, onheap1BaseOffset), - MemoryAllocator.MEMORY_DEBUG_FILL_FREED_VALUE); + MemoryAllocator.MEMORY_DEBUG_FILL_FREED_VALUE, + Platform.getByte(onheap1BaseObject, onheap1BaseOffset)); MemoryBlock onheap2 = MemoryAllocator.HEAP.allocate(1024 * 1024); Assert.assertEquals( - Platform.getByte(onheap2.getBaseObject(), onheap2.getBaseOffset()), - MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE); + MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE, + Platform.getByte(onheap2.getBaseObject(), onheap2.getBaseOffset())); MemoryBlock offheap = MemoryAllocator.UNSAFE.allocate(1); Assert.assertEquals( - Platform.getByte(offheap.getBaseObject(), offheap.getBaseOffset()), - MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE); + MemoryAllocator.MEMORY_DEBUG_FILL_CLEAN_VALUE, + Platform.getByte(offheap.getBaseObject(), offheap.getBaseOffset())); MemoryAllocator.UNSAFE.free(offheap); } @@ -150,11 +150,11 @@ public void heapMemoryReuse() { // The size is greater than `HeapMemoryAllocator.POOLING_THRESHOLD_BYTES`, // reuse the previous memory which has released. 
MemoryBlock onheap3 = heapMem.allocate(1024 * 1024 + 1); - Assert.assertEquals(onheap3.size(), 1024 * 1024 + 1); + Assert.assertEquals(1024 * 1024 + 1, onheap3.size()); Object obj3 = onheap3.getBaseObject(); heapMem.free(onheap3); MemoryBlock onheap4 = heapMem.allocate(1024 * 1024 + 7); - Assert.assertEquals(onheap4.size(), 1024 * 1024 + 7); + Assert.assertEquals(1024 * 1024 + 7, onheap4.size()); Assert.assertEquals(obj3, onheap4.getBaseObject()); } } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java index c307d74e0ba07..6397f26c02f3a 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java @@ -19,259 +19,67 @@ import org.junit.Test; +import java.time.Duration; +import java.time.Period; + import static org.junit.Assert.*; -import static org.apache.spark.unsafe.types.CalendarInterval.*; +import static org.apache.spark.sql.catalyst.util.DateTimeConstants.*; public class CalendarIntervalSuite { @Test public void equalsTest() { - CalendarInterval i1 = new CalendarInterval(3, 123); - CalendarInterval i2 = new CalendarInterval(3, 321); - CalendarInterval i3 = new CalendarInterval(1, 123); - CalendarInterval i4 = new CalendarInterval(3, 123); + CalendarInterval i1 = new CalendarInterval(3, 2, 123); + CalendarInterval i2 = new CalendarInterval(3, 2,321); + CalendarInterval i3 = new CalendarInterval(3, 4,123); + CalendarInterval i4 = new CalendarInterval(1, 2, 123); + CalendarInterval i5 = new CalendarInterval(1, 4, 321); + CalendarInterval i6 = new CalendarInterval(3, 2, 123); assertNotSame(i1, i2); assertNotSame(i1, i3); + assertNotSame(i1, i4); assertNotSame(i2, i3); - assertEquals(i1, i4); + assertNotSame(i2, i4); + assertNotSame(i3, i4); + assertNotSame(i1, i5); + assertEquals(i1, i6); } @Test public void 
toStringTest() { CalendarInterval i; - i = new CalendarInterval(0, 0); - assertEquals("interval 0 microseconds", i.toString()); - - i = new CalendarInterval(34, 0); - assertEquals("interval 2 years 10 months", i.toString()); - - i = new CalendarInterval(-34, 0); - assertEquals("interval -2 years -10 months", i.toString()); - - i = new CalendarInterval(0, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123); - assertEquals("interval 3 weeks 13 hours 123 microseconds", i.toString()); - - i = new CalendarInterval(0, -3 * MICROS_PER_WEEK - 13 * MICROS_PER_HOUR - 123); - assertEquals("interval -3 weeks -13 hours -123 microseconds", i.toString()); - - i = new CalendarInterval(34, 3 * MICROS_PER_WEEK + 13 * MICROS_PER_HOUR + 123); - assertEquals("interval 2 years 10 months 3 weeks 13 hours 123 microseconds", i.toString()); - } - - @Test - public void fromStringTest() { - testSingleUnit("year", 3, 36, 0); - testSingleUnit("month", 3, 3, 0); - testSingleUnit("week", 3, 0, 3 * MICROS_PER_WEEK); - testSingleUnit("day", 3, 0, 3 * MICROS_PER_DAY); - testSingleUnit("hour", 3, 0, 3 * MICROS_PER_HOUR); - testSingleUnit("minute", 3, 0, 3 * MICROS_PER_MINUTE); - testSingleUnit("second", 3, 0, 3 * MICROS_PER_SECOND); - testSingleUnit("millisecond", 3, 0, 3 * MICROS_PER_MILLI); - testSingleUnit("microsecond", 3, 0, 3); - - String input; - - input = "interval -5 years 23 month"; - CalendarInterval result = new CalendarInterval(-5 * 12 + 23, 0); - assertEquals(fromString(input), result); - - input = "interval -5 years 23 month "; - assertEquals(fromString(input), result); - - input = " interval -5 years 23 month "; - assertEquals(fromString(input), result); - - // Error cases - input = "interval 3month 1 hour"; - assertNull(fromString(input)); - - input = "interval 3 moth 1 hour"; - assertNull(fromString(input)); - - input = "interval"; - assertNull(fromString(input)); - - input = "int"; - assertNull(fromString(input)); - - input = ""; - assertNull(fromString(input)); - - input = null; - 
assertNull(fromString(input)); - } - - @Test - public void fromCaseInsensitiveStringTest() { - for (String input : new String[]{"5 MINUTES", "5 minutes", "5 Minutes"}) { - assertEquals(fromCaseInsensitiveString(input), new CalendarInterval(0, 5L * 60 * 1_000_000)); - } - - for (String input : new String[]{null, "", " "}) { - try { - fromCaseInsensitiveString(input); - fail("Expected to throw an exception for the invalid input"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("cannot be null or blank")); - } - } - - for (String input : new String[]{"interval", "interval1 day", "foo", "foo 1 day"}) { - try { - fromCaseInsensitiveString(input); - fail("Expected to throw an exception for the invalid input"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("Invalid interval")); - } - } - } - - @Test - public void fromYearMonthStringTest() { - String input; - CalendarInterval i; - - input = "99-10"; - i = new CalendarInterval(99 * 12 + 10, 0L); - assertEquals(fromYearMonthString(input), i); + i = new CalendarInterval(0, 0, 0); + assertEquals("0 seconds", i.toString()); - input = "-8-10"; - i = new CalendarInterval(-8 * 12 - 10, 0L); - assertEquals(fromYearMonthString(input), i); + i = new CalendarInterval(34, 0, 0); + assertEquals("2 years 10 months", i.toString()); - try { - input = "99-15"; - fromYearMonthString(input); - fail("Expected to throw an exception for the invalid input"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("month 15 outside range")); - } - } - - @Test - public void fromDayTimeStringTest() { - String input; - CalendarInterval i; + i = new CalendarInterval(-34, 0, 0); + assertEquals("-2 years -10 months", i.toString()); - input = "5 12:40:30.999999999"; - i = new CalendarInterval(0, 5 * MICROS_PER_DAY + 12 * MICROS_PER_HOUR + - 40 * MICROS_PER_MINUTE + 30 * MICROS_PER_SECOND + 999999L); - assertEquals(fromDayTimeString(input), i); + i = new 
CalendarInterval(0, 31, 0); + assertEquals("31 days", i.toString()); - input = "10 0:12:0.888"; - i = new CalendarInterval(0, 10 * MICROS_PER_DAY + 12 * MICROS_PER_MINUTE + - 888 * MICROS_PER_MILLI); - assertEquals(fromDayTimeString(input), i); + i = new CalendarInterval(0, -31, 0); + assertEquals("-31 days", i.toString()); - input = "-3 0:0:0"; - i = new CalendarInterval(0, -3 * MICROS_PER_DAY); - assertEquals(fromDayTimeString(input), i); + i = new CalendarInterval(0, 0, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123); + assertEquals("3 hours 13 minutes 0.000123 seconds", i.toString()); - try { - input = "5 30:12:20"; - fromDayTimeString(input); - fail("Expected to throw an exception for the invalid input"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("hour 30 outside range")); - } + i = new CalendarInterval(0, 0, -3 * MICROS_PER_HOUR - 13 * MICROS_PER_MINUTE - 123); + assertEquals("-3 hours -13 minutes -0.000123 seconds", i.toString()); - try { - input = "5 30-12"; - fromDayTimeString(input); - fail("Expected to throw an exception for the invalid input"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("not match day-time format")); - } - - try { - input = "5 1:12:20"; - fromDayTimeString(input, "hour", "microsecond"); - fail("Expected to throw an exception for the invalid convention type"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("Cannot support (interval")); - } + i = new CalendarInterval(34, 31, 3 * MICROS_PER_HOUR + 13 * MICROS_PER_MINUTE + 123); + assertEquals("2 years 10 months 31 days 3 hours 13 minutes 0.000123 seconds", + i.toString()); } @Test - public void fromSingleUnitStringTest() { - String input; - CalendarInterval i; - - input = "12"; - i = new CalendarInterval(12 * 12, 0L); - assertEquals(fromSingleUnitString("year", input), i); - - input = "100"; - i = new CalendarInterval(0, 100 * MICROS_PER_DAY); - assertEquals(fromSingleUnitString("day", 
input), i); - - input = "1999.38888"; - i = new CalendarInterval(0, 1999 * MICROS_PER_SECOND + 38); - assertEquals(fromSingleUnitString("second", input), i); - - try { - input = String.valueOf(Integer.MAX_VALUE); - fromSingleUnitString("year", input); - fail("Expected to throw an exception for the invalid input"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("outside range")); - } - - try { - input = String.valueOf(Long.MAX_VALUE / MICROS_PER_HOUR + 1); - fromSingleUnitString("hour", input); - fail("Expected to throw an exception for the invalid input"); - } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().contains("outside range")); - } - } - - @Test - public void addTest() { - String input = "interval 3 month 1 hour"; - String input2 = "interval 2 month 100 hour"; - - CalendarInterval interval = fromString(input); - CalendarInterval interval2 = fromString(input2); - - assertEquals(interval.add(interval2), new CalendarInterval(5, 101 * MICROS_PER_HOUR)); - - input = "interval -10 month -81 hour"; - input2 = "interval 75 month 200 hour"; - - interval = fromString(input); - interval2 = fromString(input2); - - assertEquals(interval.add(interval2), new CalendarInterval(65, 119 * MICROS_PER_HOUR)); - } - - @Test - public void subtractTest() { - String input = "interval 3 month 1 hour"; - String input2 = "interval 2 month 100 hour"; - - CalendarInterval interval = fromString(input); - CalendarInterval interval2 = fromString(input2); - - assertEquals(interval.subtract(interval2), new CalendarInterval(1, -99 * MICROS_PER_HOUR)); - - input = "interval -10 month -81 hour"; - input2 = "interval 75 month 200 hour"; - - interval = fromString(input); - interval2 = fromString(input2); - - assertEquals(interval.subtract(interval2), new CalendarInterval(-85, -281 * MICROS_PER_HOUR)); - } - - private static void testSingleUnit(String unit, int number, int months, long microseconds) { - String input1 = "interval " + number + " " + 
unit; - String input2 = "interval " + number + " " + unit + "s"; - CalendarInterval result = new CalendarInterval(months, microseconds); - assertEquals(fromString(input1), result); - assertEquals(fromString(input2), result); + public void periodAndDurationTest() { + CalendarInterval interval = new CalendarInterval(120, -40, 123456); + assertEquals(Period.of(0, 120, -40), interval.extractAsPeriod()); + assertEquals(Duration.ofNanos(123456000), interval.extractAsDuration()); } } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index cd253c0cbc904..8f933877f82e6 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -38,11 +38,11 @@ public class UTF8StringSuite { private static void checkBasic(String str, int len) { UTF8String s1 = fromString(str); UTF8String s2 = fromBytes(str.getBytes(StandardCharsets.UTF_8)); - assertEquals(s1.numChars(), len); - assertEquals(s2.numChars(), len); + assertEquals(len, s1.numChars()); + assertEquals(len, s2.numChars()); - assertEquals(s1.toString(), str); - assertEquals(s2.toString(), str); + assertEquals(str, s1.toString()); + assertEquals(str, s2.toString()); assertEquals(s1, s2); assertEquals(s1.hashCode(), s2.hashCode()); @@ -227,6 +227,7 @@ public void substring() { @Test public void trims() { assertEquals(fromString("1"), fromString("1").trim()); + assertEquals(fromString("1"), fromString("1\t").trimAll()); assertEquals(fromString("hello"), fromString(" hello ").trim()); assertEquals(fromString("hello "), fromString(" hello ").trimLeft()); @@ -375,20 +376,20 @@ public void pad() { @Test public void substringSQL() { UTF8String e = fromString("example"); - assertEquals(e.substringSQL(0, 2), fromString("ex")); - assertEquals(e.substringSQL(1, 2), fromString("ex")); - 
assertEquals(e.substringSQL(0, 7), fromString("example")); - assertEquals(e.substringSQL(1, 2), fromString("ex")); - assertEquals(e.substringSQL(0, 100), fromString("example")); - assertEquals(e.substringSQL(1, 100), fromString("example")); - assertEquals(e.substringSQL(2, 2), fromString("xa")); - assertEquals(e.substringSQL(1, 6), fromString("exampl")); - assertEquals(e.substringSQL(2, 100), fromString("xample")); - assertEquals(e.substringSQL(0, 0), fromString("")); - assertEquals(e.substringSQL(100, 4), EMPTY_UTF8); - assertEquals(e.substringSQL(0, Integer.MAX_VALUE), fromString("example")); - assertEquals(e.substringSQL(1, Integer.MAX_VALUE), fromString("example")); - assertEquals(e.substringSQL(2, Integer.MAX_VALUE), fromString("xample")); + assertEquals(fromString("ex"), e.substringSQL(0, 2)); + assertEquals(fromString("ex"), e.substringSQL(1, 2)); + assertEquals(fromString("example"), e.substringSQL(0, 7)); + assertEquals(fromString("ex"), e.substringSQL(1, 2)); + assertEquals(fromString("example"), e.substringSQL(0, 100)); + assertEquals(fromString("example"), e.substringSQL(1, 100)); + assertEquals(fromString("xa"), e.substringSQL(2, 2)); + assertEquals(fromString("exampl"), e.substringSQL(1, 6)); + assertEquals(fromString("xample"), e.substringSQL(2, 100)); + assertEquals(fromString(""), e.substringSQL(0, 0)); + assertEquals(EMPTY_UTF8, e.substringSQL(100, 4)); + assertEquals(fromString("example"), e.substringSQL(0, Integer.MAX_VALUE)); + assertEquals(fromString("example"), e.substringSQL(1, Integer.MAX_VALUE)); + assertEquals(fromString("xample"), e.substringSQL(2, Integer.MAX_VALUE)); } @Test @@ -506,50 +507,50 @@ public void findInSet() { @Test public void soundex() { - assertEquals(fromString("Robert").soundex(), fromString("R163")); - assertEquals(fromString("Rupert").soundex(), fromString("R163")); - assertEquals(fromString("Rubin").soundex(), fromString("R150")); - assertEquals(fromString("Ashcraft").soundex(), fromString("A261")); - 
assertEquals(fromString("Ashcroft").soundex(), fromString("A261")); - assertEquals(fromString("Burroughs").soundex(), fromString("B620")); - assertEquals(fromString("Burrows").soundex(), fromString("B620")); - assertEquals(fromString("Ekzampul").soundex(), fromString("E251")); - assertEquals(fromString("Example").soundex(), fromString("E251")); - assertEquals(fromString("Ellery").soundex(), fromString("E460")); - assertEquals(fromString("Euler").soundex(), fromString("E460")); - assertEquals(fromString("Ghosh").soundex(), fromString("G200")); - assertEquals(fromString("Gauss").soundex(), fromString("G200")); - assertEquals(fromString("Gutierrez").soundex(), fromString("G362")); - assertEquals(fromString("Heilbronn").soundex(), fromString("H416")); - assertEquals(fromString("Hilbert").soundex(), fromString("H416")); - assertEquals(fromString("Jackson").soundex(), fromString("J250")); - assertEquals(fromString("Kant").soundex(), fromString("K530")); - assertEquals(fromString("Knuth").soundex(), fromString("K530")); - assertEquals(fromString("Lee").soundex(), fromString("L000")); - assertEquals(fromString("Lukasiewicz").soundex(), fromString("L222")); - assertEquals(fromString("Lissajous").soundex(), fromString("L222")); - assertEquals(fromString("Ladd").soundex(), fromString("L300")); - assertEquals(fromString("Lloyd").soundex(), fromString("L300")); - assertEquals(fromString("Moses").soundex(), fromString("M220")); - assertEquals(fromString("O'Hara").soundex(), fromString("O600")); - assertEquals(fromString("Pfister").soundex(), fromString("P236")); - assertEquals(fromString("Rubin").soundex(), fromString("R150")); - assertEquals(fromString("Robert").soundex(), fromString("R163")); - assertEquals(fromString("Rupert").soundex(), fromString("R163")); - assertEquals(fromString("Soundex").soundex(), fromString("S532")); - assertEquals(fromString("Sownteks").soundex(), fromString("S532")); - assertEquals(fromString("Tymczak").soundex(), fromString("T522")); - 
assertEquals(fromString("VanDeusen").soundex(), fromString("V532")); - assertEquals(fromString("Washington").soundex(), fromString("W252")); - assertEquals(fromString("Wheaton").soundex(), fromString("W350")); - - assertEquals(fromString("a").soundex(), fromString("A000")); - assertEquals(fromString("ab").soundex(), fromString("A100")); - assertEquals(fromString("abc").soundex(), fromString("A120")); - assertEquals(fromString("abcd").soundex(), fromString("A123")); - assertEquals(fromString("").soundex(), fromString("")); - assertEquals(fromString("123").soundex(), fromString("123")); - assertEquals(fromString("世界千世").soundex(), fromString("世界千世")); + assertEquals(fromString("R163"), fromString("Robert").soundex()); + assertEquals(fromString("R163"), fromString("Rupert").soundex()); + assertEquals(fromString("R150"), fromString("Rubin").soundex()); + assertEquals(fromString("A261"), fromString("Ashcraft").soundex()); + assertEquals(fromString("A261"), fromString("Ashcroft").soundex()); + assertEquals(fromString("B620"), fromString("Burroughs").soundex()); + assertEquals(fromString("B620"), fromString("Burrows").soundex()); + assertEquals(fromString("E251"), fromString("Ekzampul").soundex()); + assertEquals(fromString("E251"), fromString("Example").soundex()); + assertEquals(fromString("E460"), fromString("Ellery").soundex()); + assertEquals(fromString("E460"), fromString("Euler").soundex()); + assertEquals(fromString("G200"), fromString("Ghosh").soundex()); + assertEquals(fromString("G200"), fromString("Gauss").soundex()); + assertEquals(fromString("G362"), fromString("Gutierrez").soundex()); + assertEquals(fromString("H416"), fromString("Heilbronn").soundex()); + assertEquals(fromString("H416"), fromString("Hilbert").soundex()); + assertEquals(fromString("J250"), fromString("Jackson").soundex()); + assertEquals(fromString("K530"), fromString("Kant").soundex()); + assertEquals(fromString("K530"), fromString("Knuth").soundex()); + assertEquals(fromString("L000"), 
fromString("Lee").soundex()); + assertEquals(fromString("L222"), fromString("Lukasiewicz").soundex()); + assertEquals(fromString("L222"), fromString("Lissajous").soundex()); + assertEquals(fromString("L300"), fromString("Ladd").soundex()); + assertEquals(fromString("L300"), fromString("Lloyd").soundex()); + assertEquals(fromString("M220"), fromString("Moses").soundex()); + assertEquals(fromString("O600"), fromString("O'Hara").soundex()); + assertEquals(fromString("P236"), fromString("Pfister").soundex()); + assertEquals(fromString("R150"), fromString("Rubin").soundex()); + assertEquals(fromString("R163"), fromString("Robert").soundex()); + assertEquals(fromString("R163"), fromString("Rupert").soundex()); + assertEquals(fromString("S532"), fromString("Soundex").soundex()); + assertEquals(fromString("S532"), fromString("Sownteks").soundex()); + assertEquals(fromString("T522"), fromString("Tymczak").soundex()); + assertEquals(fromString("V532"), fromString("VanDeusen").soundex()); + assertEquals(fromString("W252"), fromString("Washington").soundex()); + assertEquals(fromString("W350"), fromString("Wheaton").soundex()); + + assertEquals(fromString("A000"), fromString("a").soundex()); + assertEquals(fromString("A100"), fromString("ab").soundex()); + assertEquals(fromString("A120"), fromString("abc").soundex()); + assertEquals(fromString("A123"), fromString("abcd").soundex()); + assertEquals(fromString(""), fromString("").soundex()); + assertEquals(fromString("123"), fromString("123").soundex()); + assertEquals(fromString("世界千世"), fromString("世界千世").soundex()); } @Test @@ -849,7 +850,7 @@ public void skipWrongFirstByte() { for (int i = 0; i < wrongFirstBytes.length; ++i) { c[0] = (byte)wrongFirstBytes[i]; - assertEquals(fromBytes(c).numChars(), 1); + assertEquals(1, fromBytes(c).numChars()); } } } diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala 
b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index fdb81a06d41c9..72aa682bb95bc 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.unsafe.types import org.apache.commons.text.similarity.LevenshteinDistance import org.scalacheck.{Arbitrary, Gen} -import org.scalatest.prop.GeneratorDrivenPropertyChecks +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks // scalastyle:off import org.scalatest.{FunSuite, Matchers} @@ -28,7 +28,7 @@ import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} /** * This TestSuite utilize ScalaCheck to generate randomized inputs for UTF8String testing. */ -class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenPropertyChecks with Matchers { +class UTF8StringPropertyCheckSuite extends FunSuite with ScalaCheckDrivenPropertyChecks with Matchers { // scalastyle:on test("toString") { diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template index da0b06d295252..f52d33fd64223 100644 --- a/conf/metrics.properties.template +++ b/conf/metrics.properties.template @@ -113,6 +113,15 @@ # /metrics/applications/json # App information # /metrics/master/json # Master information +# org.apache.spark.metrics.sink.PrometheusServlet +# Name: Default: Description: +# path VARIES* Path prefix from the web server root +# +# * Default path is /metrics/prometheus for all instances except the master. 
The +# master has two paths: +# /metrics/applications/prometheus # App information +# /metrics/master/prometheus # Master information + # org.apache.spark.metrics.sink.GraphiteSink # Name: Default: Description: # host NONE Hostname of the Graphite server, must be set @@ -192,4 +201,10 @@ #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource -#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource \ No newline at end of file +#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +# Example configuration for PrometheusServlet +#*.sink.prometheusServlet.class=org.apache.spark.metrics.sink.PrometheusServlet +#*.sink.prometheusServlet.path=/metrics/prometheus +#master.sink.prometheusServlet.path=/metrics/master/prometheus +#applications.sink.prometheusServlet.path=/metrics/applications/prometheus diff --git a/core/benchmarks/CoalescedRDDBenchmark-jdk11-results.txt b/core/benchmarks/CoalescedRDDBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..e944111ff9e93 --- /dev/null +++ b/core/benchmarks/CoalescedRDDBenchmark-jdk11-results.txt @@ -0,0 +1,40 @@ +================================================================================================ +Coalesced RDD , large scale +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Coalesce Num Partitions: 100 Num Hosts: 1 344 360 14 0.3 3441.4 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 283 301 22 0.4 2825.1 1.2X +Coalesce Num Partitions: 100 Num Hosts: 10 270 271 2 0.4 2700.5 1.3X +Coalesce Num Partitions: 100 Num Hosts: 20 272 273 1 0.4 2721.1 1.3X +Coalesce Num Partitions: 100 Num Hosts: 40 
271 272 1 0.4 2710.0 1.3X +Coalesce Num Partitions: 100 Num Hosts: 80 266 267 2 0.4 2656.3 1.3X +Coalesce Num Partitions: 500 Num Hosts: 1 609 619 15 0.2 6089.0 0.6X +Coalesce Num Partitions: 500 Num Hosts: 5 338 343 6 0.3 3383.0 1.0X +Coalesce Num Partitions: 500 Num Hosts: 10 303 306 3 0.3 3029.4 1.1X +Coalesce Num Partitions: 500 Num Hosts: 20 286 288 2 0.4 2855.9 1.2X +Coalesce Num Partitions: 500 Num Hosts: 40 279 282 4 0.4 2793.3 1.2X +Coalesce Num Partitions: 500 Num Hosts: 80 273 275 3 0.4 2725.9 1.3X +Coalesce Num Partitions: 1000 Num Hosts: 1 951 955 4 0.1 9514.1 0.4X +Coalesce Num Partitions: 1000 Num Hosts: 5 421 429 8 0.2 4211.3 0.8X +Coalesce Num Partitions: 1000 Num Hosts: 10 347 352 4 0.3 3473.5 1.0X +Coalesce Num Partitions: 1000 Num Hosts: 20 309 312 5 0.3 3087.5 1.1X +Coalesce Num Partitions: 1000 Num Hosts: 40 290 294 6 0.3 2896.4 1.2X +Coalesce Num Partitions: 1000 Num Hosts: 80 281 286 5 0.4 2811.3 1.2X +Coalesce Num Partitions: 5000 Num Hosts: 1 3928 3950 27 0.0 39278.0 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 5 1373 1389 27 0.1 13725.2 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 10 812 827 13 0.1 8123.3 0.4X +Coalesce Num Partitions: 5000 Num Hosts: 20 530 540 9 0.2 5299.1 0.6X +Coalesce Num Partitions: 5000 Num Hosts: 40 421 425 5 0.2 4210.5 0.8X +Coalesce Num Partitions: 5000 Num Hosts: 80 335 344 12 0.3 3353.7 1.0X +Coalesce Num Partitions: 10000 Num Hosts: 1 7116 7120 4 0.0 71159.0 0.0X +Coalesce Num Partitions: 10000 Num Hosts: 5 2539 2598 51 0.0 25390.1 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 10 1393 1432 34 0.1 13928.1 0.2X +Coalesce Num Partitions: 10000 Num Hosts: 20 833 1009 303 0.1 8329.2 0.4X +Coalesce Num Partitions: 10000 Num Hosts: 40 562 563 3 0.2 5615.2 0.6X +Coalesce Num Partitions: 10000 Num Hosts: 80 420 426 7 0.2 4204.0 0.8X + + diff --git a/core/benchmarks/CoalescedRDDBenchmark-results.txt b/core/benchmarks/CoalescedRDDBenchmark-results.txt index dd63b0adea4f2..f1b867951a074 100644 --- 
a/core/benchmarks/CoalescedRDDBenchmark-results.txt +++ b/core/benchmarks/CoalescedRDDBenchmark-results.txt @@ -2,39 +2,39 @@ Coalesced RDD , large scale ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_201-b09 on Windows 10 10.0 -Intel64 Family 6 Model 63 Stepping 2, GenuineIntel +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Coalesce Num Partitions: 100 Num Hosts: 1 346 364 24 0.3 3458.9 1.0X -Coalesce Num Partitions: 100 Num Hosts: 5 258 264 6 0.4 2579.0 1.3X -Coalesce Num Partitions: 100 Num Hosts: 10 242 249 7 0.4 2415.2 1.4X -Coalesce Num Partitions: 100 Num Hosts: 20 237 242 7 0.4 2371.7 1.5X -Coalesce Num Partitions: 100 Num Hosts: 40 230 231 1 0.4 2299.8 1.5X -Coalesce Num Partitions: 100 Num Hosts: 80 222 233 14 0.4 2223.0 1.6X -Coalesce Num Partitions: 500 Num Hosts: 1 659 665 5 0.2 6590.4 0.5X -Coalesce Num Partitions: 500 Num Hosts: 5 340 381 47 0.3 3395.2 1.0X -Coalesce Num Partitions: 500 Num Hosts: 10 279 307 47 0.4 2788.3 1.2X -Coalesce Num Partitions: 500 Num Hosts: 20 259 261 2 0.4 2591.9 1.3X -Coalesce Num Partitions: 500 Num Hosts: 40 241 250 15 0.4 2406.5 1.4X -Coalesce Num Partitions: 500 Num Hosts: 80 235 237 3 0.4 2349.9 1.5X -Coalesce Num Partitions: 1000 Num Hosts: 1 1050 1053 4 0.1 10503.2 0.3X -Coalesce Num Partitions: 1000 Num Hosts: 5 405 407 2 0.2 4049.5 0.9X -Coalesce Num Partitions: 1000 Num Hosts: 10 320 322 2 0.3 3202.7 1.1X -Coalesce Num Partitions: 1000 Num Hosts: 20 276 277 0 0.4 2762.3 1.3X -Coalesce Num Partitions: 1000 Num Hosts: 40 257 260 5 0.4 2571.2 1.3X -Coalesce Num Partitions: 1000 Num Hosts: 80 245 252 13 0.4 2448.9 1.4X -Coalesce Num Partitions: 5000 Num 
Hosts: 1 3099 3145 55 0.0 30988.6 0.1X -Coalesce Num Partitions: 5000 Num Hosts: 5 1037 1050 20 0.1 10374.4 0.3X -Coalesce Num Partitions: 5000 Num Hosts: 10 626 633 8 0.2 6261.8 0.6X -Coalesce Num Partitions: 5000 Num Hosts: 20 426 431 5 0.2 4258.6 0.8X -Coalesce Num Partitions: 5000 Num Hosts: 40 328 341 22 0.3 3275.4 1.1X -Coalesce Num Partitions: 5000 Num Hosts: 80 272 275 4 0.4 2721.4 1.3X -Coalesce Num Partitions: 10000 Num Hosts: 1 5516 5526 9 0.0 55156.8 0.1X -Coalesce Num Partitions: 10000 Num Hosts: 5 1956 1992 48 0.1 19560.9 0.2X -Coalesce Num Partitions: 10000 Num Hosts: 10 1045 1057 18 0.1 10447.4 0.3X -Coalesce Num Partitions: 10000 Num Hosts: 20 637 658 24 0.2 6373.2 0.5X -Coalesce Num Partitions: 10000 Num Hosts: 40 431 448 15 0.2 4312.9 0.8X -Coalesce Num Partitions: 10000 Num Hosts: 80 326 328 2 0.3 3263.4 1.1X +Coalesce Num Partitions: 100 Num Hosts: 1 395 401 9 0.3 3952.3 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 296 344 42 0.3 2963.2 1.3X +Coalesce Num Partitions: 100 Num Hosts: 10 294 308 15 0.3 2941.7 1.3X +Coalesce Num Partitions: 100 Num Hosts: 20 316 328 13 0.3 3155.2 1.3X +Coalesce Num Partitions: 100 Num Hosts: 40 294 316 36 0.3 2940.3 1.3X +Coalesce Num Partitions: 100 Num Hosts: 80 292 324 30 0.3 2922.2 1.4X +Coalesce Num Partitions: 500 Num Hosts: 1 629 687 61 0.2 6292.4 0.6X +Coalesce Num Partitions: 500 Num Hosts: 5 354 378 42 0.3 3541.7 1.1X +Coalesce Num Partitions: 500 Num Hosts: 10 318 338 29 0.3 3179.8 1.2X +Coalesce Num Partitions: 500 Num Hosts: 20 306 317 11 0.3 3059.2 1.3X +Coalesce Num Partitions: 500 Num Hosts: 40 294 311 28 0.3 2941.6 1.3X +Coalesce Num Partitions: 500 Num Hosts: 80 288 309 34 0.3 2883.9 1.4X +Coalesce Num Partitions: 1000 Num Hosts: 1 956 978 20 0.1 9562.2 0.4X +Coalesce Num Partitions: 1000 Num Hosts: 5 431 452 36 0.2 4306.2 0.9X +Coalesce Num Partitions: 1000 Num Hosts: 10 358 379 23 0.3 3581.1 1.1X +Coalesce Num Partitions: 1000 Num Hosts: 20 324 347 20 0.3 3236.7 1.2X +Coalesce Num Partitions: 
1000 Num Hosts: 40 312 333 20 0.3 3116.8 1.3X +Coalesce Num Partitions: 1000 Num Hosts: 80 307 342 32 0.3 3068.4 1.3X +Coalesce Num Partitions: 5000 Num Hosts: 1 3895 3906 12 0.0 38946.8 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 5 1388 1401 19 0.1 13881.7 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 10 806 839 57 0.1 8063.7 0.5X +Coalesce Num Partitions: 5000 Num Hosts: 20 546 573 44 0.2 5462.6 0.7X +Coalesce Num Partitions: 5000 Num Hosts: 40 413 418 5 0.2 4134.7 1.0X +Coalesce Num Partitions: 5000 Num Hosts: 80 345 365 23 0.3 3448.1 1.1X +Coalesce Num Partitions: 10000 Num Hosts: 1 6933 6966 55 0.0 69328.8 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 5 2455 2499 69 0.0 24551.7 0.2X +Coalesce Num Partitions: 10000 Num Hosts: 10 1352 1392 34 0.1 13520.2 0.3X +Coalesce Num Partitions: 10000 Num Hosts: 20 815 853 50 0.1 8147.5 0.5X +Coalesce Num Partitions: 10000 Num Hosts: 40 558 581 28 0.2 5578.0 0.7X +Coalesce Num Partitions: 10000 Num Hosts: 80 416 423 5 0.2 4163.3 0.9X diff --git a/core/benchmarks/KryoBenchmark-jdk11-results.txt b/core/benchmarks/KryoBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..27f0b8f59f47a --- /dev/null +++ b/core/benchmarks/KryoBenchmark-jdk11-results.txt @@ -0,0 +1,28 @@ +================================================================================================ +Benchmark Kryo Unsafe vs safe Serialization +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +basicTypes: Int with unsafe:true 275 288 14 3.6 275.2 1.0X +basicTypes: Long with unsafe:true 331 336 13 3.0 330.9 0.8X +basicTypes: Float with 
unsafe:true 304 305 1 3.3 304.4 0.9X +basicTypes: Double with unsafe:true 328 332 3 3.0 328.1 0.8X +Array: Int with unsafe:true 4 4 0 252.8 4.0 69.6X +Array: Long with unsafe:true 6 6 0 161.5 6.2 44.5X +Array: Float with unsafe:true 4 4 0 264.6 3.8 72.8X +Array: Double with unsafe:true 6 7 0 160.5 6.2 44.2X +Map of string->Double with unsafe:true 52 52 0 19.3 51.8 5.3X +basicTypes: Int with unsafe:false 344 345 1 2.9 344.3 0.8X +basicTypes: Long with unsafe:false 372 373 1 2.7 372.3 0.7X +basicTypes: Float with unsafe:false 333 334 1 3.0 333.4 0.8X +basicTypes: Double with unsafe:false 344 345 0 2.9 344.3 0.8X +Array: Int with unsafe:false 25 25 0 40.8 24.5 11.2X +Array: Long with unsafe:false 37 37 1 27.3 36.7 7.5X +Array: Float with unsafe:false 11 11 0 92.1 10.9 25.4X +Array: Double with unsafe:false 17 18 0 58.3 17.2 16.0X +Map of string->Double with unsafe:false 51 52 1 19.4 51.5 5.3X + + diff --git a/core/benchmarks/KryoBenchmark-results.txt b/core/benchmarks/KryoBenchmark-results.txt index 91e22f3afc14f..49791e6e87e3a 100644 --- a/core/benchmarks/KryoBenchmark-results.txt +++ b/core/benchmarks/KryoBenchmark-results.txt @@ -2,28 +2,27 @@ Benchmark Kryo Unsafe vs safe Serialization ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz - -Benchmark Kryo Unsafe vs safe Serialization: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -basicTypes: Int with unsafe:true 138 / 149 7.2 138.0 1.0X -basicTypes: Long with unsafe:true 168 / 173 6.0 167.7 0.8X -basicTypes: Float with unsafe:true 153 / 174 6.5 153.1 0.9X -basicTypes: Double with unsafe:true 161 / 185 6.2 161.1 0.9X -Array: Int with unsafe:true 2 / 3 409.7 2.4 56.5X -Array: Long with unsafe:true 4 / 5 232.5 4.3 32.1X -Array: Float with unsafe:true 3 / 4 367.3 
2.7 50.7X -Array: Double with unsafe:true 4 / 5 228.5 4.4 31.5X -Map of string->Double with unsafe:true 38 / 45 26.5 37.8 3.7X -basicTypes: Int with unsafe:false 176 / 187 5.7 175.9 0.8X -basicTypes: Long with unsafe:false 191 / 203 5.2 191.2 0.7X -basicTypes: Float with unsafe:false 166 / 176 6.0 166.2 0.8X -basicTypes: Double with unsafe:false 174 / 190 5.7 174.3 0.8X -Array: Int with unsafe:false 19 / 26 52.9 18.9 7.3X -Array: Long with unsafe:false 27 / 31 37.7 26.5 5.2X -Array: Float with unsafe:false 8 / 10 124.3 8.0 17.2X -Array: Double with unsafe:false 12 / 13 83.6 12.0 11.5X -Map of string->Double with unsafe:false 38 / 42 26.1 38.3 3.6X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +basicTypes: Int with unsafe:true 269 290 23 3.7 269.0 1.0X +basicTypes: Long with unsafe:true 294 295 1 3.4 293.8 0.9X +basicTypes: Float with unsafe:true 300 301 1 3.3 300.4 0.9X +basicTypes: Double with unsafe:true 304 305 1 3.3 304.0 0.9X +Array: Int with unsafe:true 5 6 1 193.5 5.2 52.0X +Array: Long with unsafe:true 8 9 1 131.2 7.6 35.3X +Array: Float with unsafe:true 6 6 0 163.5 6.1 44.0X +Array: Double with unsafe:true 9 10 0 108.8 9.2 29.3X +Map of string->Double with unsafe:true 54 54 1 18.7 53.6 5.0X +basicTypes: Int with unsafe:false 326 327 1 3.1 326.2 0.8X +basicTypes: Long with unsafe:false 353 354 1 2.8 353.3 0.8X +basicTypes: Float with unsafe:false 325 327 1 3.1 325.1 0.8X +basicTypes: Double with unsafe:false 335 336 1 3.0 335.0 0.8X +Array: Int with unsafe:false 27 28 1 36.7 27.2 9.9X +Array: Long with unsafe:false 40 41 1 25.0 40.0 6.7X +Array: Float with unsafe:false 12 13 1 80.8 12.4 21.7X +Array: Double with unsafe:false 21 21 1 48.6 20.6 13.1X +Map 
of string->Double with unsafe:false 56 57 1 17.8 56.1 4.8X diff --git a/core/benchmarks/KryoSerializerBenchmark-jdk11-results.txt b/core/benchmarks/KryoSerializerBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..6b148bde12d36 --- /dev/null +++ b/core/benchmarks/KryoSerializerBenchmark-jdk11-results.txt @@ -0,0 +1,12 @@ +================================================================================================ +Benchmark KryoPool vs old"pool of 1" implementation +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +KryoPool:true 6208 8374 NaN 0.0 12416876.6 1.0X +KryoPool:false 9084 11577 724 0.0 18168947.4 0.7X + + diff --git a/core/benchmarks/KryoSerializerBenchmark-results.txt b/core/benchmarks/KryoSerializerBenchmark-results.txt index c3ce336d93241..609f3298cbc00 100644 --- a/core/benchmarks/KryoSerializerBenchmark-results.txt +++ b/core/benchmarks/KryoSerializerBenchmark-results.txt @@ -1,12 +1,12 @@ ================================================================================================ -Benchmark KryoPool vs "pool of 1" +Benchmark KryoPool vs old"pool of 1" implementation ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14 -Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz -Benchmark KryoPool vs "pool of 1": Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -KryoPool:true 2682 / 3425 0.0 5364627.9 1.0X -KryoPool:false 8176 / 
9292 0.0 16351252.2 0.3X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +KryoPool:true 6012 7586 NaN 0.0 12023020.2 1.0X +KryoPool:false 9289 11566 909 0.0 18578683.1 0.6X diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-jdk11-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..db23cf5c12ea7 --- /dev/null +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk11-results.txt @@ -0,0 +1,66 @@ +OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 170 178 9 1.2 849.7 1.0X +Deserialization 530 535 9 0.4 2651.1 0.3X + +Compressed Serialized MapStatus sizes: 411 bytes +Compressed Serialized Broadcast MapStatus sizes: 2 MB + + +OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 157 165 7 1.3 785.4 1.0X +Deserialization 495 588 79 0.4 2476.7 0.3X + +Compressed Serialized MapStatus sizes: 2 MB +Compressed Serialized Broadcast MapStatus sizes: 0 bytes + + +OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws 
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 344 351 4 0.6 1720.4 1.0X +Deserialization 527 579 99 0.4 2635.9 0.7X + +Compressed Serialized MapStatus sizes: 427 bytes +Compressed Serialized Broadcast MapStatus sizes: 13 MB + + +OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 317 321 4 0.6 1583.8 1.0X +Deserialization 530 540 15 0.4 2648.3 0.6X + +Compressed Serialized MapStatus sizes: 13 MB +Compressed Serialized Broadcast MapStatus sizes: 0 bytes + + +OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 1738 1849 156 0.1 8692.0 1.0X +Deserialization 946 977 33 0.2 4730.2 1.8X + +Compressed Serialized MapStatus sizes: 556 bytes +Compressed Serialized Broadcast MapStatus sizes: 121 MB + + +OpenJDK 64-Bit Server VM 11.0.4+11-post-Ubuntu-1ubuntu218.04.3 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 1379 
1432 76 0.1 6892.6 1.0X +Deserialization 929 941 19 0.2 4645.5 1.5X + +Compressed Serialized MapStatus sizes: 121 MB +Compressed Serialized Broadcast MapStatus sizes: 0 bytes + + diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt new file mode 100644 index 0000000000000..053f4bf771923 --- /dev/null +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt @@ -0,0 +1,66 @@ +OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 178 187 15 1.1 887.5 1.0X +Deserialization 530 558 32 0.4 2647.5 0.3X + +Compressed Serialized MapStatus sizes: 411 bytes +Compressed Serialized Broadcast MapStatus sizes: 2 MB + + +OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 167 175 7 1.2 835.7 1.0X +Deserialization 523 537 22 0.4 2616.2 0.3X + +Compressed Serialized MapStatus sizes: 2 MB +Compressed Serialized Broadcast MapStatus sizes: 0 bytes + + +OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 351 416 147 0.6 1754.4 1.0X 
+Deserialization 546 551 8 0.4 2727.6 0.6X + +Compressed Serialized MapStatus sizes: 427 bytes +Compressed Serialized Broadcast MapStatus sizes: 13 MB + + +OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 320 321 1 0.6 1598.0 1.0X +Deserialization 542 549 7 0.4 2709.0 0.6X + +Compressed Serialized MapStatus sizes: 13 MB +Compressed Serialized Broadcast MapStatus sizes: 0 bytes + + +OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 1671 1877 290 0.1 8357.3 1.0X +Deserialization 943 970 32 0.2 4715.8 1.8X + +Compressed Serialized MapStatus sizes: 556 bytes +Compressed Serialized Broadcast MapStatus sizes: 121 MB + + +OpenJDK 64-Bit Server VM 1.8.0_222-8u222-b10-1ubuntu1~18.04.1-b10 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Serialization 1373 1436 89 0.1 6865.0 1.0X +Deserialization 940 970 37 0.2 4699.1 1.5X + +Compressed Serialized MapStatus sizes: 121 MB +Compressed Serialized Broadcast MapStatus sizes: 0 bytes + + diff --git a/core/benchmarks/PropertiesCloneBenchmark-jdk11-results.txt b/core/benchmarks/PropertiesCloneBenchmark-jdk11-results.txt 
new file mode 100644 index 0000000000000..605b856d53382 --- /dev/null +++ b/core/benchmarks/PropertiesCloneBenchmark-jdk11-results.txt @@ -0,0 +1,40 @@ +================================================================================================ +Properties Cloning +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.1 11539.0 1.0X +Utils.cloneProperties 0 0 0 1.7 572.0 20.2X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.0 217514.0 1.0X +Utils.cloneProperties 0 0 0 0.2 5387.0 40.4X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Small Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 1 1 0 0.0 634574.0 1.0X +Utils.cloneProperties 0 0 0 0.3 3082.0 205.9X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Medium Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 
3 3 0 0.0 2576565.0 1.0X +Utils.cloneProperties 0 0 0 0.1 16071.0 160.3X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 5 5 0 0.0 5027248.0 1.0X +Utils.cloneProperties 0 0 0 0.0 31842.0 157.9X + + diff --git a/core/benchmarks/PropertiesCloneBenchmark-results.txt b/core/benchmarks/PropertiesCloneBenchmark-results.txt new file mode 100644 index 0000000000000..5d332a147c698 --- /dev/null +++ b/core/benchmarks/PropertiesCloneBenchmark-results.txt @@ -0,0 +1,40 @@ +================================================================================================ +Properties Cloning +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.1 13640.0 1.0X +Utils.cloneProperties 0 0 0 1.6 608.0 22.4X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.0 238968.0 1.0X +Utils.cloneProperties 0 0 0 0.4 2318.0 103.1X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Small Properties: Best Time(ms) Avg 
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 1 1 0 0.0 725849.0 1.0X +Utils.cloneProperties 0 0 0 0.3 2900.0 250.3X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Medium Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 3 3 0 0.0 2999676.0 1.0X +Utils.cloneProperties 0 0 0 0.1 11734.0 255.6X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 6 6 1 0.0 5846410.0 1.0X +Utils.cloneProperties 0 0 0 0.0 22405.0 260.9X + + diff --git a/core/benchmarks/XORShiftRandomBenchmark-jdk11-results.txt b/core/benchmarks/XORShiftRandomBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..9aa10e4835a2f --- /dev/null +++ b/core/benchmarks/XORShiftRandomBenchmark-jdk11-results.txt @@ -0,0 +1,44 @@ +================================================================================================ +Pseudo random +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 1362 1362 0 73.4 13.6 1.0X 
+XORShiftRandom 227 227 0 440.6 2.3 6.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2725 2726 1 36.7 27.3 1.0X +XORShiftRandom 694 694 1 144.1 6.9 3.9X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2727 2728 0 36.7 27.3 1.0X +XORShiftRandom 693 694 0 144.2 6.9 3.9X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 7012 7016 4 14.3 70.1 1.0X +XORShiftRandom 6065 6067 1 16.5 60.7 1.2X + + +================================================================================================ +hash seed +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +XORShiftRandom.hashSeed 36 37 1 276.5 3.6 1.0X + + diff --git a/core/benchmarks/XORShiftRandomBenchmark-results.txt b/core/benchmarks/XORShiftRandomBenchmark-results.txt index 
1140489e4a7f3..4b069878b2e9b 100644 --- a/core/benchmarks/XORShiftRandomBenchmark-results.txt +++ b/core/benchmarks/XORShiftRandomBenchmark-results.txt @@ -2,43 +2,43 @@ Pseudo random ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextInt: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -java.util.Random 1362 / 1362 73.4 13.6 1.0X -XORShiftRandom 227 / 227 440.6 2.3 6.0X +nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 1362 1396 59 73.4 13.6 1.0X +XORShiftRandom 227 227 0 440.7 2.3 6.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextLong: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -java.util.Random 2732 / 2732 36.6 27.3 1.0X -XORShiftRandom 629 / 629 159.0 6.3 4.3X +nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2732 2732 1 36.6 27.3 1.0X +XORShiftRandom 630 630 1 158.7 6.3 4.3X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextDouble: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------- -java.util.Random 2730 / 2730 36.6 27.3 1.0X -XORShiftRandom 629 / 629 159.0 6.3 4.3X +nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2731 2732 1 36.6 27.3 1.0X +XORShiftRandom 630 630 0 158.8 6.3 4.3X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextGaussian: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -java.util.Random 10288 / 10288 9.7 102.9 1.0X -XORShiftRandom 6351 / 6351 15.7 63.5 1.6X +nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 8895 8899 4 11.2 88.9 1.0X +XORShiftRandom 5049 5052 5 19.8 50.5 1.8X ================================================================================================ hash seed ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Hash seed: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -XORShiftRandom.hashSeed 1193 / 1195 8.4 119.3 1.0X +Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +XORShiftRandom.hashSeed 67 68 1 148.8 6.7 1.0X diff --git a/core/pom.xml b/core/pom.xml index 42fc2c4b3a287..9d54d21b95ba3 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -26,12 +26,14 @@ spark-core_2.12 - - core - jar Spark Project Core http://spark.apache.org/ + + + core + + com.thoughtworks.paranamer @@ -163,7 +165,6 @@ javax.servlet-api ${javaxservlet.version} - org.apache.commons commons-lang3 @@ -292,6 +293,16 @@ io.dropwizard.metrics metrics-graphite + + + com.rabbitmq + amqp-client + + + + + io.dropwizard.metrics + metrics-jmx com.fasterxml.jackson.core @@ -384,6 +395,11 @@ curator-test test + + org.apache.hadoop + hadoop-minikdc + test + net.razorvine pyrolite @@ -501,6 +517,24 @@ + + org.codehaus.mojo + build-helper-maven-plugin + + + add-sources + generate-sources + + add-source + + + + src/main/scala-${scala.binary.version} + + + + + @@ -551,6 +585,15 @@ + + scala-2.13 + + + org.scala-lang.modules + scala-parallel-collections_${scala.binary.version} + + + diff --git a/core/src/main/java/org/apache/spark/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/ExecutorPlugin.java deleted file mode 100644 index f86520c81df33..0000000000000 --- a/core/src/main/java/org/apache/spark/ExecutorPlugin.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark; - -import org.apache.spark.annotation.DeveloperApi; - -/** - * A plugin which can be automatically instantiated within each Spark executor. Users can specify - * plugins which should be created with the "spark.executor.plugins" configuration. An instance - * of each plugin will be created for every executor, including those created by dynamic allocation, - * before the executor starts running any tasks. - * - * The specific api exposed to the end users still considered to be very unstable. We will - * hopefully be able to keep compatibility by providing default implementations for any methods - * added, but make no guarantees this will always be possible across all Spark releases. - * - * Spark does nothing to verify the plugin is doing legitimate things, or to manage the resources - * it uses. A plugin acquires the same privileges as the user running the task. A bad plugin - * could also interfere with task execution and make the executor fail in unexpected ways. - */ -@DeveloperApi -public interface ExecutorPlugin { - - /** - * Initialize the executor plugin. - * - *

Each executor will, during its initialization, invoke this method on each - * plugin provided in the spark.executor.plugins configuration.

- * - *

Plugins should create threads in their implementation of this method for - * any polling, blocking, or intensive computation.

- */ - default void init() {} - - /** - * Clean up and terminate this plugin. - * - *

This function is called during the executor shutdown phase. The executor - * will wait for the plugin to terminate before continuing its own shutdown.

- */ - default void shutdown() {} -} diff --git a/core/src/main/java/org/apache/spark/api/plugin/DriverPlugin.java b/core/src/main/java/org/apache/spark/api/plugin/DriverPlugin.java new file mode 100644 index 0000000000000..0c0d0df8ae682 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/plugin/DriverPlugin.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.plugin; + +import java.util.Collections; +import java.util.Map; + +import org.apache.spark.SparkContext; +import org.apache.spark.annotation.DeveloperApi; + +/** + * :: DeveloperApi :: + * Driver component of a {@link SparkPlugin}. + * + * @since 3.0.0 + */ +@DeveloperApi +public interface DriverPlugin { + + /** + * Initialize the plugin. + *

+ * This method is called early in the initialization of the Spark driver. Explicitly, it is + * called before the Spark driver's task scheduler is initialized. This means that a lot + * of other Spark subsystems may yet not have been initialized. This call also blocks driver + * initialization. + *

+ * It's recommended that plugins be careful about what operations are performed in this call, + * preferrably performing expensive operations in a separate thread, or postponing them until + * the application has fully started. + * + * @param sc The SparkContext loading the plugin. + * @param pluginContext Additional plugin-specific about the Spark application where the plugin + * is running. + * @return A map that will be provided to the {@link ExecutorPlugin#init(PluginContext,Map)} + * method. + */ + default Map init(SparkContext sc, PluginContext pluginContext) { + return Collections.emptyMap(); + } + + /** + * Register metrics published by the plugin with Spark's metrics system. + *

+ * This method is called later in the initialization of the Spark application, after most + * subsystems are up and the application ID is known. If there are metrics registered in + * the registry ({@link PluginContext#metricRegistry()}), then a metrics source with the + * plugin name will be created. + *

+ * Note that even though the metric registry is still accessible after this method is called, + * registering new metrics after this method is called may result in the metrics not being + * available. + * + * @param appId The application ID from the cluster manager. + * @param pluginContext Additional plugin-specific about the Spark application where the plugin + * is running. + */ + default void registerMetrics(String appId, PluginContext pluginContext) {} + + /** + * RPC message handler. + *

+ * Plugins can use Spark's RPC system to send messages from executors to the driver (but not + * the other way around, currently). Messages sent by the executor component of the plugin will + * be delivered to this method, and the returned value will be sent back to the executor as + * the reply, if the executor has requested one. + *

+ * Any exception thrown will be sent back to the executor as an error, in case it is expecting + * a reply. In case a reply is not expected, a log message will be written to the driver log. + *

+ * The implementation of this handler should be thread-safe. + *

+ * Note all plugins share RPC dispatch threads, and this method is called synchronously. So + * performing expensive operations in this handler may affect the operation of other active + * plugins. Internal Spark endpoints are not directly affected, though, since they use different + * threads. + *

+ * Spark guarantees that the driver component will be ready to receive messages through this + * handler when executors are started. + * + * @param message The incoming message. + * @return Value to be returned to the caller. Ignored if the caller does not expect a reply. + */ + default Object receive(Object message) throws Exception { + throw new UnsupportedOperationException(); + } + + /** + * Informs the plugin that the Spark application is shutting down. + *

+ * This method is called during the driver shutdown phase. It is recommended that plugins + * not use any Spark functions (e.g. send RPC messages) during this call. + */ + default void shutdown() {} + +} diff --git a/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java new file mode 100644 index 0000000000000..4961308035163 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.plugin; + +import java.util.Map; + +import org.apache.spark.annotation.DeveloperApi; + +/** + * :: DeveloperApi :: + * Executor component of a {@link SparkPlugin}. + * + * @since 3.0.0 + */ +@DeveloperApi +public interface ExecutorPlugin { + + /** + * Initialize the executor plugin. + *

+ * When a Spark plugin provides an executor plugin, this method will be called during the + * initialization of the executor process. It will block executor initialization until it + * returns. + *

+ * Executor plugins that publish metrics should register all metrics with the context's + * registry ({@link PluginContext#metricRegistry()}) when this method is called. Metrics + * registered afterwards are not guaranteed to show up. + * + * @param ctx Context information for the executor where the plugin is running. + * @param extraConf Extra configuration provided by the driver component during its + * initialization. + */ + default void init(PluginContext ctx, Map extraConf) {} + + /** + * Clean up and terminate this plugin. + *

+ * This method is called during the executor shutdown phase, and blocks executor shutdown. + */ + default void shutdown() {} + +} diff --git a/core/src/main/java/org/apache/spark/api/plugin/PluginContext.java b/core/src/main/java/org/apache/spark/api/plugin/PluginContext.java new file mode 100644 index 0000000000000..36d827598dfc5 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/plugin/PluginContext.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.plugin; + +import java.io.IOException; +import java.util.Map; + +import com.codahale.metrics.MetricRegistry; + +import org.apache.spark.SparkConf; +import org.apache.spark.annotation.DeveloperApi; +import org.apache.spark.resource.ResourceInformation; + +/** + * :: DeveloperApi :: + * Context information and operations for plugins loaded by Spark. + *

+ * An instance of this class is provided to plugins in their initialization method. It is safe + * for plugins to keep a reference to the instance for later use (for example, to send messages + * to the plugin's driver component). + *

+ * Context instances are plugin-specific, so metrics and messages are tied to each plugin. It is + * not possible for a plugin to directly interact with other plugins. + * + * @since 3.0.0 + */ +@DeveloperApi +public interface PluginContext { + + /** + * Registry where to register metrics published by the plugin associated with this context. + */ + MetricRegistry metricRegistry(); + + /** Configuration of the Spark application. */ + SparkConf conf(); + + /** Executor ID of the process. On the driver, this will identify the driver. */ + String executorID(); + + /** The host name which is being used by the Spark process for communication. */ + String hostname(); + + /** The custom resources (GPUs, FPGAs, etc) allocated to driver or executor. */ + Map resources(); + + /** + * Send a message to the plugin's driver-side component. + *

+ * This method sends a message to the driver-side component of the plugin, without expecting + * a reply. It returns as soon as the message is enqueued for sending. + *

+ * The message must be serializable. + * + * @param message Message to be sent. + */ + void send(Object message) throws IOException; + + /** + * Send an RPC to the plugin's driver-side component. + *

+ * This method sends a message to the driver-side component of the plugin, and blocks until a + * reply arrives, or the configured RPC ask timeout (spark.rpc.askTimeout) elapses. + *

+ * If the driver replies with an error, an exception with the corresponding error will be thrown. + *

+ * The message must be serializable. + * + * @param message Message to be sent. + * @return The reply from the driver-side component. + */ + Object ask(Object message) throws Exception; + +} diff --git a/core/src/main/java/org/apache/spark/api/plugin/SparkPlugin.java b/core/src/main/java/org/apache/spark/api/plugin/SparkPlugin.java new file mode 100644 index 0000000000000..21ddae37d8a0d --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/plugin/SparkPlugin.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.api.plugin; + +import org.apache.spark.annotation.DeveloperApi; + +/** + * :: DeveloperApi :: + * A plugin that can be dynamically loaded into a Spark application. + *

+ * Plugins can be loaded by adding the plugin's class name to the appropriate Spark configuration. + * Check the Spark monitoring guide for details. + *

+ * Plugins have two optional components: a driver-side component, of which a single instance is + * created per application, inside the Spark driver. And an executor-side component, of which one + * instance is created in each executor that is started by Spark. Details of each component can be + * found in the documentation for {@link DriverPlugin} and {@link ExecutorPlugin}. + * + * @since 3.0.0 + */ +@DeveloperApi +public interface SparkPlugin { + + /** + * Return the plugin's driver-side component. + * + * @return The driver-side component, or null if one is not needed. + */ + DriverPlugin driverPlugin(); + + /** + * Return the plugin's executor-side component. + * + * @return The executor-side component, or null if one is not needed. + */ + ExecutorPlugin executorPlugin(); + +} diff --git a/core/src/main/java/org/apache/spark/api/resource/ResourceDiscoveryPlugin.java b/core/src/main/java/org/apache/spark/api/resource/ResourceDiscoveryPlugin.java new file mode 100644 index 0000000000000..ffd2f83552a63 --- /dev/null +++ b/core/src/main/java/org/apache/spark/api/resource/ResourceDiscoveryPlugin.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.api.resource; + +import java.util.Optional; + +import org.apache.spark.annotation.DeveloperApi; +import org.apache.spark.SparkConf; +import org.apache.spark.resource.ResourceInformation; +import org.apache.spark.resource.ResourceRequest; + +/** + * :: DeveloperApi :: + * A plugin that can be dynamically loaded into a Spark application to control how custom + * resources are discovered. Plugins can be chained to allow different plugins to handle + * different resource types. + *

+ * Plugins must implement the function discoverResource. + * + * @since 3.0.0 + */ +@DeveloperApi +public interface ResourceDiscoveryPlugin { + /** + * Discover the addresses of the requested resource. + *

+ * This method is called early in the initialization of the Spark Executor/Driver/Worker. + * This function is responsible for discovering the addresses of the resource which Spark will + * then use for scheduling and eventually providing to the user. + * Depending on the deployment mode and configuration of custom resources, this could be + * called by the Spark Driver, the Spark Executors, in standalone mode the Workers, or all of + * them. The ResourceRequest has a ResourceID component that can be used to distinguish which + * component it is called from and what resource it is being called for. + * This will get called once for each resource type requested and it is the responsibility of + * this function to return enough addresses of that resource based on the request. If + * the addresses do not meet the requested amount, Spark will fail. + * If this plugin doesn't handle a particular resource, it should return an empty Optional + * and Spark will try other plugins and then last fall back to the default discovery script + * plugin. + * + * @param request The ResourceRequest to be discovered. + * @param sparkConf SparkConf + * @return An {@link Optional} containing a {@link ResourceInformation} object containing + * the resource name and the addresses of the resource. If it returns {@link Optional#EMPTY} + * other plugins will be called. + */ + Optional discoverResource(ResourceRequest request, SparkConf sparkConf); +} diff --git a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java index 92bf0ecc1b5cb..7ca5ade7b9a74 100644 --- a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java +++ b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java @@ -51,7 +51,6 @@ public NioBufferedFileInputStream(File file) throws IOException { /** * Checks weather data is left to be read from the input stream. 
* @return true if data is left, false otherwise - * @throws IOException */ private boolean refill() throws IOException { if (!byteBuffer.hasRemaining()) { @@ -60,10 +59,10 @@ private boolean refill() throws IOException { while (nRead == 0) { nRead = fileChannel.read(byteBuffer); } + byteBuffer.flip(); if (nRead < 0) { return false; } - byteBuffer.flip(); } return true; } diff --git a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java index 4bfd2d358f36f..9a9d0c7946549 100644 --- a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java +++ b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java @@ -54,7 +54,7 @@ public MemoryMode getMode() { /** * Returns the size of used memory in bytes. */ - protected long getUsed() { + public long getUsed() { return used; } @@ -78,7 +78,6 @@ public void spill() throws IOException { * @param size the amount of memory should be released * @param trigger the MemoryConsumer that trigger this spilling * @return the amount of released memory in bytes - * @throws IOException */ public abstract long spill(long size, MemoryConsumer trigger) throws IOException; diff --git a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleDataIO.java b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleDataIO.java index e9e50ecc11e52..e4554bda8acab 100644 --- a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleDataIO.java +++ b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleDataIO.java @@ -46,4 +46,10 @@ public interface ShuffleDataIO { * are only invoked on the executors. */ ShuffleExecutorComponents executor(); + + /** + * Called once on driver process to bootstrap the shuffle metadata modules that + * are maintained by the driver. 
+ */ + ShuffleDriverComponents driver(); } diff --git a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleDriverComponents.java b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleDriverComponents.java new file mode 100644 index 0000000000000..b4cec17b85b32 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleDriverComponents.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.api; + +import java.util.Map; + +import org.apache.spark.annotation.Private; + +/** + * :: Private :: + * An interface for building shuffle support modules for the Driver. + */ +@Private +public interface ShuffleDriverComponents { + + /** + * Called once in the driver to bootstrap this module that is specific to this application. + * This method is called before submitting executor requests to the cluster manager. + * + * This method should prepare the module with its shuffle components i.e. registering against + * an external file servers or shuffle services, or creating tables in a shuffle + * storage data database. + * + * @return additional SparkConf settings necessary for initializing the executor components. 
+ * This would include configurations that cannot be statically set on the application, like + * the host:port of external services for shuffle storage. + */ + Map initializeApplication(); + + /** + * Called once at the end of the Spark application to clean up any existing shuffle state. + */ + void cleanupApplication(); + + /** + * Called once per shuffle id when the shuffle id is first generated for a shuffle stage. + * + * @param shuffleId The unique identifier for the shuffle stage. + */ + default void registerShuffle(int shuffleId) {} + + /** + * Removes shuffle data associated with the given shuffle. + * + * @param shuffleId The unique identifier for the shuffle stage. + * @param blocking Whether this call should block on the deletion of the data. + */ + default void removeShuffle(int shuffleId, boolean blocking) {} +} diff --git a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java index 70c112b78911d..30ca177545789 100644 --- a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java +++ b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java @@ -18,6 +18,8 @@ package org.apache.spark.shuffle.api; import java.io.IOException; +import java.util.Map; +import java.util.Optional; import org.apache.spark.annotation.Private; @@ -33,23 +35,42 @@ public interface ShuffleExecutorComponents { /** * Called once per executor to bootstrap this module with state that is specific to * that executor, specifically the application ID and executor ID. 
+ * + * @param appId The Spark application id + * @param execId The unique identifier of the executor being initialized + * @param extraConfigs Extra configs that were returned by + * {@link ShuffleDriverComponents#initializeApplication()} */ - void initializeExecutor(String appId, String execId); + void initializeExecutor(String appId, String execId, Map extraConfigs); /** * Called once per map task to create a writer that will be responsible for persisting all the * partitioned bytes written by that map task. - * @param shuffleId Unique identifier for the shuffle the map task is a part of - * @param mapId Within the shuffle, the identifier of the map task - * @param mapTaskAttemptId Identifier of the task attempt. Multiple attempts of the same map task - * with the same (shuffleId, mapId) pair can be distinguished by the - * different values of mapTaskAttemptId. + * + * @param shuffleId Unique identifier for the shuffle the map task is a part of + * @param mapTaskId An ID of the map task. The ID is unique within this Spark application. * @param numPartitions The number of partitions that will be written by the map task. Some of -* these partitions may be empty. + * these partitions may be empty. */ ShuffleMapOutputWriter createMapOutputWriter( int shuffleId, - int mapId, - long mapTaskAttemptId, + long mapTaskId, int numPartitions) throws IOException; + + /** + * An optional extension for creating a map output writer that can optimize the transfer of a + * single partition file, as the entire result of a map task, to the backing store. + *

+ * Most implementations should return the default {@link Optional#empty()} to indicate that + * they do not support this optimization. This primarily is for backwards-compatibility in + * preserving an optimization in the local disk shuffle storage implementation. + * + * @param shuffleId Unique identifier for the shuffle the map task is a part of + * @param mapId An ID of the map task. The ID is unique within this Spark application. + */ + default Optional createSingleFileMapOutputWriter( + int shuffleId, + long mapId) throws IOException { + return Optional.empty(); + } } diff --git a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java index 7fac00b7fbc3f..21abe9a57cd25 100644 --- a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java @@ -39,7 +39,7 @@ public interface ShuffleMapOutputWriter { * for the same partition within any given map task. The partition identifier will be in the * range of precisely 0 (inclusive) to numPartitions (exclusive), where numPartitions was * provided upon the creation of this map output writer via - * {@link ShuffleExecutorComponents#createMapOutputWriter(int, int, long, int)}. + * {@link ShuffleExecutorComponents#createMapOutputWriter(int, long, int)}. *

* Calls to this method will be invoked with monotonically increasing reducePartitionIds; each * call to this method will be called with a reducePartitionId that is strictly greater than diff --git a/core/src/main/java/org/apache/spark/shuffle/api/SingleSpillShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/api/SingleSpillShuffleMapOutputWriter.java new file mode 100644 index 0000000000000..cad8dcfda52bc --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/api/SingleSpillShuffleMapOutputWriter.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.api; + +import java.io.File; +import java.io.IOException; + +import org.apache.spark.annotation.Private; + +/** + * Optional extension for partition writing that is optimized for transferring a single + * file to the backing store. + */ +@Private +public interface SingleSpillShuffleMapOutputWriter { + + /** + * Transfer a file that contains the bytes of all the partitions written by this map task. 
+ */ + void transferMapSpillFile(File mapOutputFile, long[] partitionLengths) throws IOException; +} diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java index f75e932860f90..dc157eaa3b253 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java @@ -85,8 +85,7 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { private final Partitioner partitioner; private final ShuffleWriteMetricsReporter writeMetrics; private final int shuffleId; - private final int mapId; - private final long mapTaskAttemptId; + private final long mapId; private final Serializer serializer; private final ShuffleExecutorComponents shuffleExecutorComponents; @@ -106,8 +105,7 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { BypassMergeSortShuffleWriter( BlockManager blockManager, BypassMergeSortShuffleHandle handle, - int mapId, - long mapTaskAttemptId, + long mapId, SparkConf conf, ShuffleWriteMetricsReporter writeMetrics, ShuffleExecutorComponents shuffleExecutorComponents) { @@ -117,7 +115,6 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { this.blockManager = blockManager; final ShuffleDependency dep = handle.dependency(); this.mapId = mapId; - this.mapTaskAttemptId = mapTaskAttemptId; this.shuffleId = dep.shuffleId(); this.partitioner = dep.partitioner(); this.numPartitions = partitioner.numPartitions(); @@ -130,11 +127,12 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { public void write(Iterator> records) throws IOException { assert (partitionWriters == null); ShuffleMapOutputWriter mapOutputWriter = shuffleExecutorComponents - .createMapOutputWriter(shuffleId, mapId, mapTaskAttemptId, numPartitions); + .createMapOutputWriter(shuffleId, mapId, numPartitions); try { if 
(!records.hasNext()) { partitionLengths = mapOutputWriter.commitAllPartitions(); - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + mapStatus = MapStatus$.MODULE$.apply( + blockManager.shuffleServerId(), partitionLengths, mapId); return; } final SerializerInstance serInstance = serializer.newInstance(); @@ -167,7 +165,8 @@ public void write(Iterator> records) throws IOException { } partitionLengths = writePartitionedData(mapOutputWriter); - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + mapStatus = MapStatus$.MODULE$.apply( + blockManager.shuffleServerId(), partitionLengths, mapId); } catch (Exception e) { try { mapOutputWriter.abort(e); diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java index 024756087bf7f..833744f4777ce 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java @@ -423,7 +423,6 @@ public void insertRecord(Object recordBase, long recordOffset, int length, int p * * @return metadata for the spill files written by this sorter. If no records were ever inserted * into this sorter, then this will return an empty array. 
- * @throws IOException */ public SpillInfo[] closeAndGetSpills() throws IOException { if (inMemSorter != null) { diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index 9d05f03613ce9..d09282e61a9c7 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -17,9 +17,12 @@ package org.apache.spark.shuffle.sort; +import java.nio.channels.Channels; +import java.util.Optional; import javax.annotation.Nullable; import java.io.*; import java.nio.channels.FileChannel; +import java.nio.channels.WritableByteChannel; import java.util.Iterator; import scala.Option; @@ -31,7 +34,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.io.ByteStreams; import com.google.common.io.Closeables; -import com.google.common.io.Files; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,8 +43,6 @@ import org.apache.spark.io.CompressionCodec; import org.apache.spark.io.CompressionCodec$; import org.apache.spark.io.NioBufferedFileInputStream; -import org.apache.commons.io.output.CloseShieldOutputStream; -import org.apache.commons.io.output.CountingOutputStream; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.network.util.LimitedInputStream; import org.apache.spark.scheduler.MapStatus; @@ -50,8 +50,12 @@ import org.apache.spark.shuffle.ShuffleWriteMetricsReporter; import org.apache.spark.serializer.SerializationStream; import org.apache.spark.serializer.SerializerInstance; -import org.apache.spark.shuffle.IndexShuffleBlockResolver; import org.apache.spark.shuffle.ShuffleWriter; +import org.apache.spark.shuffle.api.ShuffleExecutorComponents; +import org.apache.spark.shuffle.api.ShuffleMapOutputWriter; +import org.apache.spark.shuffle.api.ShufflePartitionWriter; +import 
org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter; +import org.apache.spark.shuffle.api.WritableByteChannelWrapper; import org.apache.spark.storage.BlockManager; import org.apache.spark.storage.TimeTrackingOutputStream; import org.apache.spark.unsafe.Platform; @@ -65,23 +69,21 @@ public class UnsafeShuffleWriter extends ShuffleWriter { private static final ClassTag OBJECT_CLASS_TAG = ClassTag$.MODULE$.Object(); @VisibleForTesting - static final int DEFAULT_INITIAL_SORT_BUFFER_SIZE = 4096; static final int DEFAULT_INITIAL_SER_BUFFER_SIZE = 1024 * 1024; private final BlockManager blockManager; - private final IndexShuffleBlockResolver shuffleBlockResolver; private final TaskMemoryManager memoryManager; private final SerializerInstance serializer; private final Partitioner partitioner; private final ShuffleWriteMetricsReporter writeMetrics; + private final ShuffleExecutorComponents shuffleExecutorComponents; private final int shuffleId; - private final int mapId; + private final long mapId; private final TaskContext taskContext; private final SparkConf sparkConf; private final boolean transferToEnabled; private final int initialSortBufferSize; private final int inputBufferSizeInBytes; - private final int outputBufferSizeInBytes; @Nullable private MapStatus mapStatus; @Nullable private ShuffleExternalSorter sorter; @@ -103,27 +105,15 @@ private static final class MyByteArrayOutputStream extends ByteArrayOutputStream */ private boolean stopping = false; - private class CloseAndFlushShieldOutputStream extends CloseShieldOutputStream { - - CloseAndFlushShieldOutputStream(OutputStream outputStream) { - super(outputStream); - } - - @Override - public void flush() { - // do nothing - } - } - public UnsafeShuffleWriter( BlockManager blockManager, - IndexShuffleBlockResolver shuffleBlockResolver, TaskMemoryManager memoryManager, SerializedShuffleHandle handle, - int mapId, + long mapId, TaskContext taskContext, SparkConf sparkConf, - ShuffleWriteMetricsReporter 
writeMetrics) throws IOException { + ShuffleWriteMetricsReporter writeMetrics, + ShuffleExecutorComponents shuffleExecutorComponents) { final int numPartitions = handle.dependency().partitioner().numPartitions(); if (numPartitions > SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE()) { throw new IllegalArgumentException( @@ -132,7 +122,6 @@ public UnsafeShuffleWriter( " reduce partitions"); } this.blockManager = blockManager; - this.shuffleBlockResolver = shuffleBlockResolver; this.memoryManager = memoryManager; this.mapId = mapId; final ShuffleDependency dep = handle.dependency(); @@ -140,6 +129,7 @@ public UnsafeShuffleWriter( this.serializer = dep.serializer().newInstance(); this.partitioner = dep.partitioner(); this.writeMetrics = writeMetrics; + this.shuffleExecutorComponents = shuffleExecutorComponents; this.taskContext = taskContext; this.sparkConf = sparkConf; this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true); @@ -147,8 +137,6 @@ public UnsafeShuffleWriter( (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_SORT_INIT_BUFFER_SIZE()); this.inputBufferSizeInBytes = (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_FILE_BUFFER_SIZE()) * 1024; - this.outputBufferSizeInBytes = - (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE()) * 1024; open(); } @@ -231,25 +219,17 @@ void closeAndWriteOutput() throws IOException { final SpillInfo[] spills = sorter.closeAndGetSpills(); sorter = null; final long[] partitionLengths; - final File output = shuffleBlockResolver.getDataFile(shuffleId, mapId); - final File tmp = Utils.tempFileWith(output); try { - try { - partitionLengths = mergeSpills(spills, tmp); - } finally { - for (SpillInfo spill : spills) { - if (spill.file.exists() && ! 
spill.file.delete()) { - logger.error("Error while deleting spill file {}", spill.file.getPath()); - } - } - } - shuffleBlockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, tmp); + partitionLengths = mergeSpills(spills); } finally { - if (tmp.exists() && !tmp.delete()) { - logger.error("Error while deleting temp file {}", tmp.getAbsolutePath()); + for (SpillInfo spill : spills) { + if (spill.file.exists() && !spill.file.delete()) { + logger.error("Error while deleting spill file {}", spill.file.getPath()); + } } } - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + mapStatus = MapStatus$.MODULE$.apply( + blockManager.shuffleServerId(), partitionLengths, mapId); } @VisibleForTesting @@ -281,137 +261,153 @@ void forceSorterToSpill() throws IOException { * * @return the partition lengths in the merged file. */ - private long[] mergeSpills(SpillInfo[] spills, File outputFile) throws IOException { + private long[] mergeSpills(SpillInfo[] spills) throws IOException { + long[] partitionLengths; + if (spills.length == 0) { + final ShuffleMapOutputWriter mapWriter = shuffleExecutorComponents + .createMapOutputWriter(shuffleId, mapId, partitioner.numPartitions()); + return mapWriter.commitAllPartitions(); + } else if (spills.length == 1) { + Optional maybeSingleFileWriter = + shuffleExecutorComponents.createSingleFileMapOutputWriter(shuffleId, mapId); + if (maybeSingleFileWriter.isPresent()) { + // Here, we don't need to perform any metrics updates because the bytes written to this + // output file would have already been counted as shuffle bytes written. 
+ partitionLengths = spills[0].partitionLengths; + maybeSingleFileWriter.get().transferMapSpillFile(spills[0].file, partitionLengths); + } else { + partitionLengths = mergeSpillsUsingStandardWriter(spills); + } + } else { + partitionLengths = mergeSpillsUsingStandardWriter(spills); + } + return partitionLengths; + } + + private long[] mergeSpillsUsingStandardWriter(SpillInfo[] spills) throws IOException { + long[] partitionLengths; final boolean compressionEnabled = (boolean) sparkConf.get(package$.MODULE$.SHUFFLE_COMPRESS()); final CompressionCodec compressionCodec = CompressionCodec$.MODULE$.createCodec(sparkConf); final boolean fastMergeEnabled = - (boolean) sparkConf.get(package$.MODULE$.SHUFFLE_UNDAFE_FAST_MERGE_ENABLE()); + (boolean) sparkConf.get(package$.MODULE$.SHUFFLE_UNSAFE_FAST_MERGE_ENABLE()); final boolean fastMergeIsSupported = !compressionEnabled || - CompressionCodec$.MODULE$.supportsConcatenationOfSerializedStreams(compressionCodec); + CompressionCodec$.MODULE$.supportsConcatenationOfSerializedStreams(compressionCodec); final boolean encryptionEnabled = blockManager.serializerManager().encryptionEnabled(); + final ShuffleMapOutputWriter mapWriter = shuffleExecutorComponents + .createMapOutputWriter(shuffleId, mapId, partitioner.numPartitions()); try { - if (spills.length == 0) { - new FileOutputStream(outputFile).close(); // Create an empty file - return new long[partitioner.numPartitions()]; - } else if (spills.length == 1) { - // Here, we don't need to perform any metrics updates because the bytes written to this - // output file would have already been counted as shuffle bytes written. - Files.move(spills[0].file, outputFile); - return spills[0].partitionLengths; - } else { - final long[] partitionLengths; - // There are multiple spills to merge, so none of these spill files' lengths were counted - // towards our shuffle write count or shuffle write time. 
If we use the slow merge path, - // then the final output file's size won't necessarily be equal to the sum of the spill - // files' sizes. To guard against this case, we look at the output file's actual size when - // computing shuffle bytes written. - // - // We allow the individual merge methods to report their own IO times since different merge - // strategies use different IO techniques. We count IO during merge towards the shuffle - // shuffle write time, which appears to be consistent with the "not bypassing merge-sort" - // branch in ExternalSorter. - if (fastMergeEnabled && fastMergeIsSupported) { - // Compression is disabled or we are using an IO compression codec that supports - // decompression of concatenated compressed streams, so we can perform a fast spill merge - // that doesn't need to interpret the spilled bytes. - if (transferToEnabled && !encryptionEnabled) { - logger.debug("Using transferTo-based fast merge"); - partitionLengths = mergeSpillsWithTransferTo(spills, outputFile); - } else { - logger.debug("Using fileStream-based fast merge"); - partitionLengths = mergeSpillsWithFileStream(spills, outputFile, null); - } + // There are multiple spills to merge, so none of these spill files' lengths were counted + // towards our shuffle write count or shuffle write time. If we use the slow merge path, + // then the final output file's size won't necessarily be equal to the sum of the spill + // files' sizes. To guard against this case, we look at the output file's actual size when + // computing shuffle bytes written. + // + // We allow the individual merge methods to report their own IO times since different merge + // strategies use different IO techniques. We count IO during merge towards the shuffle + // write time, which appears to be consistent with the "not bypassing merge-sort" branch in + // ExternalSorter. 
+ if (fastMergeEnabled && fastMergeIsSupported) { + // Compression is disabled or we are using an IO compression codec that supports + // decompression of concatenated compressed streams, so we can perform a fast spill merge + // that doesn't need to interpret the spilled bytes. + if (transferToEnabled && !encryptionEnabled) { + logger.debug("Using transferTo-based fast merge"); + mergeSpillsWithTransferTo(spills, mapWriter); } else { - logger.debug("Using slow merge"); - partitionLengths = mergeSpillsWithFileStream(spills, outputFile, compressionCodec); + logger.debug("Using fileStream-based fast merge"); + mergeSpillsWithFileStream(spills, mapWriter, null); } - // When closing an UnsafeShuffleExternalSorter that has already spilled once but also has - // in-memory records, we write out the in-memory records to a file but do not count that - // final write as bytes spilled (instead, it's accounted as shuffle write). The merge needs - // to be counted as shuffle write, but this will lead to double-counting of the final - // SpillInfo's bytes. - writeMetrics.decBytesWritten(spills[spills.length - 1].file.length()); - writeMetrics.incBytesWritten(outputFile.length()); - return partitionLengths; + } else { + logger.debug("Using slow merge"); + mergeSpillsWithFileStream(spills, mapWriter, compressionCodec); } - } catch (IOException e) { - if (outputFile.exists() && !outputFile.delete()) { - logger.error("Unable to delete output file {}", outputFile.getPath()); + // When closing an UnsafeShuffleExternalSorter that has already spilled once but also has + // in-memory records, we write out the in-memory records to a file but do not count that + // final write as bytes spilled (instead, it's accounted as shuffle write). The merge needs + // to be counted as shuffle write, but this will lead to double-counting of the final + // SpillInfo's bytes. 
+ writeMetrics.decBytesWritten(spills[spills.length - 1].file.length()); + partitionLengths = mapWriter.commitAllPartitions(); + } catch (Exception e) { + try { + mapWriter.abort(e); + } catch (Exception e2) { + logger.warn("Failed to abort writing the map output.", e2); + e.addSuppressed(e2); } throw e; } + return partitionLengths; } /** * Merges spill files using Java FileStreams. This code path is typically slower than * the NIO-based merge, {@link UnsafeShuffleWriter#mergeSpillsWithTransferTo(SpillInfo[], - * File)}, and it's mostly used in cases where the IO compression codec does not support - * concatenation of compressed data, when encryption is enabled, or when users have - * explicitly disabled use of {@code transferTo} in order to work around kernel bugs. + * ShuffleMapOutputWriter)}, and it's mostly used in cases where the IO compression codec + * does not support concatenation of compressed data, when encryption is enabled, or when + * users have explicitly disabled use of {@code transferTo} in order to work around kernel bugs. * This code path might also be faster in cases where individual partition size in a spill * is small and UnsafeShuffleWriter#mergeSpillsWithTransferTo method performs many small * disk ios which is inefficient. In those case, Using large buffers for input and output * files helps reducing the number of disk ios, making the file merging faster. * * @param spills the spills to merge. - * @param outputFile the file to write the merged data to. + * @param mapWriter the map output writer to use for output. * @param compressionCodec the IO compression codec, or null if shuffle compression is disabled. * @return the partition lengths in the merged file. 
*/ - private long[] mergeSpillsWithFileStream( + private void mergeSpillsWithFileStream( SpillInfo[] spills, - File outputFile, + ShuffleMapOutputWriter mapWriter, @Nullable CompressionCodec compressionCodec) throws IOException { - assert (spills.length >= 2); final int numPartitions = partitioner.numPartitions(); - final long[] partitionLengths = new long[numPartitions]; final InputStream[] spillInputStreams = new InputStream[spills.length]; - final OutputStream bos = new BufferedOutputStream( - new FileOutputStream(outputFile), - outputBufferSizeInBytes); - // Use a counting output stream to avoid having to close the underlying file and ask - // the file system for its size after each partition is written. - final CountingOutputStream mergedFileOutputStream = new CountingOutputStream(bos); - boolean threwException = true; try { for (int i = 0; i < spills.length; i++) { spillInputStreams[i] = new NioBufferedFileInputStream( - spills[i].file, - inputBufferSizeInBytes); + spills[i].file, + inputBufferSizeInBytes); } for (int partition = 0; partition < numPartitions; partition++) { - final long initialFileLength = mergedFileOutputStream.getByteCount(); - // Shield the underlying output stream from close() and flush() calls, so that we can close - // the higher level streams to make sure all data is really flushed and internal state is - // cleaned. 
- OutputStream partitionOutput = new CloseAndFlushShieldOutputStream( - new TimeTrackingOutputStream(writeMetrics, mergedFileOutputStream)); - partitionOutput = blockManager.serializerManager().wrapForEncryption(partitionOutput); - if (compressionCodec != null) { - partitionOutput = compressionCodec.compressedOutputStream(partitionOutput); - } - for (int i = 0; i < spills.length; i++) { - final long partitionLengthInSpill = spills[i].partitionLengths[partition]; - if (partitionLengthInSpill > 0) { - InputStream partitionInputStream = new LimitedInputStream(spillInputStreams[i], - partitionLengthInSpill, false); - try { - partitionInputStream = blockManager.serializerManager().wrapForEncryption( - partitionInputStream); - if (compressionCodec != null) { - partitionInputStream = compressionCodec.compressedInputStream(partitionInputStream); + boolean copyThrewException = true; + ShufflePartitionWriter writer = mapWriter.getPartitionWriter(partition); + OutputStream partitionOutput = writer.openStream(); + try { + partitionOutput = new TimeTrackingOutputStream(writeMetrics, partitionOutput); + partitionOutput = blockManager.serializerManager().wrapForEncryption(partitionOutput); + if (compressionCodec != null) { + partitionOutput = compressionCodec.compressedOutputStream(partitionOutput); + } + for (int i = 0; i < spills.length; i++) { + final long partitionLengthInSpill = spills[i].partitionLengths[partition]; + + if (partitionLengthInSpill > 0) { + InputStream partitionInputStream = null; + boolean copySpillThrewException = true; + try { + partitionInputStream = new LimitedInputStream(spillInputStreams[i], + partitionLengthInSpill, false); + partitionInputStream = blockManager.serializerManager().wrapForEncryption( + partitionInputStream); + if (compressionCodec != null) { + partitionInputStream = compressionCodec.compressedInputStream( + partitionInputStream); + } + ByteStreams.copy(partitionInputStream, partitionOutput); + copySpillThrewException = false; + } 
finally { + Closeables.close(partitionInputStream, copySpillThrewException); } - ByteStreams.copy(partitionInputStream, partitionOutput); - } finally { - partitionInputStream.close(); } } + copyThrewException = false; + } finally { + Closeables.close(partitionOutput, copyThrewException); } - partitionOutput.flush(); - partitionOutput.close(); - partitionLengths[partition] = (mergedFileOutputStream.getByteCount() - initialFileLength); + long numBytesWritten = writer.getNumBytesWritten(); + writeMetrics.incBytesWritten(numBytesWritten); } threwException = false; } finally { @@ -420,9 +416,7 @@ private long[] mergeSpillsWithFileStream( for (InputStream stream : spillInputStreams) { Closeables.close(stream, threwException); } - Closeables.close(mergedFileOutputStream, threwException); } - return partitionLengths; } /** @@ -430,54 +424,46 @@ private long[] mergeSpillsWithFileStream( * This is only safe when the IO compression codec and serializer support concatenation of * serialized streams. * + * @param spills the spills to merge. + * @param mapWriter the map output writer to use for output. * @return the partition lengths in the merged file. 
*/ - private long[] mergeSpillsWithTransferTo(SpillInfo[] spills, File outputFile) throws IOException { - assert (spills.length >= 2); + private void mergeSpillsWithTransferTo( + SpillInfo[] spills, + ShuffleMapOutputWriter mapWriter) throws IOException { final int numPartitions = partitioner.numPartitions(); - final long[] partitionLengths = new long[numPartitions]; final FileChannel[] spillInputChannels = new FileChannel[spills.length]; final long[] spillInputChannelPositions = new long[spills.length]; - FileChannel mergedFileOutputChannel = null; boolean threwException = true; try { for (int i = 0; i < spills.length; i++) { spillInputChannels[i] = new FileInputStream(spills[i].file).getChannel(); } - // This file needs to opened in append mode in order to work around a Linux kernel bug that - // affects transferTo; see SPARK-3948 for more details. - mergedFileOutputChannel = new FileOutputStream(outputFile, true).getChannel(); - - long bytesWrittenToMergedFile = 0; for (int partition = 0; partition < numPartitions; partition++) { - for (int i = 0; i < spills.length; i++) { - final long partitionLengthInSpill = spills[i].partitionLengths[partition]; - final FileChannel spillInputChannel = spillInputChannels[i]; - final long writeStartTime = System.nanoTime(); - Utils.copyFileStreamNIO( - spillInputChannel, - mergedFileOutputChannel, - spillInputChannelPositions[i], - partitionLengthInSpill); - spillInputChannelPositions[i] += partitionLengthInSpill; - writeMetrics.incWriteTime(System.nanoTime() - writeStartTime); - bytesWrittenToMergedFile += partitionLengthInSpill; - partitionLengths[partition] += partitionLengthInSpill; + boolean copyThrewException = true; + ShufflePartitionWriter writer = mapWriter.getPartitionWriter(partition); + WritableByteChannelWrapper resolvedChannel = writer.openChannelWrapper() + .orElseGet(() -> new StreamFallbackChannelWrapper(openStreamUnchecked(writer))); + try { + for (int i = 0; i < spills.length; i++) { + long 
partitionLengthInSpill = spills[i].partitionLengths[partition]; + final FileChannel spillInputChannel = spillInputChannels[i]; + final long writeStartTime = System.nanoTime(); + Utils.copyFileStreamNIO( + spillInputChannel, + resolvedChannel.channel(), + spillInputChannelPositions[i], + partitionLengthInSpill); + copyThrewException = false; + spillInputChannelPositions[i] += partitionLengthInSpill; + writeMetrics.incWriteTime(System.nanoTime() - writeStartTime); + } + } finally { + Closeables.close(resolvedChannel, copyThrewException); } - } - // Check the position after transferTo loop to see if it is in the right position and raise an - // exception if it is incorrect. The position will not be increased to the expected length - // after calling transferTo in kernel version 2.6.32. This issue is described at - // https://bugs.openjdk.java.net/browse/JDK-7052359 and SPARK-3948. - if (mergedFileOutputChannel.position() != bytesWrittenToMergedFile) { - throw new IOException( - "Current position " + mergedFileOutputChannel.position() + " does not equal expected " + - "position " + bytesWrittenToMergedFile + " after transferTo. Please check your kernel" + - " version to see if it is 2.6.32, as there is a kernel bug which will lead to " + - "unexpected behavior when using transferTo. You can set spark.file.transferTo=false " + - "to disable this NIO feature." 
- ); + long numBytes = writer.getNumBytesWritten(); + writeMetrics.incBytesWritten(numBytes); } threwException = false; } finally { @@ -487,9 +473,7 @@ private long[] mergeSpillsWithTransferTo(SpillInfo[] spills, File outputFile) th assert(spillInputChannelPositions[i] == spills[i].file.length()); Closeables.close(spillInputChannels[i], threwException); } - Closeables.close(mergedFileOutputChannel, threwException); } - return partitionLengths; } @Override @@ -518,4 +502,30 @@ public Option stop(boolean success) { } } } + + private static OutputStream openStreamUnchecked(ShufflePartitionWriter writer) { + try { + return writer.openStream(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static final class StreamFallbackChannelWrapper implements WritableByteChannelWrapper { + private final WritableByteChannel channel; + + StreamFallbackChannelWrapper(OutputStream fallbackStream) { + this.channel = Channels.newChannel(fallbackStream); + } + + @Override + public WritableByteChannel channel() { + return channel; + } + + @Override + public void close() throws IOException { + channel.close(); + } + } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleDataIO.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleDataIO.java index cabcb171ac23a..50eb2f1813714 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleDataIO.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleDataIO.java @@ -18,8 +18,9 @@ package org.apache.spark.shuffle.sort.io; import org.apache.spark.SparkConf; -import org.apache.spark.shuffle.api.ShuffleExecutorComponents; import org.apache.spark.shuffle.api.ShuffleDataIO; +import org.apache.spark.shuffle.api.ShuffleDriverComponents; +import org.apache.spark.shuffle.api.ShuffleExecutorComponents; /** * Implementation of the {@link ShuffleDataIO} plugin system that replicates the local shuffle @@ -37,4 +38,9 @@ public 
LocalDiskShuffleDataIO(SparkConf sparkConf) { public ShuffleExecutorComponents executor() { return new LocalDiskShuffleExecutorComponents(sparkConf); } + + @Override + public ShuffleDriverComponents driver() { + return new LocalDiskShuffleDriverComponents(); + } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleDriverComponents.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleDriverComponents.java new file mode 100644 index 0000000000000..92b4b318c552d --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleDriverComponents.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.shuffle.sort.io; + +import java.util.Collections; +import java.util.Map; + +import org.apache.spark.SparkEnv; +import org.apache.spark.shuffle.api.ShuffleDriverComponents; +import org.apache.spark.storage.BlockManagerMaster; + +public class LocalDiskShuffleDriverComponents implements ShuffleDriverComponents { + + private BlockManagerMaster blockManagerMaster; + + @Override + public Map initializeApplication() { + blockManagerMaster = SparkEnv.get().blockManager().master(); + return Collections.emptyMap(); + } + + @Override + public void cleanupApplication() { + // nothing to clean up + } + + @Override + public void removeShuffle(int shuffleId, boolean blocking) { + if (blockManagerMaster == null) { + throw new IllegalStateException("Driver components must be initialized before using"); + } + blockManagerMaster.removeShuffle(shuffleId, blocking); + } +} diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java index 02eb710737285..eb4d9d9abc8e3 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java @@ -17,6 +17,9 @@ package org.apache.spark.shuffle.sort.io; +import java.util.Map; +import java.util.Optional; + import com.google.common.annotations.VisibleForTesting; import org.apache.spark.SparkConf; @@ -24,6 +27,7 @@ import org.apache.spark.shuffle.api.ShuffleExecutorComponents; import org.apache.spark.shuffle.api.ShuffleMapOutputWriter; import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter; import org.apache.spark.storage.BlockManager; public class LocalDiskShuffleExecutorComponents implements ShuffleExecutorComponents { @@ -47,7 +51,7 @@ public 
LocalDiskShuffleExecutorComponents( } @Override - public void initializeExecutor(String appId, String execId) { + public void initializeExecutor(String appId, String execId, Map extraConfigs) { blockManager = SparkEnv.get().blockManager(); if (blockManager == null) { throw new IllegalStateException("No blockManager available from the SparkEnv."); @@ -58,14 +62,24 @@ public void initializeExecutor(String appId, String execId) { @Override public ShuffleMapOutputWriter createMapOutputWriter( int shuffleId, - int mapId, - long mapTaskAttemptId, + long mapTaskId, int numPartitions) { if (blockResolver == null) { throw new IllegalStateException( "Executor components must be initialized before getting writers."); } return new LocalDiskShuffleMapOutputWriter( - shuffleId, mapId, numPartitions, blockResolver, sparkConf); + shuffleId, mapTaskId, numPartitions, blockResolver, sparkConf); + } + + @Override + public Optional createSingleFileMapOutputWriter( + int shuffleId, + long mapId) { + if (blockResolver == null) { + throw new IllegalStateException( + "Executor components must be initialized before getting writers."); + } + return Optional.of(new LocalDiskSingleSpillMapOutputWriter(shuffleId, mapId, blockResolver)); } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java index 7fc19b1270a46..a6529fd76188a 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java @@ -24,8 +24,8 @@ import java.io.OutputStream; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; - import java.util.Optional; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,12 +48,13 @@ public class LocalDiskShuffleMapOutputWriter implements ShuffleMapOutputWriter { 
LoggerFactory.getLogger(LocalDiskShuffleMapOutputWriter.class); private final int shuffleId; - private final int mapId; + private final long mapId; private final IndexShuffleBlockResolver blockResolver; private final long[] partitionLengths; private final int bufferSize; private int lastPartitionId = -1; private long currChannelPosition; + private long bytesWrittenToMergedFile = 0L; private final File outputFile; private File outputTempFile; @@ -63,7 +64,7 @@ public class LocalDiskShuffleMapOutputWriter implements ShuffleMapOutputWriter { public LocalDiskShuffleMapOutputWriter( int shuffleId, - int mapId, + long mapId, int numPartitions, IndexShuffleBlockResolver blockResolver, SparkConf sparkConf) { @@ -97,6 +98,18 @@ public ShufflePartitionWriter getPartitionWriter(int reducePartitionId) throws I @Override public long[] commitAllPartitions() throws IOException { + // Check the position after transferTo loop to see if it is in the right position and raise a + // exception if it is incorrect. The position will not be increased to the expected length + // after calling transferTo in kernel version 2.6.32. This issue is described at + // https://bugs.openjdk.java.net/browse/JDK-7052359 and SPARK-3948. + if (outputFileChannel != null && outputFileChannel.position() != bytesWrittenToMergedFile) { + throw new IOException( + "Current position " + outputFileChannel.position() + " does not equal expected " + + "position " + bytesWrittenToMergedFile + " after transferTo. Please check your " + + " kernel version to see if it is 2.6.32, as there is a kernel bug which will lead " + + "to unexpected behavior when using transferTo. You can set " + + "spark.file.transferTo=false to disable this NIO feature."); + } cleanUp(); File resolvedTmp = outputTempFile != null && outputTempFile.isFile() ? 
outputTempFile : null; blockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, resolvedTmp); @@ -133,11 +146,10 @@ private void initStream() throws IOException { } private void initChannel() throws IOException { - if (outputFileStream == null) { - outputFileStream = new FileOutputStream(outputTempFile, true); - } + // This file needs to opened in append mode in order to work around a Linux kernel bug that + // affects transferTo; see SPARK-3948 for more details. if (outputFileChannel == null) { - outputFileChannel = outputFileStream.getChannel(); + outputFileChannel = new FileOutputStream(outputTempFile, true).getChannel(); } } @@ -227,6 +239,7 @@ public void write(byte[] buf, int pos, int length) throws IOException { public void close() { isClosed = true; partitionLengths[partitionId] = count; + bytesWrittenToMergedFile += count; } private void verifyNotClosed() { @@ -257,6 +270,7 @@ public WritableByteChannel channel() { @Override public void close() throws IOException { partitionLengths[partitionId] = getCount(); + bytesWrittenToMergedFile += partitionLengths[partitionId]; } } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskSingleSpillMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskSingleSpillMapOutputWriter.java new file mode 100644 index 0000000000000..c8b41992a8919 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskSingleSpillMapOutputWriter.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.sort.io; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; + +import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter; +import org.apache.spark.util.Utils; + +public class LocalDiskSingleSpillMapOutputWriter + implements SingleSpillShuffleMapOutputWriter { + + private final int shuffleId; + private final long mapId; + private final IndexShuffleBlockResolver blockResolver; + + public LocalDiskSingleSpillMapOutputWriter( + int shuffleId, + long mapId, + IndexShuffleBlockResolver blockResolver) { + this.shuffleId = shuffleId; + this.mapId = mapId; + this.blockResolver = blockResolver; + } + + @Override + public void transferMapSpillFile( + File mapSpillFile, + long[] partitionLengths) throws IOException { + // The map spill file already has the proper format, and it contains all of the partition data. + // So just transfer it directly to the destination without any merging. 
+ File outputFile = blockResolver.getDataFile(shuffleId, mapId); + File tempFile = Utils.tempFileWith(outputFile); + Files.move(mapSpillFile.toPath(), tempFile.toPath()); + blockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, tempFile); + } +} diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index d320ba3139541..7bdd89488d119 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -694,7 +694,10 @@ public boolean append(Object kbase, long koff, int klen, Object vbase, long voff assert (vlen % 8 == 0); assert (longArray != null); - if (numKeys == MAX_CAPACITY + // We should not increase number of keys to be MAX_CAPACITY. The usage pattern of this map is + // lookup + append. If we append key until the number of keys to be MAX_CAPACITY, next time + // the call of lookup will hang forever because it cannot find an empty slot. + if (numKeys == MAX_CAPACITY - 1 // The map could be reused from last spill (because of no enough memory to grow), // then we don't try to grow again if hit the `growthThreshold`. || !canGrowArray && numKeys >= growthThreshold) { @@ -741,7 +744,9 @@ public boolean append(Object kbase, long koff, int klen, Object vbase, long voff longArray.set(pos * 2 + 1, keyHashcode); isDefined = true; - if (numKeys >= growthThreshold && longArray.size() < MAX_CAPACITY) { + // We use two array entries per key, so the array size is twice the capacity. + // We should compare the current capacity of the array, instead of its size. 
+ if (numKeys >= growthThreshold && longArray.size() / 2 < MAX_CAPACITY) { try { growAndRehash(); } catch (SparkOutOfMemoryError oom) { @@ -886,6 +891,7 @@ public void reset() { numKeys = 0; numValues = 0; freeArray(longArray); + longArray = null; while (dataPages.size() > 0) { MemoryBlock dataPage = dataPages.removeLast(); freePage(dataPage); diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 1b206c11d9a8e..55e4e609c3c7b 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -447,8 +447,6 @@ public void insertKVRecord(Object keyBase, long keyOffset, int keyLen, /** * Merges another UnsafeExternalSorters into this one, the other one will be emptied. - * - * @throws IOException */ public void merge(UnsafeExternalSorter other) throws IOException { other.spill(); diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index 1a9453a8b3e80..e14964d68119b 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -205,6 +205,10 @@ public long getSortTimeNanos() { } public long getMemoryUsage() { + if (array == null) { + return 0L; + } + return array.size() * 8; } diff --git a/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder b/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder new file mode 100644 index 0000000000000..784e58270ab42 --- /dev/null +++ 
b/core/src/main/resources/META-INF/services/org.apache.spark.deploy.history.EventFilterBuilder @@ -0,0 +1 @@ +org.apache.spark.deploy.history.BasicEventFilterBuilder \ No newline at end of file diff --git a/core/src/main/resources/org/apache/spark/log4j-defaults.properties b/core/src/main/resources/org/apache/spark/log4j-defaults.properties index eb12848900b58..71652d0e9f5e8 100644 --- a/core/src/main/resources/org/apache/spark/log4j-defaults.properties +++ b/core/src/main/resources/org/apache/spark/log4j-defaults.properties @@ -33,7 +33,8 @@ log4j.logger.org.sparkproject.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs +# in SparkSQL with Hive support log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html index 31ef04552b880..e91449013e371 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html @@ -37,8 +37,8 @@

Summary

Disk Used Cores - Active Tasks - Failed Tasks + Active Tasks + Failed Tasks Complete Tasks Total Tasks Summary title="Total shuffle bytes and records read (includes both data read locally and data read from remote executors)."> Shuffle Read - Shuffle Write - Blacklisted @@ -71,13 +71,10 @@

Executors

- - - - + + + + - - - - - - - + + + + + + + - - + + diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 11d7c77d0c667..090bc72dc9274 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -462,7 +462,8 @@ $(document).ready(function () { {"visible": false, "targets": 5}, {"visible": false, "targets": 6}, {"visible": false, "targets": 9} - ] + ], + "deferRender": true }; execDataTable = $(selector).DataTable(conf); diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index 5f291620e0e95..4df5f07f077d7 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -177,6 +177,7 @@ $(document).ready(function() { {name: 'eventLog'}, ], "autoWidth": false, + "deferRender": true }; if (hasMultipleAttempts) { diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js index 9960d5c34d1fc..ecd580e5c64aa 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js +++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js @@ -97,9 +97,14 @@ sorttable = { sorttable.reverse(this.sorttable_tbody); this.className = this.className.replace('sorttable_sorted', 'sorttable_sorted_reverse'); - this.removeChild(document.getElementById('sorttable_sortfwdind')); + rowlists = this.parentNode.getElementsByTagName("span"); + for (var j=0; j < rowlists.length; j++) { + if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/)) { + rowlists[j].parentNode.removeChild(rowlists[j]); + } + } sortrevind = document.createElement('span'); - sortrevind.id = "sorttable_sortrevind"; + sortrevind.class = 
"sorttable_sortrevind"; sortrevind.innerHTML = stIsIE ? ' 5' : ' ▾'; this.appendChild(sortrevind); return; @@ -110,9 +115,14 @@ sorttable = { sorttable.reverse(this.sorttable_tbody); this.className = this.className.replace('sorttable_sorted_reverse', 'sorttable_sorted'); - this.removeChild(document.getElementById('sorttable_sortrevind')); + rowlists = this.parentNode.getElementsByTagName("span"); + for (var j=0; j < rowlists.length; j++) { + if (rowlists[j].className.search(/\sorttable_sortrevind\b/)) { + rowlists[j].parentNode.removeChild(rowlists[j]); + } + } sortfwdind = document.createElement('span'); - sortfwdind.id = "sorttable_sortfwdind"; + sortfwdind.class = "sorttable_sortfwdind"; sortfwdind.innerHTML = stIsIE ? ' 6' : ' ▴'; this.appendChild(sortfwdind); return; @@ -126,14 +136,17 @@ sorttable = { cell.className = cell.className.replace('sorttable_sorted',''); } }); - sortfwdind = document.getElementById('sorttable_sortfwdind'); - if (sortfwdind) { sortfwdind.parentNode.removeChild(sortfwdind); } - sortrevind = document.getElementById('sorttable_sortrevind'); - if (sortrevind) { sortrevind.parentNode.removeChild(sortrevind); } + rowlists = this.parentNode.getElementsByTagName("span"); + for (var j=0; j < rowlists.length; j++) { + if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) + || rowlists[j].className.search(/\sorttable_sortrevind\b/) ) { + rowlists[j].parentNode.removeChild(rowlists[j]); + } + } this.className += ' sorttable_sorted'; sortfwdind = document.createElement('span'); - sortfwdind.id = "sorttable_sortfwdind"; + sortfwdind.class = "sorttable_sortfwdind"; sortfwdind.innerHTML = stIsIE ? 
' 6' : ' ▴'; this.appendChild(sortfwdind); diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index 035d72f8956ff..25dec9d3788ba 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -513,7 +513,7 @@ function addTooltipsForRDDs(svgContainer) { if (tooltipText) { node.select("circle") .attr("data-toggle", "tooltip") - .attr("data-placement", "bottom") + .attr("data-placement", "top") .attr("data-html", "true") // to interpret line break, tooltipText is showing title .attr("title", tooltipText); } diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 3ef1a76fd7202..ee2b7b353d62e 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -286,7 +286,7 @@ $(document).ready(function () { " Show Additional Metrics" + "" + ""); $('#scheduler_delay').attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "top") .attr("title", "Scheduler delay includes time to ship the task from the scheduler to the executor, and time to send " + "the task result from the executor to the scheduler. 
If scheduler delay is large, consider decreasing the size of tasks or decreasing the size of task results."); $('#task_deserialization_time').attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "top") .attr("title", "Time spent deserializing the task closure on the executor, including the time to read the broadcasted task."); $('#shuffle_read_blocked_time').attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "top") .attr("title", "Time that the task spent blocked waiting for shuffle data to be read from remote machines."); $('#shuffle_remote_reads').attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "top") .attr("title", "Total shuffle bytes read from remote executors. This is a subset of the shuffle read bytes; the remaining shuffle data is read locally. "); $('#result_serialization_time').attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "top") .attr("title", "Time spent serializing the task result on the executor before sending it back to the driver."); $('#getting_result_time').attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "top") .attr("title", "Time that the driver spends fetching task results from workers. If this is large, consider decreasing the amount of data returned from each task."); $('#peak_execution_memory').attr("data-toggle", "tooltip") - .attr("data-placement", "right") + .attr("data-placement", "top") .attr("title", "Execution memory refers to the memory used by internal data structures created during " + "shuffles, aggregations and joins when Tungsten is enabled. 
The value of this accumulator " + "should be approximately the sum of the peak sizes across all such data structures created " + @@ -325,6 +325,25 @@ $(document).ready(function () { $('[data-toggle="tooltip"]').tooltip(); var tasksSummary = $("#parent-container"); getStandAloneAppId(function (appId) { + // rendering the UI page + $.get(createTemplateURI(appId, "stagespage"), function(template) { + tasksSummary.append(Mustache.render($(template).filter("#stages-summary-template").html())); + + $("#additionalMetrics").click(function(){ + $("#arrowtoggle1").toggleClass("arrow-open arrow-closed"); + $("#toggle-metrics").toggle(); + if (window.localStorage) { + window.localStorage.setItem("arrowtoggle1class", $("#arrowtoggle1").attr('class')); + } + }); + + $("#aggregatedMetrics").click(function(){ + $("#arrowtoggle2").toggleClass("arrow-open arrow-closed"); + $("#toggle-aggregatedMetrics").toggle(); + if (window.localStorage) { + window.localStorage.setItem("arrowtoggle2class", $("#arrowtoggle2").attr('class')); + } + }); var endPoint = stageEndPoint(appId); var stageAttemptId = getStageAttemptId(); @@ -473,27 +492,6 @@ $(document).ready(function () { var accumulatorTable = responseBody.accumulatorUpdates.filter(accumUpdate => !(accumUpdate.name).toString().includes("internal.")); - // rendering the UI page - var data = {"executors": response}; - $.get(createTemplateURI(appId, "stagespage"), function(template) { - tasksSummary.append(Mustache.render($(template).filter("#stages-summary-template").html(), data)); - - $("#additionalMetrics").click(function(){ - $("#arrowtoggle1").toggleClass("arrow-open arrow-closed"); - $("#toggle-metrics").toggle(); - if (window.localStorage) { - window.localStorage.setItem("arrowtoggle1class", $("#arrowtoggle1").attr('class')); - } - }); - - $("#aggregatedMetrics").click(function(){ - $("#arrowtoggle2").toggleClass("arrow-open arrow-closed"); - $("#toggle-aggregatedMetrics").toggle(); - if (window.localStorage) { - 
window.localStorage.setItem("arrowtoggle2class", $("#arrowtoggle2").attr('class')); - } - }); - var quantiles = "0,0.25,0.5,0.75,1.0"; $.getJSON(endPoint + "/" + stageAttemptId + "/taskSummary?quantiles=" + quantiles, function(taskMetricsResponse, status, jqXHR) { @@ -758,8 +756,11 @@ $(document).ready(function () { { data : function (row, type) { if (accumulatorTable.length > 0 && row.accumulatorUpdates.length > 0) { - var accIndex = row.accumulatorUpdates.length - 1; - return row.accumulatorUpdates[accIndex].name + ' : ' + row.accumulatorUpdates[accIndex].update; + var allAccums = ""; + row.accumulatorUpdates.forEach(function(accumulator) { + allAccums += accumulator.name + ': ' + accumulator.update + "
"; + }) + return allAccums; } else { return ""; } @@ -877,7 +878,8 @@ $(document).ready(function () { { "visible": false, "targets": 16 }, { "visible": false, "targets": 17 }, { "visible": false, "targets": 18 } - ] + ], + "deferRender": true }; taskTableSelector = $(taskTable).DataTable(taskConf); $('#active-tasks-table_filter input').unbind(); @@ -961,4 +963,4 @@ $(document).ready(function () { }); }); }); -}); +}); \ No newline at end of file diff --git a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.css similarity index 100% rename from streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css rename to core/src/main/resources/org/apache/spark/ui/static/streaming-page.css diff --git a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/streaming-page.js similarity index 100% rename from streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js rename to core/src/main/resources/org/apache/spark/ui/static/streaming-page.js diff --git a/core/src/main/resources/org/apache/spark/ui/static/structured-streaming-page.js b/core/src/main/resources/org/apache/spark/ui/static/structured-streaming-page.js new file mode 100644 index 0000000000000..70250fdbd2d0c --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/structured-streaming-page.js @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// pre-define some colors for legends. +var colorPool = ["#F8C471", "#F39C12", "#B9770E", "#73C6B6", "#16A085", "#117A65", "#B2BABB", "#7F8C8D", "#616A6B"]; + +function drawAreaStack(id, labels, values, minX, maxX, minY, maxY) { + d3.select(d3.select(id).node().parentNode) + .style("padding", "8px 0 8px 8px") + .style("border-right", "0px solid white"); + + // Setup svg using Bostock's margin convention + var margin = {top: 20, right: 40, bottom: 30, left: maxMarginLeftForTimeline}; + var width = 850 - margin.left - margin.right; + var height = 300 - margin.top - margin.bottom; + + var svg = d3.select(id) + .append("svg") + .attr("width", width + margin.left + margin.right) + .attr("height", height + margin.top + margin.bottom) + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + var data = values; + + var parse = d3.time.format("%H:%M:%S.%L").parse; + + // Transpose the data into layers + var dataset = d3.layout.stack()(labels.map(function(fruit) { + return data.map(function(d) { + return {_x: d.x, x: parse(d.x), y: +d[fruit]}; + }); + })); + + + // Set x, y and colors + var x = d3.scale.ordinal() + .domain(dataset[0].map(function(d) { return d.x; })) + .rangeRoundBands([10, width-10], 0.02); + + var y = d3.scale.linear() + .domain([0, d3.max(dataset, function(d) { return d3.max(d, function(d) { return d.y0 + d.y; }); })]) + .range([height, 0]); + + var colors = colorPool.slice(0, labels.length) + + // Define and draw axes + var yAxis = d3.svg.axis() + .scale(y) + .orient("left") + .ticks(7) + 
.tickFormat( function(d) { return d } ); + + var xAxis = d3.svg.axis() + .scale(x) + .orient("bottom") + .tickFormat(d3.time.format("%H:%M:%S.%L")); + + // Only show the first and last time in the graph + var xline = [] + xline.push(x.domain()[0]) + xline.push(x.domain()[x.domain().length - 1]) + xAxis.tickValues(xline); + + svg.append("g") + .attr("class", "y axis") + .call(yAxis) + .append("text") + .attr("transform", "translate(0," + unitLabelYOffset + ")") + .text("ms"); + + svg.append("g") + .attr("class", "x axis") + .attr("transform", "translate(0," + height + ")") + .call(xAxis); + + // Create groups for each series, rects for each segment + var groups = svg.selectAll("g.cost") + .data(dataset) + .enter().append("g") + .attr("class", "cost") + .style("fill", function(d, i) { return colors[i]; }); + + var rect = groups.selectAll("rect") + .data(function(d) { return d; }) + .enter() + .append("rect") + .attr("x", function(d) { return x(d.x); }) + .attr("y", function(d) { return y(d.y0 + d.y); }) + .attr("height", function(d) { return y(d.y0) - y(d.y0 + d.y); }) + .attr("width", x.rangeBand()) + .on('mouseover', function(d) { + var tip = ''; + var idx = 0; + var _values = timeToValues[d._x] + _values.forEach(function (k) { + tip += labels[idx] + ': ' + k + ' '; + idx += 1; + }); + tip += " at " + d._x + showBootstrapTooltip(d3.select(this).node(), tip); + }) + .on('mouseout', function() { + hideBootstrapTooltip(d3.select(this).node()); + }) + .on("mousemove", function(d) { + var xPosition = d3.mouse(this)[0] - 15; + var yPosition = d3.mouse(this)[1] - 25; + tooltip.attr("transform", "translate(" + xPosition + "," + yPosition + ")"); + tooltip.select("text").text(d.y); + }); + + + // Draw legend + var legend = svg.selectAll(".legend") + .data(colors) + .enter().append("g") + .attr("class", "legend") + .attr("transform", function(d, i) { return "translate(30," + i * 19 + ")"; }); + + legend.append("rect") + .attr("x", width - 20) + .attr("width", 18) + 
.attr("height", 18) + .style("fill", function(d, i) {return colors.slice().reverse()[i];}) + .on('mouseover', function(d, i) { + var len = labels.length + showBootstrapTooltip(d3.select(this).node(), labels[len - 1 - i]); + }) + .on('mouseout', function() { + hideBootstrapTooltip(d3.select(this).node()); + }) + .on("mousemove", function(d) { + var xPosition = d3.mouse(this)[0] - 15; + var yPosition = d3.mouse(this)[1] - 25; + tooltip.attr("transform", "translate(" + xPosition + "," + yPosition + ")"); + tooltip.select("text").text(d.y); + }); + + // Prep the tooltip bits, initial display is hidden + var tooltip = svg.append("g") + .attr("class", "tooltip") + .style("display", "none"); + + tooltip.append("rect") + .attr("width", 30) + .attr("height", 20) + .attr("fill", "white") + .style("opacity", 0.5); + + tooltip.append("text") + .attr("x", 15) + .attr("dy", "1.2em") + .style("text-anchor", "middle") + .attr("font-size", "12px") + .attr("font-weight", "bold"); +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css index 10bceae2fbdda..3f31403eaeef3 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css @@ -207,6 +207,12 @@ rect.getting-result-time-proportion { border-color: #3EC0FF; } +.vis-timeline .vis-item.executor.added.vis-selected { + background-color: #00AAFF; + border-color: #184C66; + z-index: 2; +} + .legend-area rect.executor-added-legend { fill: #A0DFFF; stroke: #3EC0FF; @@ -217,17 +223,17 @@ rect.getting-result-time-proportion { border-color: #FF4D6D; } +.vis-timeline .vis-item.executor.removed.vis-selected { + background-color: #FF6680; + border-color: #661F2C; + z-index: 2; +} + .legend-area rect.executor-removed-legend { fill: #FFA1B0; stroke: #FF4D6D; } -.vis-timeline .vis-item.executor.vis-selected { - background-color: #A2FCC0; - border-color: 
#36F572; - z-index: 2; -} - tr.corresponding-item-hover > td, tr.corresponding-item-hover > th { background-color: #D6FFE4 !important; } diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index 705a08f0293d3..b2cd616791734 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -83,8 +83,9 @@ function drawApplicationTimeline(groupArray, eventObjArray, startTime, offset) { }); } -$(function (){ - if (window.localStorage.getItem("expand-application-timeline") == "true") { +$(function () { + if ($("span.expand-application-timeline").length && + window.localStorage.getItem("expand-application-timeline") == "true") { // Set it to false so that the click function can revert it window.localStorage.setItem("expand-application-timeline", "false"); $("span.expand-application-timeline").trigger('click'); @@ -159,8 +160,9 @@ function drawJobTimeline(groupArray, eventObjArray, startTime, offset) { }); } -$(function (){ - if (window.localStorage.getItem("expand-job-timeline") == "true") { +$(function () { + if ($("span.expand-job-timeline").length && + window.localStorage.getItem("expand-job-timeline") == "true") { // Set it to false so that the click function can revert it window.localStorage.setItem("expand-job-timeline", "false"); $("span.expand-job-timeline").trigger('click'); @@ -226,8 +228,9 @@ function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, ma }); } -$(function (){ - if (window.localStorage.getItem("expand-task-assignment-timeline") == "true") { +$(function () { + if ($("span.expand-task-assignment-timeline").length && + window.localStorage.getItem("expand-task-assignment-timeline") == "true") { // Set it to false so that the click function can revert it window.localStorage.setItem("expand-task-assignment-timeline", "false"); 
$("span.expand-task-assignment-timeline").trigger('click'); diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css old mode 100644 new mode 100755 index 3e28816ba61b6..801c449fd626f --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -245,9 +245,9 @@ a.expandbutton { max-width: 600px; } -.paginate_button.active > a { - color: #999999; - text-decoration: underline; +.paginate_button.active { + border: 1px solid #979797 !important; + background: white linear-gradient(to bottom, #fff 0%, #dcdcdc 100%); } .title-table { @@ -263,32 +263,36 @@ a.expandbutton { width: 200px; } +.select-all-div-checkbox-div { + width: 90px; +} + .scheduler-delay-checkbox-div { - width: 120px; + width: 130px; } .task-deserialization-time-checkbox-div { - width: 175px; + width: 190px; } .shuffle-read-blocked-time-checkbox-div { - width: 187px; + width: 200px; } .shuffle-remote-reads-checkbox-div { - width: 157px; + width: 170px; } .result-serialization-time-checkbox-div { - width: 171px; + width: 185px; } .getting-result-time-checkbox-div { - width: 141px; + width: 155px; } .peak-execution-memory-checkbox-div { - width: 170px; + width: 180px; } #active-tasks-table th { diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.js b/core/src/main/resources/org/apache/spark/ui/static/webui.js index 89622106ff1f0..0ba461f02317f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.js +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.js @@ -87,4 +87,16 @@ $(function() { collapseTablePageLoad('collapse-aggregated-runningExecutions','aggregated-runningExecutions'); collapseTablePageLoad('collapse-aggregated-completedExecutions','aggregated-completedExecutions'); collapseTablePageLoad('collapse-aggregated-failedExecutions','aggregated-failedExecutions'); -}); \ No newline at end of file + 
collapseTablePageLoad('collapse-aggregated-sessionstat','aggregated-sessionstat'); + collapseTablePageLoad('collapse-aggregated-sqlstat','aggregated-sqlstat'); + collapseTablePageLoad('collapse-aggregated-sqlsessionstat','aggregated-sqlsessionstat'); + collapseTablePageLoad('collapse-aggregated-activeQueries','aggregated-activeQueries'); + collapseTablePageLoad('collapse-aggregated-completedQueries','aggregated-completedQueries'); +}); + +$(function() { + // Show/hide full job description on click event. + $(".description-input").click(function() { + $(this).toggleClass("description-input-full"); + }); +}); diff --git a/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala b/core/src/main/scala-2.12/org/apache/spark/util/BoundedPriorityQueue.scala similarity index 93% rename from core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala rename to core/src/main/scala-2.12/org/apache/spark/util/BoundedPriorityQueue.scala index eff0aa4453f08..a241023723444 100644 --- a/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala +++ b/core/src/main/scala-2.12/org/apache/spark/util/BoundedPriorityQueue.scala @@ -31,6 +31,8 @@ import scala.collection.generic.Growable private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A]) extends Iterable[A] with Growable[A] with Serializable { + // Note: this class supports Scala 2.12. A parallel source tree has a 2.13 implementation. 
+ private val underlying = new JPriorityQueue[A](maxSize, ord) override def iterator: Iterator[A] = underlying.iterator.asScala @@ -59,7 +61,7 @@ private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Orderin this += elem1 += elem2 ++= elems } - override def clear() { underlying.clear() } + override def clear(): Unit = { underlying.clear() } private def maybeReplaceLowest(a: A): Boolean = { val head = underlying.peek() diff --git a/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala b/core/src/main/scala-2.12/org/apache/spark/util/TimeStampedHashMap.scala similarity index 92% rename from core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala rename to core/src/main/scala-2.12/org/apache/spark/util/TimeStampedHashMap.scala index 32af0127bbf38..da12582a5083a 100644 --- a/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala +++ b/core/src/main/scala-2.12/org/apache/spark/util/TimeStampedHashMap.scala @@ -40,6 +40,8 @@ private[spark] case class TimeStampedValue[V](value: V, timestamp: Long) private[spark] class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = false) extends mutable.Map[A, B]() with Logging { + // Note: this class supports Scala 2.12. A parallel source tree has a 2.13 implementation. 
+ private val internalMap = new ConcurrentHashMap[A, TimeStampedValue[B]]() def get(key: A): Option[B] = { @@ -81,7 +83,7 @@ private[spark] class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = fa this } - override def update(key: A, value: B) { + override def update(key: A, value: B): Unit = { this += ((key, value)) } @@ -97,7 +99,7 @@ private[spark] class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = fa override def size: Int = internalMap.size - override def foreach[U](f: ((A, B)) => U) { + override def foreach[U](f: ((A, B)) => U): Unit = { val it = getEntrySet.iterator while(it.hasNext) { val entry = it.next() @@ -111,13 +113,13 @@ private[spark] class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = fa Option(prev).map(_.value) } - def putAll(map: Map[A, B]) { + def putAll(map: Map[A, B]): Unit = { map.foreach { case (k, v) => update(k, v) } } def toMap: Map[A, B] = iterator.toMap - def clearOldValues(threshTime: Long, f: (A, B) => Unit) { + def clearOldValues(threshTime: Long, f: (A, B) => Unit): Unit = { val it = getEntrySet.iterator while (it.hasNext) { val entry = it.next() @@ -130,7 +132,7 @@ private[spark] class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = fa } /** Removes old key-value pairs that have timestamp earlier than `threshTime`. */ - def clearOldValues(threshTime: Long) { + def clearOldValues(threshTime: Long): Unit = { clearOldValues(threshTime, (_, _) => ()) } diff --git a/core/src/main/scala-2.13/org/apache/spark/util/BoundedPriorityQueue.scala b/core/src/main/scala-2.13/org/apache/spark/util/BoundedPriorityQueue.scala new file mode 100644 index 0000000000000..bc55a44fc3c2e --- /dev/null +++ b/core/src/main/scala-2.13/org/apache/spark/util/BoundedPriorityQueue.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.Serializable +import java.util.{PriorityQueue => JPriorityQueue} + +import scala.collection.JavaConverters._ +import scala.collection.mutable.Growable + +/** + * Bounded priority queue. This class wraps the original PriorityQueue + * class and modifies it such that only the top K elements are retained. + * The top K elements are defined by an implicit Ordering[A]. + */ +private[spark] class BoundedPriorityQueue[A](maxSize: Int)(implicit ord: Ordering[A]) + extends Iterable[A] with Growable[A] with Serializable { + + // Note: this class supports Scala 2.13. A parallel source tree has a 2.12 implementation. 
+ + private val underlying = new JPriorityQueue[A](maxSize, ord) + + override def iterator: Iterator[A] = underlying.iterator.asScala + + override def size: Int = underlying.size + + override def knownSize: Int = size + + override def addAll(xs: IterableOnce[A]): this.type = { + xs.foreach { this += _ } + this + } + + override def addOne(elem: A): this.type = { + if (size < maxSize) { + underlying.offer(elem) + } else { + maybeReplaceLowest(elem) + } + this + } + + def poll(): A = { + underlying.poll() + } + + override def clear(): Unit = { underlying.clear() } + + private def maybeReplaceLowest(a: A): Boolean = { + val head = underlying.peek() + if (head != null && ord.gt(a, head)) { + underlying.poll() + underlying.offer(a) + } else { + false + } + } +} diff --git a/core/src/main/scala-2.13/org/apache/spark/util/TimeStampedHashMap.scala b/core/src/main/scala-2.13/org/apache/spark/util/TimeStampedHashMap.scala new file mode 100644 index 0000000000000..9c860061b5862 --- /dev/null +++ b/core/src/main/scala-2.13/org/apache/spark/util/TimeStampedHashMap.scala @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.util + +import java.util.Map.Entry +import java.util.Set +import java.util.concurrent.ConcurrentHashMap + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.internal.Logging + +private[spark] case class TimeStampedValue[V](value: V, timestamp: Long) + +/** + * This is a custom implementation of scala.collection.mutable.Map which stores the insertion + * timestamp along with each key-value pair. If specified, the timestamp of each pair can be + * updated every time it is accessed. Key-value pairs whose timestamp are older than a particular + * threshold time can then be removed using the clearOldValues method. This is intended to + * be a drop-in replacement of scala.collection.mutable.HashMap. + * + * @param updateTimeStampOnGet Whether timestamp of a pair will be updated when it is accessed + */ +private[spark] class TimeStampedHashMap[A, B](updateTimeStampOnGet: Boolean = false) + extends mutable.Map[A, B]() with Logging { + + // Note: this class supports Scala 2.13. A parallel source tree has a 2.12 implementation. 
+ + private val internalMap = new ConcurrentHashMap[A, TimeStampedValue[B]]() + + def get(key: A): Option[B] = { + val value = internalMap.get(key) + if (value != null && updateTimeStampOnGet) { + internalMap.replace(key, value, TimeStampedValue(value.value, currentTime)) + } + Option(value).map(_.value) + } + + def iterator: Iterator[(A, B)] = { + getEntrySet.iterator.asScala.map(kv => (kv.getKey, kv.getValue.value)) + } + + def getEntrySet: Set[Entry[A, TimeStampedValue[B]]] = internalMap.entrySet + + override def + [B1 >: B](kv: (A, B1)): mutable.Map[A, B1] = { + val newMap = new TimeStampedHashMap[A, B1] + val oldInternalMap = this.internalMap.asInstanceOf[ConcurrentHashMap[A, TimeStampedValue[B1]]] + newMap.internalMap.putAll(oldInternalMap) + kv match { case (a, b) => newMap.internalMap.put(a, TimeStampedValue(b, currentTime)) } + newMap + } + + override def addOne(kv: (A, B)): this.type = { + kv match { case (a, b) => internalMap.put(a, TimeStampedValue(b, currentTime)) } + this + } + + override def subtractOne(key: A): this.type = { + internalMap.remove(key) + this + } + + override def update(key: A, value: B): Unit = { + this += ((key, value)) + } + + override def apply(key: A): B = { + get(key).getOrElse { throw new NoSuchElementException() } + } + + override def filter(p: ((A, B)) => Boolean): mutable.Map[A, B] = { + internalMap.asScala.map { case (k, TimeStampedValue(v, t)) => (k, v) }.filter(p) + } + + override def empty: mutable.Map[A, B] = new TimeStampedHashMap[A, B]() + + override def size: Int = internalMap.size + + override def foreach[U](f: ((A, B)) => U): Unit = { + val it = getEntrySet.iterator + while(it.hasNext) { + val entry = it.next() + val kv = (entry.getKey, entry.getValue.value) + f(kv) + } + } + + def putIfAbsent(key: A, value: B): Option[B] = { + val prev = internalMap.putIfAbsent(key, TimeStampedValue(value, currentTime)) + Option(prev).map(_.value) + } + + def putAll(map: Map[A, B]): Unit = { + map.foreach { case (k, v) => 
update(k, v) } + } + + def toMap: Map[A, B] = iterator.toMap + + def clearOldValues(threshTime: Long, f: (A, B) => Unit): Unit = { + val it = getEntrySet.iterator + while (it.hasNext) { + val entry = it.next() + if (entry.getValue.timestamp < threshTime) { + f(entry.getKey, entry.getValue.value) + logDebug("Removing key " + entry.getKey) + it.remove() + } + } + } + + /** Removes old key-value pairs that have timestamp earlier than `threshTime`. */ + def clearOldValues(threshTime: Long): Unit = { + clearOldValues(threshTime, (_, _) => ()) + } + + private def currentTime: Long = System.currentTimeMillis + + // For testing + + def getTimeStampedValue(key: A): Option[TimeStampedValue[B]] = { + Option(internalMap.get(key)) + } + + def getTimestamp(key: A): Option[Long] = { + getTimeStampedValue(key).map(_.timestamp) + } +} diff --git a/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala b/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala index 9f59295059d30..4e417679ca663 100644 --- a/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala +++ b/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala @@ -107,9 +107,9 @@ private[spark] class BarrierCoordinator( private var timerTask: TimerTask = null // Init a TimerTask for a barrier() call. - private def initTimerTask(): Unit = { + private def initTimerTask(state: ContextBarrierState): Unit = { timerTask = new TimerTask { - override def run(): Unit = synchronized { + override def run(): Unit = state.synchronized { // Timeout current barrier() call, fail all the sync requests. requesters.foreach(_.sendFailure(new SparkException("The coordinator didn't get all " + s"barrier sync requests for barrier epoch $barrierEpoch from $barrierId within " + @@ -148,7 +148,7 @@ private[spark] class BarrierCoordinator( // If this is the first sync message received for a barrier() call, start timer to ensure // we may timeout for the sync. 
if (requesters.isEmpty) { - initTimerTask() + initTimerTask(this) timer.schedule(timerTask, timeoutInSecs * 1000) } // Add the requester to array of RPCCallContexts pending for reply. diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index 5afd8a5d866b2..3d369802f3023 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -19,6 +19,7 @@ package org.apache.spark import java.util.{Properties, Timer, TimerTask} +import scala.collection.JavaConverters._ import scala.concurrent.TimeoutException import scala.concurrent.duration._ @@ -211,6 +212,10 @@ class BarrierTaskContext private[spark] ( override def resources(): Map[String, ResourceInformation] = taskContext.resources() + override def resourcesJMap(): java.util.Map[String, ResourceInformation] = { + resources().asJava + } + override private[spark] def killTaskIfInterrupted(): Unit = taskContext.killTaskIfInterrupted() override private[spark] def getKillReason(): Option[String] = taskContext.getKillReason() diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 24c83993b1b60..9506c36bf9c8c 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -27,6 +27,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.rdd.{RDD, ReliableRDDCheckpointData} +import org.apache.spark.shuffle.api.ShuffleDriverComponents import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, ThreadUtils, Utils} /** @@ -58,7 +59,9 @@ private class CleanupTaskWeakReference( * to be processed when the associated object goes out of scope of the application. 
Actual * cleanup is performed in a separate daemon thread. */ -private[spark] class ContextCleaner(sc: SparkContext) extends Logging { +private[spark] class ContextCleaner( + sc: SparkContext, + shuffleDriverComponents: ShuffleDriverComponents) extends Logging { /** * A buffer to ensure that `CleanupTaskWeakReference`s are not garbage collected as long as they @@ -71,7 +74,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { private val listeners = new ConcurrentLinkedQueue[CleanerListener]() - private val cleaningThread = new Thread() { override def run() { keepCleaning() }} + private val cleaningThread = new Thread() { override def run(): Unit = keepCleaning() } private val periodicGCService: ScheduledExecutorService = ThreadUtils.newDaemonSingleThreadScheduledExecutor("context-cleaner-periodic-gc") @@ -221,7 +224,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { try { logDebug("Cleaning shuffle " + shuffleId) mapOutputTrackerMaster.unregisterShuffle(shuffleId) - blockManagerMaster.removeShuffle(shuffleId, blocking) + shuffleDriverComponents.removeShuffle(shuffleId, blocking) listeners.asScala.foreach(_.shuffleCleaned(shuffleId)) logDebug("Cleaned shuffle " + shuffleId) } catch { @@ -269,7 +272,6 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { } } - private def blockManagerMaster = sc.env.blockManager.master private def broadcastManager = sc.env.broadcastManager private def mapOutputTrackerMaster = sc.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster] } diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index fb051a8c0db8e..ba8e4d69ba755 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -93,9 +93,10 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( val shuffleId: Int = _rdd.context.newShuffleId() val 
shuffleHandle: ShuffleHandle = _rdd.context.env.shuffleManager.registerShuffle( - shuffleId, _rdd.partitions.length, this) + shuffleId, this) _rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this)) + _rdd.sparkContext.shuffleDriverComponents.registerShuffle(shuffleId) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala index cb965cb180207..00bd0063c9e3a 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -37,24 +37,29 @@ private[spark] trait ExecutorAllocationClient { /** * Update the cluster manager on our scheduling needs. Three bits of information are included * to help it make decisions. - * @param numExecutors The total number of executors we'd like to have. The cluster manager - * shouldn't kill any running executor to reach this number, but, - * if all existing executors were to die, this is the number of executors - * we'd want to be allocated. - * @param localityAwareTasks The number of tasks in all active stages that have a locality - * preferences. This includes running, pending, and completed tasks. - * @param hostToLocalTaskCount A map of hosts to the number of tasks from all active stages - * that would like to like to run on that host. - * This includes running, pending, and completed tasks. + * + * @param resourceProfileIdToNumExecutors The total number of executors we'd like to have per + * ResourceProfile id. The cluster manager shouldn't kill + * any running executor to reach this number, but, if all + * existing executors were to die, this is the number + * of executors we'd want to be allocated. + * @param numLocalityAwareTasksPerResourceProfileId The number of tasks in all active stages that + * have a locality preferences per + * ResourceProfile id. This includes running, + * pending, and completed tasks. 
+ * @param hostToLocalTaskCount A map of ResourceProfile id to a map of hosts to the number of + * tasks from all active stages that would like to like to run on + * that host. This includes running, pending, and completed tasks. * @return whether the request is acknowledged by the cluster manager. */ private[spark] def requestTotalExecutors( - numExecutors: Int, - localityAwareTasks: Int, - hostToLocalTaskCount: Map[String, Int]): Boolean + resourceProfileIdToNumExecutors: Map[Int, Int], + numLocalityAwareTasksPerResourceProfileId: Map[Int, Int], + hostToLocalTaskCount: Map[Int, Map[String, Int]]): Boolean /** - * Request an additional number of executors from the cluster manager. + * Request an additional number of executors from the cluster manager for the default + * ResourceProfile. * @return whether the request is acknowledged by the cluster manager. */ def requestExecutors(numAdditionalExecutors: Int): Boolean diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 5114cf70e3f26..5cb3160711a90 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -29,6 +29,8 @@ import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL import org.apache.spark.metrics.source.Source +import org.apache.spark.resource.ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID +import org.apache.spark.resource.ResourceProfileManager import org.apache.spark.scheduler._ import org.apache.spark.scheduler.dynalloc.ExecutorMonitor import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} @@ -36,9 +38,9 @@ import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} /** * An agent that dynamically allocates and removes executors based on the workload. 
* - * The ExecutorAllocationManager maintains a moving target number of executors which is periodically - * synced to the cluster manager. The target starts at a configured initial value and changes with - * the number of pending and running tasks. + * The ExecutorAllocationManager maintains a moving target number of executors, for each + * ResourceProfile, which is periodically synced to the cluster manager. The target starts + * at a configured initial value and changes with the number of pending and running tasks. * * Decreasing the target number of executors happens when the current target is more than needed to * handle the current load. The target number of executors is always truncated to the number of @@ -57,14 +59,18 @@ import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} * quickly over time in case the maximum number of executors is very high. Otherwise, it will take * a long time to ramp up under heavy workloads. * - * The remove policy is simpler: If an executor has been idle for K seconds, meaning it has not - * been scheduled to run any tasks, then it is removed. Note that an executor caching any data + * The remove policy is simpler and is applied on each ResourceProfile separately. If an executor + * for that ResourceProfile has been idle for K seconds and the number of executors is more + * then what is needed for that ResourceProfile, meaning there are not enough tasks that could use + * the executor, then it is removed. Note that an executor caching any data * blocks will be removed if it has been idle for more than L seconds. * * There is no retry logic in either case because we make the assumption that the cluster manager * will eventually fulfill all requests it receives asynchronously. * - * The relevant Spark properties include the following: + * The relevant Spark properties are below. Each of these properties applies separately to + * every ResourceProfile. 
So if you set a minimum number of executors, that is a minimum + * for each ResourceProfile. * * spark.dynamicAllocation.enabled - Whether this feature is enabled * spark.dynamicAllocation.minExecutors - Lower bound on the number of executors @@ -95,7 +101,8 @@ private[spark] class ExecutorAllocationManager( listenerBus: LiveListenerBus, conf: SparkConf, cleaner: Option[ContextCleaner] = None, - clock: Clock = new SystemClock()) + clock: Clock = new SystemClock(), + resourceProfileManager: ResourceProfileManager) extends Logging { allocationManager => @@ -117,23 +124,23 @@ private[spark] class ExecutorAllocationManager( // During testing, the methods to actually kill and add executors are mocked out private val testing = conf.get(DYN_ALLOCATION_TESTING) - // TODO: The default value of 1 for spark.executor.cores works right now because dynamic - // allocation is only supported for YARN and the default number of cores per executor in YARN is - // 1, but it might need to be attained differently for different cluster managers - private val tasksPerExecutorForFullParallelism = - conf.get(EXECUTOR_CORES) / conf.get(CPUS_PER_TASK) - private val executorAllocationRatio = conf.get(DYN_ALLOCATION_EXECUTOR_ALLOCATION_RATIO) + private val defaultProfileId = resourceProfileManager.defaultResourceProfile.id + validateSettings() - // Number of executors to add in the next round - private var numExecutorsToAdd = 1 + // Number of executors to add for each ResourceProfile in the next round + private val numExecutorsToAddPerResourceProfileId = new mutable.HashMap[Int, Int] + numExecutorsToAddPerResourceProfileId(defaultProfileId) = 1 // The desired number of executors at this moment in time. If all our executors were to die, this // is the number of executors we would immediately want from the cluster manager. 
- private var numExecutorsTarget = initialNumExecutors + // Note every profile will be allowed to have initial number, + // we may want to make this configurable per Profile in the future + private val numExecutorsTargetPerResourceProfileId = new mutable.HashMap[Int, Int] + numExecutorsTargetPerResourceProfileId(defaultProfileId) = initialNumExecutors // A timestamp of when an addition should be triggered, or NOT_SET if it is not set // This is set when pending tasks are added but not scheduled yet @@ -165,11 +172,12 @@ private[spark] class ExecutorAllocationManager( // (2) an executor idle timeout has elapsed. @volatile private var initializing: Boolean = true - // Number of locality aware tasks, used for executor placement. - private var localityAwareTasks = 0 + // Number of locality aware tasks for each ResourceProfile, used for executor placement. + private var numLocalityAwareTasksPerResourceProfileId = new mutable.HashMap[Int, Int] + numLocalityAwareTasksPerResourceProfileId(defaultProfileId) = 0 - // Host to possible task running on it, used for executor placement. - private var hostToLocalTaskCount: Map[String, Int] = Map.empty + // ResourceProfile id to Host to possible task running on it, used for executor placement. + private var rpIdToHostToLocalTaskCount: Map[Int, Map[String, Int]] = Map.empty /** * Verify that the settings specified through the config are valid. 
@@ -233,7 +241,14 @@ private[spark] class ExecutorAllocationManager( } executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS) - client.requestTotalExecutors(numExecutorsTarget, localityAwareTasks, hostToLocalTaskCount) + // copy the maps inside synchonize to ensure not being modified + val (numExecutorsTarget, numLocalityAware) = synchronized { + val numTarget = numExecutorsTargetPerResourceProfileId.toMap + val numLocality = numLocalityAwareTasksPerResourceProfileId.toMap + (numTarget, numLocality) + } + + client.requestTotalExecutors(numExecutorsTarget, numLocalityAware, rpIdToHostToLocalTaskCount) } /** @@ -253,23 +268,38 @@ private[spark] class ExecutorAllocationManager( */ def reset(): Unit = synchronized { addTime = 0L - numExecutorsTarget = initialNumExecutors + numExecutorsTargetPerResourceProfileId.keys.foreach { rpId => + numExecutorsTargetPerResourceProfileId(rpId) = initialNumExecutors + } executorMonitor.reset() } /** - * The maximum number of executors we would need under the current load to satisfy all running - * and pending tasks, rounded up. + * The maximum number of executors, for the ResourceProfile id passed in, that we would need + * under the current load to satisfy all running and pending tasks, rounded up. 
*/ - private def maxNumExecutorsNeeded(): Int = { - val numRunningOrPendingTasks = listener.totalPendingTasks + listener.totalRunningTasks - math.ceil(numRunningOrPendingTasks * executorAllocationRatio / - tasksPerExecutorForFullParallelism) - .toInt + private def maxNumExecutorsNeededPerResourceProfile(rpId: Int): Int = { + val pending = listener.totalPendingTasksPerResourceProfile(rpId) + val pendingSpeculative = listener.pendingSpeculativeTasksPerResourceProfile(rpId) + val running = listener.totalRunningTasksPerResourceProfile(rpId) + val numRunningOrPendingTasks = pending + running + val rp = resourceProfileManager.resourceProfileFromId(rpId) + val tasksPerExecutor = rp.maxTasksPerExecutor(conf) + logDebug(s"max needed for rpId: $rpId numpending: $numRunningOrPendingTasks," + + s" tasksperexecutor: $tasksPerExecutor") + val maxNeeded = math.ceil(numRunningOrPendingTasks * executorAllocationRatio / + tasksPerExecutor).toInt + if (tasksPerExecutor > 1 && maxNeeded == 1 && pendingSpeculative > 0) { + // If we have pending speculative tasks and only need a single executor, allocate one more + // to satisfy the locality requirements of speculation + maxNeeded + 1 + } else { + maxNeeded + } } - private def totalRunningTasks(): Int = synchronized { - listener.totalRunningTasks + private def totalRunningTasksPerResourceProfile(id: Int): Int = synchronized { + listener.totalRunningTasksPerResourceProfile(id) } /** @@ -288,14 +318,15 @@ private[spark] class ExecutorAllocationManager( } // Update executor target number only after initializing flag is unset - updateAndSyncNumExecutorsTarget(clock.getTimeMillis()) + updateAndSyncNumExecutorsTarget(clock.nanoTime()) if (executorIdsToBeRemoved.nonEmpty) { removeExecutors(executorIdsToBeRemoved) } } /** - * Updates our target number of executors and syncs the result with the cluster manager. + * Updates our target number of executors for each ResourceProfile and then syncs the result + * with the cluster manager. 
* * Check to see whether our existing allocation and the requests we've made previously exceed our * current needs. If so, truncate our target and let the cluster manager know so that it can @@ -307,136 +338,205 @@ private[spark] class ExecutorAllocationManager( * @return the delta in the target number of executors. */ private def updateAndSyncNumExecutorsTarget(now: Long): Int = synchronized { - val maxNeeded = maxNumExecutorsNeeded - if (initializing) { // Do not change our target while we are still initializing, // Otherwise the first job may have to ramp up unnecessarily 0 - } else if (maxNeeded < numExecutorsTarget) { - // The target number exceeds the number we actually need, so stop adding new - // executors and inform the cluster manager to cancel the extra pending requests - val oldNumExecutorsTarget = numExecutorsTarget - numExecutorsTarget = math.max(maxNeeded, minNumExecutors) - numExecutorsToAdd = 1 - - // If the new target has not changed, avoid sending a message to the cluster manager - if (numExecutorsTarget < oldNumExecutorsTarget) { - // We lower the target number of executors but don't actively kill any yet. Killing is - // controlled separately by an idle timeout. It's still helpful to reduce the target number - // in case an executor just happens to get lost (eg., bad hardware, or the cluster manager - // preempts it) -- in that case, there is no point in trying to immediately get a new - // executor, since we wouldn't even use it yet. 
- client.requestTotalExecutors(numExecutorsTarget, localityAwareTasks, hostToLocalTaskCount) - logDebug(s"Lowering target number of executors to $numExecutorsTarget (previously " + - s"$oldNumExecutorsTarget) because not all requested executors are actually needed") + } else { + val updatesNeeded = new mutable.HashMap[Int, ExecutorAllocationManager.TargetNumUpdates] + + // Update targets for all ResourceProfiles then do a single request to the cluster manager + numExecutorsTargetPerResourceProfileId.foreach { case (rpId, targetExecs) => + val maxNeeded = maxNumExecutorsNeededPerResourceProfile(rpId) + if (maxNeeded < targetExecs) { + // The target number exceeds the number we actually need, so stop adding new + // executors and inform the cluster manager to cancel the extra pending requests + + // We lower the target number of executors but don't actively kill any yet. Killing is + // controlled separately by an idle timeout. It's still helpful to reduce + // the target number in case an executor just happens to get lost (eg., bad hardware, + // or the cluster manager preempts it) -- in that case, there is no point in trying + // to immediately get a new executor, since we wouldn't even use it yet. 
+ decrementExecutorsFromTarget(maxNeeded, rpId, updatesNeeded) + } else if (addTime != NOT_SET && now >= addTime) { + addExecutorsToTarget(maxNeeded, rpId, updatesNeeded) + } + } + doUpdateRequest(updatesNeeded.toMap, now) + } + } + + private def addExecutorsToTarget( + maxNeeded: Int, + rpId: Int, + updatesNeeded: mutable.HashMap[Int, ExecutorAllocationManager.TargetNumUpdates]): Int = { + updateTargetExecs(addExecutors, maxNeeded, rpId, updatesNeeded) + } + + private def decrementExecutorsFromTarget( + maxNeeded: Int, + rpId: Int, + updatesNeeded: mutable.HashMap[Int, ExecutorAllocationManager.TargetNumUpdates]): Int = { + updateTargetExecs(decrementExecutors, maxNeeded, rpId, updatesNeeded) + } + + private def updateTargetExecs( + updateTargetFn: (Int, Int) => Int, + maxNeeded: Int, + rpId: Int, + updatesNeeded: mutable.HashMap[Int, ExecutorAllocationManager.TargetNumUpdates]): Int = { + val oldNumExecutorsTarget = numExecutorsTargetPerResourceProfileId(rpId) + // update the target number (add or remove) + val delta = updateTargetFn(maxNeeded, rpId) + if (delta != 0) { + updatesNeeded(rpId) = ExecutorAllocationManager.TargetNumUpdates(delta, oldNumExecutorsTarget) + } + delta + } + + private def doUpdateRequest( + updates: Map[Int, ExecutorAllocationManager.TargetNumUpdates], + now: Long): Int = { + // Only call cluster manager if target has changed. + if (updates.size > 0) { + val requestAcknowledged = try { + logDebug("requesting updates: " + updates) + testing || + client.requestTotalExecutors( + numExecutorsTargetPerResourceProfileId.toMap, + numLocalityAwareTasksPerResourceProfileId.toMap, + rpIdToHostToLocalTaskCount) + } catch { + case NonFatal(e) => + // Use INFO level so the error it doesn't show up by default in shells. + // Errors here are more commonly caused by YARN AM restarts, which is a recoverable + // issue, and generate a lot of noisy output. 
+ logInfo("Error reaching cluster manager.", e) + false + } + if (requestAcknowledged) { + // have to go through all resource profiles that changed + var totalDelta = 0 + updates.foreach { case (rpId, targetNum) => + val delta = targetNum.delta + totalDelta += delta + if (delta > 0) { + val executorsString = "executor" + { if (delta > 1) "s" else "" } + logInfo(s"Requesting $delta new $executorsString because tasks are backlogged " + + s"(new desired total will be ${numExecutorsTargetPerResourceProfileId(rpId)} " + + s"for resource profile id: ${rpId})") + numExecutorsToAddPerResourceProfileId(rpId) = + if (delta == numExecutorsToAddPerResourceProfileId(rpId)) { + numExecutorsToAddPerResourceProfileId(rpId) * 2 + } else { + 1 + } + logDebug(s"Starting timer to add more executors (to " + + s"expire in $sustainedSchedulerBacklogTimeoutS seconds)") + addTime = now + TimeUnit.SECONDS.toNanos(sustainedSchedulerBacklogTimeoutS) + } else { + logDebug(s"Lowering target number of executors to" + + s" ${numExecutorsTargetPerResourceProfileId(rpId)} (previously " + + s"$targetNum.oldNumExecutorsTarget for resource profile id: ${rpId}) " + + "because not all requested executors " + + "are actually needed") + } + } + totalDelta + } else { + // request was for all profiles so we have to go through all to reset to old num + updates.foreach { case (rpId, targetNum) => + logWarning("Unable to reach the cluster manager to request more executors!") + numExecutorsTargetPerResourceProfileId(rpId) = targetNum.oldNumExecutorsTarget + } + 0 } - numExecutorsTarget - oldNumExecutorsTarget - } else if (addTime != NOT_SET && now >= addTime) { - val delta = addExecutors(maxNeeded) - logDebug(s"Starting timer to add more executors (to " + - s"expire in $sustainedSchedulerBacklogTimeoutS seconds)") - addTime = now + (sustainedSchedulerBacklogTimeoutS * 1000) - delta } else { + logDebug("No change in number of executors") 0 } } + private def decrementExecutors(maxNeeded: Int, rpId: Int): Int = { 
+ val oldNumExecutorsTarget = numExecutorsTargetPerResourceProfileId(rpId) + numExecutorsTargetPerResourceProfileId(rpId) = math.max(maxNeeded, minNumExecutors) + numExecutorsToAddPerResourceProfileId(rpId) = 1 + numExecutorsTargetPerResourceProfileId(rpId) - oldNumExecutorsTarget + } + /** - * Request a number of executors from the cluster manager. + * Update the target number of executors and figure out how many to add. * If the cap on the number of executors is reached, give up and reset the * number of executors to add next round instead of continuing to double it. * * @param maxNumExecutorsNeeded the maximum number of executors all currently running or pending * tasks could fill + * @param rpId the ResourceProfile id of the executors * @return the number of additional executors actually requested. */ - private def addExecutors(maxNumExecutorsNeeded: Int): Int = { + private def addExecutors(maxNumExecutorsNeeded: Int, rpId: Int): Int = { + val oldNumExecutorsTarget = numExecutorsTargetPerResourceProfileId(rpId) // Do not request more executors if it would put our target over the upper bound - if (numExecutorsTarget >= maxNumExecutors) { - logDebug(s"Not adding executors because our current target total " + - s"is already $numExecutorsTarget (limit $maxNumExecutors)") - numExecutorsToAdd = 1 + // this is doing a max check per ResourceProfile + if (oldNumExecutorsTarget >= maxNumExecutors) { + logDebug("Not adding executors because our current target total " + + s"is already ${oldNumExecutorsTarget} (limit $maxNumExecutors)") + numExecutorsToAddPerResourceProfileId(rpId) = 1 return 0 } - - val oldNumExecutorsTarget = numExecutorsTarget // There's no point in wasting time ramping up to the number of executors we already have, so // make sure our target is at least as much as our current allocation: - numExecutorsTarget = math.max(numExecutorsTarget, executorMonitor.executorCount) + var numExecutorsTarget = math.max(numExecutorsTargetPerResourceProfileId(rpId), + 
executorMonitor.executorCountWithResourceProfile(rpId)) // Boost our target with the number to add for this round: - numExecutorsTarget += numExecutorsToAdd + numExecutorsTarget += numExecutorsToAddPerResourceProfileId(rpId) // Ensure that our target doesn't exceed what we need at the present moment: numExecutorsTarget = math.min(numExecutorsTarget, maxNumExecutorsNeeded) // Ensure that our target fits within configured bounds: numExecutorsTarget = math.max(math.min(numExecutorsTarget, maxNumExecutors), minNumExecutors) - val delta = numExecutorsTarget - oldNumExecutorsTarget + numExecutorsTargetPerResourceProfileId(rpId) = numExecutorsTarget // If our target has not changed, do not send a message // to the cluster manager and reset our exponential growth if (delta == 0) { - // Check if there is any speculative jobs pending - if (listener.pendingTasks == 0 && listener.pendingSpeculativeTasks > 0) { - numExecutorsTarget = - math.max(math.min(maxNumExecutorsNeeded + 1, maxNumExecutors), minNumExecutors) - } else { - numExecutorsToAdd = 1 - return 0 - } - } - - val addRequestAcknowledged = try { - testing || - client.requestTotalExecutors(numExecutorsTarget, localityAwareTasks, hostToLocalTaskCount) - } catch { - case NonFatal(e) => - // Use INFO level so the error it doesn't show up by default in shells. Errors here are more - // commonly caused by YARN AM restarts, which is a recoverable issue, and generate a lot of - // noisy output. 
- logInfo("Error reaching cluster manager.", e) - false - } - if (addRequestAcknowledged) { - val executorsString = "executor" + { if (delta > 1) "s" else "" } - logInfo(s"Requesting $delta new $executorsString because tasks are backlogged" + - s" (new desired total will be $numExecutorsTarget)") - numExecutorsToAdd = if (delta == numExecutorsToAdd) { - numExecutorsToAdd * 2 - } else { - 1 - } - delta - } else { - logWarning( - s"Unable to reach the cluster manager to request $numExecutorsTarget total executors!") - numExecutorsTarget = oldNumExecutorsTarget - 0 + numExecutorsToAddPerResourceProfileId(rpId) = 1 } + delta } /** * Request the cluster manager to remove the given executors. * Returns the list of executors which are removed. */ - private def removeExecutors(executors: Seq[String]): Seq[String] = synchronized { + private def removeExecutors(executors: Seq[(String, Int)]): Seq[String] = synchronized { val executorIdsToBeRemoved = new ArrayBuffer[String] - - logInfo("Request to remove executorIds: " + executors.mkString(", ")) - val numExistingExecutors = executorMonitor.executorCount - executorMonitor.pendingRemovalCount - - var newExecutorTotal = numExistingExecutors - executors.foreach { executorIdToBeRemoved => - if (newExecutorTotal - 1 < minNumExecutors) { - logDebug(s"Not removing idle executor $executorIdToBeRemoved because there are only " + - s"$newExecutorTotal executor(s) left (minimum number of executor limit $minNumExecutors)") - } else if (newExecutorTotal - 1 < numExecutorsTarget) { - logDebug(s"Not removing idle executor $executorIdToBeRemoved because there are only " + - s"$newExecutorTotal executor(s) left (number of executor target $numExecutorsTarget)") + logDebug(s"Request to remove executorIds: ${executors.mkString(", ")}") + val numExecutorsTotalPerRpId = mutable.Map[Int, Int]() + executors.foreach { case (executorIdToBeRemoved, rpId) => + if (rpId == UNKNOWN_RESOURCE_PROFILE_ID) { + if (testing) { + throw new 
SparkException("ResourceProfile Id was UNKNOWN, this is not expected") + } + logWarning(s"Not removing executor $executorIdsToBeRemoved because the " + + "ResourceProfile was UNKNOWN!") } else { - executorIdsToBeRemoved += executorIdToBeRemoved - newExecutorTotal -= 1 + // get the running total as we remove or initialize it to the count - pendingRemoval + val newExecutorTotal = numExecutorsTotalPerRpId.getOrElseUpdate(rpId, + (executorMonitor.executorCountWithResourceProfile(rpId) - + executorMonitor.pendingRemovalCountPerResourceProfileId(rpId))) + if (newExecutorTotal - 1 < minNumExecutors) { + logDebug(s"Not removing idle executor $executorIdToBeRemoved because there " + + s"are only $newExecutorTotal executor(s) left (minimum number of executor limit " + + s"$minNumExecutors)") + } else if (newExecutorTotal - 1 < numExecutorsTargetPerResourceProfileId(rpId)) { + logDebug(s"Not removing idle executor $executorIdToBeRemoved because there " + + s"are only $newExecutorTotal executor(s) left (number of executor " + + s"target ${numExecutorsTargetPerResourceProfileId(rpId)})") + } else { + executorIdsToBeRemoved += executorIdToBeRemoved + numExecutorsTotalPerRpId(rpId) -= 1 + } } } @@ -456,14 +556,15 @@ private[spark] class ExecutorAllocationManager( // [SPARK-21834] killExecutors api reduces the target number of executors. // So we need to update the target with desired value. - client.requestTotalExecutors(numExecutorsTarget, localityAwareTasks, hostToLocalTaskCount) + client.requestTotalExecutors( + numExecutorsTargetPerResourceProfileId.toMap, + numLocalityAwareTasksPerResourceProfileId.toMap, + rpIdToHostToLocalTaskCount) + // reset the newExecutorTotal to the existing number of executors - newExecutorTotal = numExistingExecutors if (testing || executorsRemoved.nonEmpty) { - newExecutorTotal -= executorsRemoved.size executorMonitor.executorsKilled(executorsRemoved) - logInfo(s"Executors ${executorsRemoved.mkString(",")} removed due to idle timeout." 
+ - s"(new desired total will be $newExecutorTotal)") + logInfo(s"Executors ${executorsRemoved.mkString(",")} removed due to idle timeout.") executorsRemoved } else { logWarning(s"Unable to reach the cluster manager to kill executor/s " + @@ -481,7 +582,7 @@ private[spark] class ExecutorAllocationManager( if (addTime == NOT_SET) { logDebug(s"Starting timer to add executors because pending tasks " + s"are building up (to expire in $schedulerBacklogTimeoutS seconds)") - addTime = clock.getTimeMillis + schedulerBacklogTimeoutS * 1000 + addTime = clock.nanoTime() + TimeUnit.SECONDS.toNanos(schedulerBacklogTimeoutS) } } @@ -492,7 +593,7 @@ private[spark] class ExecutorAllocationManager( private def onSchedulerQueueEmpty(): Unit = synchronized { logDebug("Clearing timer to add executors because there are no more pending tasks") addTime = NOT_SET - numExecutorsToAdd = 1 + numExecutorsToAddPerResourceProfileId.transform { case (_, _) => 1 } } private case class StageAttempt(stageId: Int, stageAttemptId: Int) { @@ -512,18 +613,22 @@ private[spark] class ExecutorAllocationManager( // Should be 0 when no stages are active. 
private val stageAttemptToNumRunningTask = new mutable.HashMap[StageAttempt, Int] private val stageAttemptToTaskIndices = new mutable.HashMap[StageAttempt, mutable.HashSet[Int]] - // Number of speculative tasks to be scheduled in each stageAttempt + // Number of speculative tasks pending/running in each stageAttempt private val stageAttemptToNumSpeculativeTasks = new mutable.HashMap[StageAttempt, Int] // The speculative tasks started in each stageAttempt private val stageAttemptToSpeculativeTaskIndices = new mutable.HashMap[StageAttempt, mutable.HashSet[Int]] + private val resourceProfileIdToStageAttempt = + new mutable.HashMap[Int, mutable.Set[StageAttempt]] + // stageAttempt to tuple (the number of task with locality preferences, a map where each pair - // is a node and the number of tasks that would like to be scheduled on that node) map, + // is a node and the number of tasks that would like to be scheduled on that node, and + // the resource profile id) map, // maintain the executor placement hints for each stageAttempt used by resource framework // to better place the executors. 
private val stageAttemptToExecutorPlacementHints = - new mutable.HashMap[StageAttempt, (Int, Map[String, Int])] + new mutable.HashMap[StageAttempt, (Int, Map[String, Int], Int)] override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { initializing = false @@ -534,6 +639,13 @@ private[spark] class ExecutorAllocationManager( allocationManager.synchronized { stageAttemptToNumTasks(stageAttempt) = numTasks allocationManager.onSchedulerBacklogged() + // need to keep stage task requirements to ask for the right containers + val profId = stageSubmitted.stageInfo.resourceProfileId + logDebug(s"Stage resource profile id is: $profId with numTasks: $numTasks") + resourceProfileIdToStageAttempt.getOrElseUpdate( + profId, new mutable.HashSet[StageAttempt]) += stageAttempt + numExecutorsToAddPerResourceProfileId.getOrElseUpdate(profId, 1) + numExecutorsTargetPerResourceProfileId.getOrElseUpdate(profId, initialNumExecutors) // Compute the number of tasks requested by the stage on each host var numTasksPending = 0 @@ -548,7 +660,7 @@ private[spark] class ExecutorAllocationManager( } } stageAttemptToExecutorPlacementHints.put(stageAttempt, - (numTasksPending, hostToLocalTaskCountPerStage.toMap)) + (numTasksPending, hostToLocalTaskCountPerStage.toMap, profId)) // Update the executor placement hints updateExecutorPlacementHints() @@ -560,7 +672,7 @@ private[spark] class ExecutorAllocationManager( val stageAttemptId = stageCompleted.stageInfo.attemptNumber() val stageAttempt = StageAttempt(stageId, stageAttemptId) allocationManager.synchronized { - // do NOT remove stageAttempt from stageAttemptToNumRunningTasks, + // do NOT remove stageAttempt from stageAttemptToNumRunningTask // because the attempt may still have running tasks, // even after another attempt for the stage is submitted. 
stageAttemptToNumTasks -= stageAttempt @@ -596,7 +708,7 @@ private[spark] class ExecutorAllocationManager( stageAttemptToTaskIndices.getOrElseUpdate(stageAttempt, new mutable.HashSet[Int]) += taskIndex } - if (totalPendingTasks() == 0) { + if (!hasPendingTasks) { allocationManager.onSchedulerQueueEmpty() } } @@ -612,20 +724,45 @@ private[spark] class ExecutorAllocationManager( stageAttemptToNumRunningTask(stageAttempt) -= 1 if (stageAttemptToNumRunningTask(stageAttempt) == 0) { stageAttemptToNumRunningTask -= stageAttempt + if (!stageAttemptToNumTasks.contains(stageAttempt)) { + val rpForStage = resourceProfileIdToStageAttempt.filter { case (k, v) => + v.contains(stageAttempt) + }.keys + if (rpForStage.size == 1) { + // be careful about the removal from here due to late tasks, make sure stage is + // really complete and no tasks left + resourceProfileIdToStageAttempt(rpForStage.head) -= stageAttempt + } else { + logWarning(s"Should have exactly one resource profile for stage $stageAttempt," + + s" but have $rpForStage") + } + } + } } - // If the task failed, we expect it to be resubmitted later. To ensure we have - // enough resources to run the resubmitted task, we need to mark the scheduler - // as backlogged again if it's not already marked as such (SPARK-8366) - if (taskEnd.reason != Success) { - if (totalPendingTasks() == 0) { - allocationManager.onSchedulerBacklogged() - } - if (taskEnd.taskInfo.speculative) { - stageAttemptToSpeculativeTaskIndices.get(stageAttempt).foreach {_.remove(taskIndex)} - } else { - stageAttemptToTaskIndices.get(stageAttempt).foreach {_.remove(taskIndex)} - } + if (taskEnd.taskInfo.speculative) { + stageAttemptToSpeculativeTaskIndices.get(stageAttempt).foreach {_.remove{taskIndex}} + stageAttemptToNumSpeculativeTasks(stageAttempt) -= 1 + } + + taskEnd.reason match { + case Success | _: TaskKilled => + case _ => + if (!hasPendingTasks) { + // If the task failed (not intentionally killed), we expect it to be resubmitted + // later. 
To ensure we have enough resources to run the resubmitted task, we need to + // mark the scheduler as backlogged again if it's not already marked as such + // (SPARK-8366) + allocationManager.onSchedulerBacklogged() + } + if (!taskEnd.taskInfo.speculative) { + // If a non-speculative task is intentionally killed, it means the speculative task + // has succeeded, and no further task of this task index will be resubmitted. In this + // case, the task index is completed and we shouldn't remove it from + // stageAttemptToTaskIndices. Otherwise, we will have a pending non-speculative task + // for the task index (SPARK-30511) + stageAttemptToTaskIndices.get(stageAttempt).foreach {_.remove(taskIndex)} + } } } } @@ -648,20 +785,46 @@ private[spark] class ExecutorAllocationManager( * * Note: This is not thread-safe without the caller owning the `allocationManager` lock. */ - def pendingTasks(): Int = { - stageAttemptToNumTasks.map { case (stageAttempt, numTasks) => - numTasks - stageAttemptToTaskIndices.get(stageAttempt).map(_.size).getOrElse(0) - }.sum + def pendingTasksPerResourceProfile(rpId: Int): Int = { + val attempts = resourceProfileIdToStageAttempt.getOrElse(rpId, Set.empty).toSeq + attempts.map(attempt => getPendingTaskSum(attempt)).sum } - def pendingSpeculativeTasks(): Int = { - stageAttemptToNumSpeculativeTasks.map { case (stageAttempt, numTasks) => - numTasks - stageAttemptToSpeculativeTaskIndices.get(stageAttempt).map(_.size).getOrElse(0) - }.sum + def hasPendingRegularTasks: Boolean = { + val attemptSets = resourceProfileIdToStageAttempt.values + attemptSets.exists(attempts => attempts.exists(getPendingTaskSum(_) > 0)) } - def totalPendingTasks(): Int = { - pendingTasks + pendingSpeculativeTasks + private def getPendingTaskSum(attempt: StageAttempt): Int = { + val numTotalTasks = stageAttemptToNumTasks.getOrElse(attempt, 0) + val numRunning = stageAttemptToTaskIndices.get(attempt).map(_.size).getOrElse(0) + numTotalTasks - numRunning + } + + def 
pendingSpeculativeTasksPerResourceProfile(rp: Int): Int = { + val attempts = resourceProfileIdToStageAttempt.getOrElse(rp, Set.empty).toSeq + attempts.map(attempt => getPendingSpeculativeTaskSum(attempt)).sum + } + + def hasPendingSpeculativeTasks: Boolean = { + val attemptSets = resourceProfileIdToStageAttempt.values + attemptSets.exists { attempts => + attempts.exists(getPendingSpeculativeTaskSum(_) > 0) + } + } + + private def getPendingSpeculativeTaskSum(attempt: StageAttempt): Int = { + val numTotalTasks = stageAttemptToNumSpeculativeTasks.getOrElse(attempt, 0) + val numRunning = stageAttemptToSpeculativeTaskIndices.get(attempt).map(_.size).getOrElse(0) + numTotalTasks - numRunning + } + + def hasPendingTasks: Boolean = { + hasPendingSpeculativeTasks || hasPendingRegularTasks + } + + def totalPendingTasksPerResourceProfile(rp: Int): Int = { + pendingTasksPerResourceProfile(rp) + pendingSpeculativeTasksPerResourceProfile(rp) } /** @@ -672,6 +835,14 @@ private[spark] class ExecutorAllocationManager( stageAttemptToNumRunningTask.values.sum } + def totalRunningTasksPerResourceProfile(rp: Int): Int = { + val attempts = resourceProfileIdToStageAttempt.getOrElse(rp, Set.empty).toSeq + // attempts is a Set, change to Seq so we keep all values + attempts.map { attempt => + stageAttemptToNumRunningTask.getOrElseUpdate(attempt, 0) + }.sum + } + /** * Update the Executor placement hints (the number of tasks with locality preferences, * a map where each pair is a node and the number of tasks that would like to be scheduled @@ -681,18 +852,27 @@ private[spark] class ExecutorAllocationManager( * granularity within stages. 
*/ def updateExecutorPlacementHints(): Unit = { - var localityAwareTasks = 0 - val localityToCount = new mutable.HashMap[String, Int]() - stageAttemptToExecutorPlacementHints.values.foreach { case (numTasksPending, localities) => - localityAwareTasks += numTasksPending - localities.foreach { case (hostname, count) => - val updatedCount = localityToCount.getOrElse(hostname, 0) + count - localityToCount(hostname) = updatedCount - } + val localityAwareTasksPerResourceProfileId = new mutable.HashMap[Int, Int] + + // ResourceProfile id => map[host, count] + val rplocalityToCount = new mutable.HashMap[Int, mutable.HashMap[String, Int]]() + stageAttemptToExecutorPlacementHints.values.foreach { + case (numTasksPending, localities, rpId) => + val rpNumPending = + localityAwareTasksPerResourceProfileId.getOrElse(rpId, 0) + localityAwareTasksPerResourceProfileId(rpId) = rpNumPending + numTasksPending + localities.foreach { case (hostname, count) => + val rpBasedHostToCount = + rplocalityToCount.getOrElseUpdate(rpId, new mutable.HashMap[String, Int]) + val newUpdated = rpBasedHostToCount.getOrElse(hostname, 0) + count + rpBasedHostToCount(hostname) = newUpdated + } } - allocationManager.localityAwareTasks = localityAwareTasks - allocationManager.hostToLocalTaskCount = localityToCount.toMap + allocationManager.numLocalityAwareTasksPerResourceProfileId = + localityAwareTasksPerResourceProfileId + allocationManager.rpIdToHostToLocalTaskCount = + rplocalityToCount.map { case (k, v) => (k, v.toMap)}.toMap } } @@ -713,14 +893,22 @@ private[spark] class ExecutorAllocationManager( }) } - registerGauge("numberExecutorsToAdd", numExecutorsToAdd, 0) + // The metrics are going to return the sum for all the different ResourceProfiles. 
+ registerGauge("numberExecutorsToAdd", + numExecutorsToAddPerResourceProfileId.values.sum, 0) registerGauge("numberExecutorsPendingToRemove", executorMonitor.pendingRemovalCount, 0) registerGauge("numberAllExecutors", executorMonitor.executorCount, 0) - registerGauge("numberTargetExecutors", numExecutorsTarget, 0) - registerGauge("numberMaxNeededExecutors", maxNumExecutorsNeeded(), 0) + registerGauge("numberTargetExecutors", + numExecutorsTargetPerResourceProfileId.values.sum, 0) + registerGauge("numberMaxNeededExecutors", numExecutorsTargetPerResourceProfileId.keys + .map(maxNumExecutorsNeededPerResourceProfile(_)).sum, 0) } } private object ExecutorAllocationManager { val NOT_SET = Long.MaxValue + + // helper case class for requesting executors, here to be visible for testing + private[spark] case class TargetNumUpdates(delta: Int, oldNumExecutorsTarget: Int) + } diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala index 8230533f9d245..4bdcafce0d75a 100644 --- a/core/src/main/scala/org/apache/spark/FutureAction.scala +++ b/core/src/main/scala/org/apache/spark/FutureAction.scala @@ -115,7 +115,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: @volatile private var _cancelled: Boolean = false - override def cancel() { + override def cancel(): Unit = { _cancelled = true jobWaiter.cancel() } @@ -132,7 +132,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: value.get.get } - override def onComplete[U](func: (Try[T]) => U)(implicit executor: ExecutionContext) { + override def onComplete[U](func: (Try[T]) => U)(implicit executor: ExecutionContext): Unit = { jobWaiter.completionFuture onComplete {_ => func(value.get)} } diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala index 20224eb721c09..2ac72e66d6f32 100644 --- 
a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala +++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala @@ -27,6 +27,9 @@ import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config.Network import org.apache.spark.rpc.{RpcCallContext, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.RemoveExecutor +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.scheduler.local.LocalSchedulerBackend import org.apache.spark.storage.BlockManagerId import org.apache.spark.util._ @@ -199,14 +202,30 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) if (now - lastSeenMs > executorTimeoutMs) { logWarning(s"Removing executor $executorId with no recent heartbeats: " + s"${now - lastSeenMs} ms exceeds timeout $executorTimeoutMs ms") - scheduler.executorLost(executorId, SlaveLost("Executor heartbeat " + - s"timed out after ${now - lastSeenMs} ms")) - // Asynchronously kill the executor to avoid blocking the current thread + // Asynchronously kill the executor to avoid blocking the current thread killExecutorThread.submit(new Runnable { override def run(): Unit = Utils.tryLogNonFatalError { // Note: we want to get an executor back after expiring this one, // so do not simply call `sc.killExecutor` here (SPARK-8119) sc.killAndReplaceExecutor(executorId) + // SPARK-27348: in case of the executors which are not gracefully shut down, + // we should remove lost executors from CoarseGrainedSchedulerBackend manually + // here to guarantee two things: + // 1) explicitly remove executor information from CoarseGrainedSchedulerBackend for + // a lost executor instead of waiting for disconnect message + // 2) call scheduler.executorLost() underlying to fail any tasks assigned to + // those executors to avoid app hang + sc.schedulerBackend match { + case backend: 
CoarseGrainedSchedulerBackend => + backend.driverEndpoint.send(RemoveExecutor(executorId, + SlaveLost(s"Executor heartbeat timed out after ${now - lastSeenMs} ms"))) + + // LocalSchedulerBackend is used locally and only has one single executor + case _: LocalSchedulerBackend => + + case other => throw new UnsupportedOperationException( + s"Unknown scheduler backend: ${other.getClass}") + } } }) executorLastSeen.remove(executorId) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index d878fc527791a..f229061a6d0f6 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -17,22 +17,25 @@ package org.apache.spark -import java.io._ +import java.io.{ByteArrayInputStream, ObjectInputStream, ObjectOutputStream} import java.util.concurrent.{ConcurrentHashMap, LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit} -import java.util.zip.{GZIPInputStream, GZIPOutputStream} +import java.util.concurrent.locks.ReentrantReadWriteLock import scala.collection.JavaConverters._ -import scala.collection.mutable.{HashMap, HashSet, ListBuffer, Map} +import scala.collection.mutable.{HashMap, ListBuffer, Map} import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration import scala.reflect.ClassTag import scala.util.control.NonFatal +import org.apache.commons.io.output.{ByteArrayOutputStream => ApacheByteArrayOutputStream} + import org.apache.spark.broadcast.{Broadcast, BroadcastManager} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.io.CompressionCodec import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} -import org.apache.spark.scheduler.MapStatus +import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, MapStatus} import org.apache.spark.shuffle.MetadataFetchFailedException import 
org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockId} import org.apache.spark.util._ @@ -41,14 +44,36 @@ import org.apache.spark.util._ * Helper class used by the [[MapOutputTrackerMaster]] to perform bookkeeping for a single * ShuffleMapStage. * - * This class maintains a mapping from mapIds to `MapStatus`. It also maintains a cache of + * This class maintains a mapping from map index to `MapStatus`. It also maintains a cache of * serialized map statuses in order to speed up tasks' requests for map output statuses. * * All public methods of this class are thread-safe. */ private class ShuffleStatus(numPartitions: Int) { - // All accesses to the following state must be guarded with `this.synchronized`. + private val (readLock, writeLock) = { + val lock = new ReentrantReadWriteLock() + (lock.readLock(), lock.writeLock()) + } + + // All accesses to the following state must be guarded with `withReadLock` or `withWriteLock`. + private def withReadLock[B](fn: => B): B = { + readLock.lock() + try { + fn + } finally { + readLock.unlock() + } + } + + private def withWriteLock[B](fn: => B): B = { + writeLock.lock() + try { + fn + } finally { + writeLock.unlock() + } + } /** * MapStatus for each partition. The index of the array is the map partition id. @@ -88,12 +113,12 @@ private class ShuffleStatus(numPartitions: Int) { * Register a map output. If there is already a registered location for the map output then it * will be replaced by the new location. 
*/ - def addMapOutput(mapId: Int, status: MapStatus): Unit = synchronized { - if (mapStatuses(mapId) == null) { + def addMapOutput(mapIndex: Int, status: MapStatus): Unit = withWriteLock { + if (mapStatuses(mapIndex) == null) { _numAvailableOutputs += 1 invalidateSerializedMapOutputStatusCache() } - mapStatuses(mapId) = status + mapStatuses(mapIndex) = status } /** @@ -101,10 +126,10 @@ private class ShuffleStatus(numPartitions: Int) { * This is a no-op if there is no registered map output or if the registered output is from a * different block manager. */ - def removeMapOutput(mapId: Int, bmAddress: BlockManagerId): Unit = synchronized { - if (mapStatuses(mapId) != null && mapStatuses(mapId).location == bmAddress) { + def removeMapOutput(mapIndex: Int, bmAddress: BlockManagerId): Unit = withWriteLock { + if (mapStatuses(mapIndex) != null && mapStatuses(mapIndex).location == bmAddress) { _numAvailableOutputs -= 1 - mapStatuses(mapId) = null + mapStatuses(mapIndex) = null invalidateSerializedMapOutputStatusCache() } } @@ -113,7 +138,7 @@ private class ShuffleStatus(numPartitions: Int) { * Removes all shuffle outputs associated with this host. Note that this will also remove * outputs which are served by an external shuffle server (if one exists). */ - def removeOutputsOnHost(host: String): Unit = { + def removeOutputsOnHost(host: String): Unit = withWriteLock { removeOutputsByFilter(x => x.host == host) } @@ -122,7 +147,7 @@ private class ShuffleStatus(numPartitions: Int) { * remove outputs which are served by an external shuffle server (if one exists), as they are * still registered with that execId. */ - def removeOutputsOnExecutor(execId: String): Unit = synchronized { + def removeOutputsOnExecutor(execId: String): Unit = withWriteLock { removeOutputsByFilter(x => x.executorId == execId) } @@ -130,11 +155,11 @@ private class ShuffleStatus(numPartitions: Int) { * Removes all shuffle outputs which satisfies the filter. 
Note that this will also * remove outputs which are served by an external shuffle server (if one exists). */ - def removeOutputsByFilter(f: (BlockManagerId) => Boolean): Unit = synchronized { - for (mapId <- 0 until mapStatuses.length) { - if (mapStatuses(mapId) != null && f(mapStatuses(mapId).location)) { + def removeOutputsByFilter(f: BlockManagerId => Boolean): Unit = withWriteLock { + for (mapIndex <- mapStatuses.indices) { + if (mapStatuses(mapIndex) != null && f(mapStatuses(mapIndex).location)) { _numAvailableOutputs -= 1 - mapStatuses(mapId) = null + mapStatuses(mapIndex) = null invalidateSerializedMapOutputStatusCache() } } @@ -143,14 +168,14 @@ private class ShuffleStatus(numPartitions: Int) { /** * Number of partitions that have shuffle outputs. */ - def numAvailableOutputs: Int = synchronized { + def numAvailableOutputs: Int = withReadLock { _numAvailableOutputs } /** * Returns the sequence of partition ids that are missing (i.e. needs to be computed). */ - def findMissingPartitions(): Seq[Int] = synchronized { + def findMissingPartitions(): Seq[Int] = withReadLock { val missing = (0 until numPartitions).filter(id => mapStatuses(id) == null) assert(missing.size == numPartitions - _numAvailableOutputs, s"${missing.size} missing, expected ${numPartitions - _numAvailableOutputs}") @@ -169,18 +194,32 @@ private class ShuffleStatus(numPartitions: Int) { def serializedMapStatus( broadcastManager: BroadcastManager, isLocal: Boolean, - minBroadcastSize: Int): Array[Byte] = synchronized { - if (cachedSerializedMapStatus eq null) { - val serResult = MapOutputTracker.serializeMapStatuses( - mapStatuses, broadcastManager, isLocal, minBroadcastSize) - cachedSerializedMapStatus = serResult._1 - cachedSerializedBroadcast = serResult._2 + minBroadcastSize: Int, + conf: SparkConf): Array[Byte] = { + var result: Array[Byte] = null + + withReadLock { + if (cachedSerializedMapStatus != null) { + result = cachedSerializedMapStatus + } } - cachedSerializedMapStatus + + if 
(result == null) withWriteLock { + if (cachedSerializedMapStatus == null) { + val serResult = MapOutputTracker.serializeMapStatuses( + mapStatuses, broadcastManager, isLocal, minBroadcastSize, conf) + cachedSerializedMapStatus = serResult._1 + cachedSerializedBroadcast = serResult._2 + } + // The following line has to be outside if statement since it's possible that another thread + // initializes cachedSerializedMapStatus in-between `withReadLock` and `withWriteLock`. + result = cachedSerializedMapStatus + } + result } // Used in testing. - def hasCachedSerializedBroadcast: Boolean = synchronized { + def hasCachedSerializedBroadcast: Boolean = withReadLock { cachedSerializedBroadcast != null } @@ -188,14 +227,14 @@ private class ShuffleStatus(numPartitions: Int) { * Helper function which provides thread-safe access to the mapStatuses array. * The function should NOT mutate the array. */ - def withMapStatuses[T](f: Array[MapStatus] => T): T = synchronized { + def withMapStatuses[T](f: Array[MapStatus] => T): T = withReadLock { f(mapStatuses) } /** * Clears the cached serialized map output statuses. */ - def invalidateSerializedMapOutputStatusCache(): Unit = synchronized { + def invalidateSerializedMapOutputStatusCache(): Unit = withWriteLock { if (cachedSerializedBroadcast != null) { // Prevent errors during broadcast cleanup from crashing the DAGScheduler (see SPARK-21444) Utils.tryLogNonFatalError { @@ -272,7 +311,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging } /** Send a one-way message to the trackerEndpoint, to which we expect it to reply with true. 
*/ - protected def sendTracker(message: Any) { + protected def sendTracker(message: Any): Unit = { val response = askTracker[Boolean](message) if (response != true) { throw new SparkException( @@ -282,7 +321,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging // For testing def getMapSizesByExecutorId(shuffleId: Int, reduceId: Int) - : Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = { + : Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { getMapSizesByExecutorId(shuffleId, reduceId, reduceId + 1) } @@ -292,18 +331,39 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging * endPartition is excluded from the range). * * @return A sequence of 2-item tuples, where the first item in the tuple is a BlockManagerId, - * and the second item is a sequence of (shuffle block id, shuffle block size) tuples - * describing the shuffle blocks that are stored at that block manager. + * and the second item is a sequence of (shuffle block id, shuffle block size, map index) + * tuples describing the shuffle blocks that are stored at that block manager. */ - def getMapSizesByExecutorId(shuffleId: Int, startPartition: Int, endPartition: Int) - : Iterator[(BlockManagerId, Seq[(BlockId, Long)])] + def getMapSizesByExecutorId( + shuffleId: Int, + startPartition: Int, + endPartition: Int) + : Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] + + /** + * Called from executors to get the server URIs and output sizes for each shuffle block that + * needs to be read from a given range of map output partitions (startPartition is included but + * endPartition is excluded from the range) and is produced by + * a range of mappers (startMapIndex, endMapIndex, startMapIndex is included and + * the endMapIndex is excluded). 
+ * + * @return A sequence of 2-item tuples, where the first item in the tuple is a BlockManagerId, + * and the second item is a sequence of (shuffle block id, shuffle block size, map index) + * tuples describing the shuffle blocks that are stored at that block manager. + */ + def getMapSizesByRange( + shuffleId: Int, + startMapIndex: Int, + endMapIndex: Int, + startPartition: Int, + endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] /** * Deletes map output status information for the specified shuffle stage. */ def unregisterShuffle(shuffleId: Int): Unit - def stop() {} + def stop(): Unit = {} } /** @@ -317,8 +377,8 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging */ private[spark] class MapOutputTrackerMaster( conf: SparkConf, - broadcastManager: BroadcastManager, - isLocal: Boolean) + private[spark] val broadcastManager: BroadcastManager, + private[spark] val isLocal: Boolean) extends MapOutputTracker(conf) { // The size at which we use Broadcast to send the map output statuses to the executors @@ -393,7 +453,8 @@ private[spark] class MapOutputTrackerMaster( " to " + hostPort) val shuffleStatus = shuffleStatuses.get(shuffleId).head context.reply( - shuffleStatus.serializedMapStatus(broadcastManager, isLocal, minSizeForBroadcast)) + shuffleStatus.serializedMapStatus(broadcastManager, isLocal, minSizeForBroadcast, + conf)) } catch { case NonFatal(e) => logError(e.getMessage, e) } @@ -412,21 +473,21 @@ private[spark] class MapOutputTrackerMaster( shuffleStatuses.valuesIterator.count(_.hasCachedSerializedBroadcast) } - def registerShuffle(shuffleId: Int, numMaps: Int) { + def registerShuffle(shuffleId: Int, numMaps: Int): Unit = { if (shuffleStatuses.put(shuffleId, new ShuffleStatus(numMaps)).isDefined) { throw new IllegalArgumentException("Shuffle ID " + shuffleId + " registered twice") } } - def registerMapOutput(shuffleId: Int, mapId: Int, status: MapStatus) { - 
shuffleStatuses(shuffleId).addMapOutput(mapId, status) + def registerMapOutput(shuffleId: Int, mapIndex: Int, status: MapStatus): Unit = { + shuffleStatuses(shuffleId).addMapOutput(mapIndex, status) } /** Unregister map output information of the given shuffle, mapper and block manager */ - def unregisterMapOutput(shuffleId: Int, mapId: Int, bmAddress: BlockManagerId) { + def unregisterMapOutput(shuffleId: Int, mapIndex: Int, bmAddress: BlockManagerId): Unit = { shuffleStatuses.get(shuffleId) match { case Some(shuffleStatus) => - shuffleStatus.removeMapOutput(mapId, bmAddress) + shuffleStatus.removeMapOutput(mapIndex, bmAddress) incrementEpoch() case None => throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID") @@ -434,7 +495,7 @@ private[spark] class MapOutputTrackerMaster( } /** Unregister all map output information of the given shuffle. */ - def unregisterAllMapOutput(shuffleId: Int) { + def unregisterAllMapOutput(shuffleId: Int): Unit = { shuffleStatuses.get(shuffleId) match { case Some(shuffleStatus) => shuffleStatus.removeOutputsByFilter(x => true) @@ -446,7 +507,7 @@ private[spark] class MapOutputTrackerMaster( } /** Unregister shuffle data */ - def unregisterShuffle(shuffleId: Int) { + def unregisterShuffle(shuffleId: Int): Unit = { shuffleStatuses.remove(shuffleId).foreach { shuffleStatus => shuffleStatus.invalidateSerializedMapOutputStatusCache() } @@ -629,7 +690,36 @@ private[spark] class MapOutputTrackerMaster( None } - def incrementEpoch() { + /** + * Return the locations where the Mappers ran. The locations each includes both a host and an + * executor id on that host. + * + * @param dep shuffle dependency object + * @param startMapIndex the start map index + * @param endMapIndex the end map index + * @return a sequence of locations where task runs. 
+ */ + def getMapLocation( + dep: ShuffleDependency[_, _, _], + startMapIndex: Int, + endMapIndex: Int): Seq[String] = + { + val shuffleStatus = shuffleStatuses.get(dep.shuffleId).orNull + if (shuffleStatus != null) { + shuffleStatus.withMapStatuses { statuses => + if (startMapIndex < endMapIndex && (startMapIndex >= 0 && endMapIndex < statuses.length)) { + val statusesPicked = statuses.slice(startMapIndex, endMapIndex).filter(_ != null) + statusesPicked.map(_.location.host).toSeq + } else { + Nil + } + } + } else { + Nil + } + } + + def incrementEpoch(): Unit = { epochLock.synchronized { epoch += 1 logDebug("Increasing epoch to " + epoch) @@ -645,20 +735,43 @@ private[spark] class MapOutputTrackerMaster( // Get blocks sizes by executor Id. Note that zero-sized blocks are excluded in the result. // This method is only called in local-mode. - def getMapSizesByExecutorId(shuffleId: Int, startPartition: Int, endPartition: Int) - : Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = { + def getMapSizesByExecutorId( + shuffleId: Int, + startPartition: Int, + endPartition: Int) + : Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { logDebug(s"Fetching outputs for shuffle $shuffleId, partitions $startPartition-$endPartition") shuffleStatuses.get(shuffleId) match { case Some (shuffleStatus) => shuffleStatus.withMapStatuses { statuses => - MapOutputTracker.convertMapStatuses(shuffleId, startPartition, endPartition, statuses) + MapOutputTracker.convertMapStatuses( + shuffleId, startPartition, endPartition, statuses, 0, shuffleStatus.mapStatuses.length) + } + case None => + Iterator.empty + } + } + + override def getMapSizesByRange( + shuffleId: Int, + startMapIndex: Int, + endMapIndex: Int, + startPartition: Int, + endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { + logDebug(s"Fetching outputs for shuffle $shuffleId, mappers $startMapIndex-$endMapIndex" + + s"partitions $startPartition-$endPartition") + shuffleStatuses.get(shuffleId) match { 
+ case Some(shuffleStatus) => + shuffleStatus.withMapStatuses { statuses => + MapOutputTracker.convertMapStatuses( + shuffleId, startPartition, endPartition, statuses, startMapIndex, endMapIndex) } case None => Iterator.empty } } - override def stop() { + override def stop(): Unit = { mapOutputRequests.offer(PoisonPill) threadpool.shutdown() sendTracker(StopMapOutputTracker) @@ -685,12 +798,36 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr private val fetchingLock = new KeyLock[Int] // Get blocks sizes by executor Id. Note that zero-sized blocks are excluded in the result. - override def getMapSizesByExecutorId(shuffleId: Int, startPartition: Int, endPartition: Int) - : Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = { + override def getMapSizesByExecutorId( + shuffleId: Int, + startPartition: Int, + endPartition: Int) + : Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { logDebug(s"Fetching outputs for shuffle $shuffleId, partitions $startPartition-$endPartition") - val statuses = getStatuses(shuffleId) + val statuses = getStatuses(shuffleId, conf) + try { + MapOutputTracker.convertMapStatuses( + shuffleId, startPartition, endPartition, statuses, 0, statuses.length) + } catch { + case e: MetadataFetchFailedException => + // We experienced a fetch failure so our mapStatuses cache is outdated; clear it: + mapStatuses.clear() + throw e + } + } + + override def getMapSizesByRange( + shuffleId: Int, + startMapIndex: Int, + endMapIndex: Int, + startPartition: Int, + endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { + logDebug(s"Fetching outputs for shuffle $shuffleId, mappers $startMapIndex-$endMapIndex" + + s"partitions $startPartition-$endPartition") + val statuses = getStatuses(shuffleId, conf) try { - MapOutputTracker.convertMapStatuses(shuffleId, startPartition, endPartition, statuses) + MapOutputTracker.convertMapStatuses( + shuffleId, startPartition, endPartition, statuses, startMapIndex, 
endMapIndex) } catch { case e: MetadataFetchFailedException => // We experienced a fetch failure so our mapStatuses cache is outdated; clear it: @@ -705,7 +842,7 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr * * (It would be nice to remove this restriction in the future.) */ - private def getStatuses(shuffleId: Int): Array[MapStatus] = { + private def getStatuses(shuffleId: Int, conf: SparkConf): Array[MapStatus] = { val statuses = mapStatuses.get(shuffleId).orNull if (statuses == null) { logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them") @@ -715,7 +852,7 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr if (fetchedStatuses == null) { logInfo("Doing the fetch; tracker endpoint = " + trackerEndpoint) val fetchedBytes = askTracker[Array[Byte]](GetMapOutputStatuses(shuffleId)) - fetchedStatuses = MapOutputTracker.deserializeMapStatuses(fetchedBytes) + fetchedStatuses = MapOutputTracker.deserializeMapStatuses(fetchedBytes, conf) logInfo("Got the output locations") mapStatuses.put(shuffleId, fetchedStatuses) } @@ -757,13 +894,22 @@ private[spark] object MapOutputTracker extends Logging { private val BROADCAST = 1 // Serialize an array of map output locations into an efficient byte format so that we can send - // it to reduce tasks. We do this by compressing the serialized bytes using GZIP. They will + // it to reduce tasks. We do this by compressing the serialized bytes using Zstd. They will // generally be pretty compressible because many map outputs will be on the same hostname. 
- def serializeMapStatuses(statuses: Array[MapStatus], broadcastManager: BroadcastManager, - isLocal: Boolean, minBroadcastSize: Int): (Array[Byte], Broadcast[Array[Byte]]) = { - val out = new ByteArrayOutputStream + def serializeMapStatuses( + statuses: Array[MapStatus], + broadcastManager: BroadcastManager, + isLocal: Boolean, + minBroadcastSize: Int, + conf: SparkConf): (Array[Byte], Broadcast[Array[Byte]]) = { + // Using `org.apache.commons.io.output.ByteArrayOutputStream` instead of the standard one + // This implementation doesn't reallocate the whole memory block but allocates + // additional buffers. This way no buffers need to be garbage collected and + // the contents don't have to be copied to the new buffer. + val out = new ApacheByteArrayOutputStream() out.write(DIRECT) - val objOut = new ObjectOutputStream(new GZIPOutputStream(out)) + val codec = CompressionCodec.createCodec(conf, conf.get(MAP_STATUS_COMPRESSION_CODEC)) + val objOut = new ObjectOutputStream(codec.compressedOutputStream(out)) Utils.tryWithSafeFinally { // Since statuses can be modified in parallel, sync on it statuses.synchronized { @@ -780,9 +926,12 @@ private[spark] object MapOutputTracker extends Logging { // toByteArray creates copy, so we can reuse out out.reset() out.write(BROADCAST) - val oos = new ObjectOutputStream(new GZIPOutputStream(out)) - oos.writeObject(bcast) - oos.close() + val oos = new ObjectOutputStream(codec.compressedOutputStream(out)) + Utils.tryWithSafeFinally { + oos.writeObject(bcast) + } { + oos.close() + } val outArr = out.toByteArray logInfo("Broadcast mapstatuses size = " + outArr.length + ", actual size = " + arr.length) (outArr, bcast) @@ -792,11 +941,15 @@ private[spark] object MapOutputTracker extends Logging { } // Opposite of serializeMapStatuses. 
- def deserializeMapStatuses(bytes: Array[Byte]): Array[MapStatus] = { + def deserializeMapStatuses(bytes: Array[Byte], conf: SparkConf): Array[MapStatus] = { assert (bytes.length > 0) def deserializeObject(arr: Array[Byte], off: Int, len: Int): AnyRef = { - val objIn = new ObjectInputStream(new GZIPInputStream( + val codec = CompressionCodec.createCodec(conf, conf.get(MAP_STATUS_COMPRESSION_CODEC)) + // The ZStd codec is wrapped in a `BufferedInputStream`, which avoids the excessive + // overhead of JNI calls when decompressing a small amount of data for each element + // of `MapStatuses` + val objIn = new ObjectInputStream(codec.compressedInputStream( new ByteArrayInputStream(arr, off, len))) Utils.tryWithSafeFinally { objIn.readObject() @@ -832,19 +985,24 @@ private[spark] object MapOutputTracker extends Logging { * @param shuffleId Identifier for the shuffle * @param startPartition Start of map output partition ID range (included in range) * @param endPartition End of map output partition ID range (excluded from range) - * @param statuses List of map statuses, indexed by map ID. + * @param statuses List of map statuses, indexed by map partition index. + * @param startMapIndex Start map index. + * @param endMapIndex End map index. * @return A sequence of 2-item tuples, where the first item in the tuple is a BlockManagerId, - * and the second item is a sequence of (shuffle block ID, shuffle block size) tuples - * describing the shuffle blocks that are stored at that block manager. + * and the second item is a sequence of (shuffle block id, shuffle block size, map index) + * tuples describing the shuffle blocks that are stored at that block manager.
*/ def convertMapStatuses( shuffleId: Int, startPartition: Int, endPartition: Int, - statuses: Array[MapStatus]): Iterator[(BlockManagerId, Seq[(BlockId, Long)])] = { + statuses: Array[MapStatus], + startMapIndex : Int, + endMapIndex: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = { assert (statuses != null) - val splitsByAddress = new HashMap[BlockManagerId, ListBuffer[(BlockId, Long)]] - for ((status, mapId) <- statuses.iterator.zipWithIndex) { + val splitsByAddress = new HashMap[BlockManagerId, ListBuffer[(BlockId, Long, Int)]] + val iter = statuses.iterator.zipWithIndex + for ((status, mapIndex) <- iter.slice(startMapIndex, endMapIndex)) { if (status == null) { val errorMessage = s"Missing an output location for shuffle $shuffleId" logError(errorMessage) @@ -854,11 +1012,12 @@ private[spark] object MapOutputTracker extends Logging { val size = status.getSizeForBlock(part) if (size != 0) { splitsByAddress.getOrElseUpdate(status.location, ListBuffer()) += - ((ShuffleBlockId(shuffleId, mapId, part), size)) + ((ShuffleBlockId(shuffleId, status.mapId, part), size, mapIndex)) } } } } + splitsByAddress.iterator } } diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala index 77db0f5d0eaa7..d061627bea69c 100644 --- a/core/src/main/scala/org/apache/spark/SecurityManager.scala +++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala @@ -108,12 +108,12 @@ private[spark] class SecurityManager( * Admin acls should be set before the view or modify acls. If you modify the admin * acls you should also set the view and modify acls again to pick up the changes. 
*/ - def setViewAcls(defaultUsers: Set[String], allowedUsers: Seq[String]) { + def setViewAcls(defaultUsers: Set[String], allowedUsers: Seq[String]): Unit = { viewAcls = adminAcls ++ defaultUsers ++ allowedUsers logInfo("Changing view acls to: " + viewAcls.mkString(",")) } - def setViewAcls(defaultUser: String, allowedUsers: Seq[String]) { + def setViewAcls(defaultUser: String, allowedUsers: Seq[String]): Unit = { setViewAcls(Set[String](defaultUser), allowedUsers) } @@ -121,7 +121,7 @@ private[spark] class SecurityManager( * Admin acls groups should be set before the view or modify acls groups. If you modify the admin * acls groups you should also set the view and modify acls groups again to pick up the changes. */ - def setViewAclsGroups(allowedUserGroups: Seq[String]) { + def setViewAclsGroups(allowedUserGroups: Seq[String]): Unit = { viewAclsGroups = adminAclsGroups ++ allowedUserGroups logInfo("Changing view acls groups to: " + viewAclsGroups.mkString(",")) } @@ -149,7 +149,7 @@ private[spark] class SecurityManager( * Admin acls should be set before the view or modify acls. If you modify the admin * acls you should also set the view and modify acls again to pick up the changes. */ - def setModifyAcls(defaultUsers: Set[String], allowedUsers: Seq[String]) { + def setModifyAcls(defaultUsers: Set[String], allowedUsers: Seq[String]): Unit = { modifyAcls = adminAcls ++ defaultUsers ++ allowedUsers logInfo("Changing modify acls to: " + modifyAcls.mkString(",")) } @@ -158,7 +158,7 @@ private[spark] class SecurityManager( * Admin acls groups should be set before the view or modify acls groups. If you modify the admin * acls groups you should also set the view and modify acls groups again to pick up the changes. 
*/ - def setModifyAclsGroups(allowedUserGroups: Seq[String]) { + def setModifyAclsGroups(allowedUserGroups: Seq[String]): Unit = { modifyAclsGroups = adminAclsGroups ++ allowedUserGroups logInfo("Changing modify acls groups to: " + modifyAclsGroups.mkString(",")) } @@ -186,7 +186,7 @@ private[spark] class SecurityManager( * Admin acls should be set before the view or modify acls. If you modify the admin * acls you should also set the view and modify acls again to pick up the changes. */ - def setAdminAcls(adminUsers: Seq[String]) { + def setAdminAcls(adminUsers: Seq[String]): Unit = { adminAcls = adminUsers.toSet logInfo("Changing admin acls to: " + adminAcls.mkString(",")) } @@ -195,12 +195,12 @@ private[spark] class SecurityManager( * Admin acls groups should be set before the view or modify acls groups. If you modify the admin * acls groups you should also set the view and modify acls groups again to pick up the changes. */ - def setAdminAclsGroups(adminUserGroups: Seq[String]) { + def setAdminAclsGroups(adminUserGroups: Seq[String]): Unit = { adminAclsGroups = adminUserGroups.toSet logInfo("Changing admin acls groups to: " + adminAclsGroups.mkString(",")) } - def setAcls(aclSetting: Boolean) { + def setAcls(aclSetting: Boolean): Unit = { aclsOn = aclSetting logInfo("Changing acls enabled to: " + aclsOn) } diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 24be54ec91828..40915e3904f7e 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -504,7 +504,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria * Checks for illegal or deprecated config settings. Throws an exception for the former. Not * idempotent - may mutate this conf object to convert deprecated settings to supported ones. 
*/ - private[spark] def validateSettings() { + private[spark] def validateSettings(): Unit = { if (contains("spark.local.dir")) { val msg = "Note that spark.local.dir will be overridden by the value set by " + "the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS" + @@ -548,23 +548,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria } } - if (contains("spark.master") && get("spark.master").startsWith("yarn-")) { - val warning = s"spark.master ${get("spark.master")} is deprecated in Spark 2.0+, please " + - "instead use \"yarn\" with specified deploy mode." - - get("spark.master") match { - case "yarn-cluster" => - logWarning(warning) - set("spark.master", "yarn") - set(SUBMIT_DEPLOY_MODE, "cluster") - case "yarn-client" => - logWarning(warning) - set("spark.master", "yarn") - set(SUBMIT_DEPLOY_MODE, "client") - case _ => // Any other unexpected master will be checked when creating scheduler backend. - } - } - if (contains(SUBMIT_DEPLOY_MODE)) { get(SUBMIT_DEPLOY_MODE) match { case "cluster" | "client" => @@ -636,7 +619,9 @@ private[spark] object SparkConf extends Logging { "Not used anymore. Please use spark.shuffle.service.index.cache.size"), DeprecatedConfig("spark.yarn.credentials.file.retention.count", "2.4.0", "Not used anymore."), DeprecatedConfig("spark.yarn.credentials.file.retention.days", "2.4.0", "Not used anymore."), - DeprecatedConfig("spark.yarn.services", "3.0.0", "Feature no longer available.") + DeprecatedConfig("spark.yarn.services", "3.0.0", "Feature no longer available."), + DeprecatedConfig("spark.executor.plugins", "3.0.0", + "Feature replaced with new plugin API. 
See Monitoring documentation.") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) @@ -699,7 +684,8 @@ private[spark] object SparkConf extends Logging { "spark.yarn.jars" -> Seq( AlternateConfig("spark.yarn.jar", "2.0")), MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM.key -> Seq( - AlternateConfig("spark.reducer.maxReqSizeShuffleToMem", "2.3")), + AlternateConfig("spark.reducer.maxReqSizeShuffleToMem", "2.3"), + AlternateConfig("spark.maxRemoteBlockSizeFetchToMem", "3.0")), LISTENER_BUS_EVENT_QUEUE_CAPACITY.key -> Seq( AlternateConfig("spark.scheduler.listenerbus.eventqueue.size", "2.3")), DRIVER_MEMORY_OVERHEAD.key -> Seq( diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 396d712bd739c..a47136ea36736 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -25,13 +25,13 @@ import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicReferenc import scala.collection.JavaConverters._ import scala.collection.Map +import scala.collection.immutable import scala.collection.mutable.HashMap import scala.language.implicitConversions import scala.reflect.{classTag, ClassTag} import scala.util.control.NonFatal import com.google.common.collect.MapMaker -import org.apache.commons.lang3.SerializationUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.io.{ArrayWritable, BooleanWritable, BytesWritable, DoubleWritable, FloatWritable, IntWritable, LongWritable, NullWritable, Text, Writable} @@ -43,22 +43,25 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil} import org.apache.spark.deploy.StandaloneResourceUtils._ -import org.apache.spark.executor.ExecutorMetrics +import org.apache.spark.executor.{ExecutorMetrics, ExecutorMetricsSource} import 
org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Tests._ import org.apache.spark.internal.config.UI._ +import org.apache.spark.internal.plugin.PluginContainer import org.apache.spark.io.CompressionCodec import org.apache.spark.metrics.source.JVMCPUSource import org.apache.spark.partial.{ApproximateEvaluator, PartialResult} import org.apache.spark.rdd._ -import org.apache.spark.resource.{ResourceID, ResourceInformation} +import org.apache.spark.resource._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend import org.apache.spark.scheduler.local.LocalSchedulerBackend +import org.apache.spark.shuffle.ShuffleDataIOUtils +import org.apache.spark.shuffle.api.ShuffleDriverComponents import org.apache.spark.status.{AppStatusSource, AppStatusStore} import org.apache.spark.status.api.v1.ThreadStackTrace import org.apache.spark.storage._ @@ -217,7 +220,10 @@ class SparkContext(config: SparkConf) extends Logging { private var _shutdownHookRef: AnyRef = _ private var _statusStore: AppStatusStore = _ private var _heartbeater: Heartbeater = _ - private var _resources: scala.collection.immutable.Map[String, ResourceInformation] = _ + private var _resources: immutable.Map[String, ResourceInformation] = _ + private var _shuffleDriverComponents: ShuffleDriverComponents = _ + private var _plugins: Option[PluginContainer] = None + private var _resourceProfileManager: ResourceProfileManager = _ /* ------------------------------------------------------------------------------------- * | Accessors and public fields. 
These provide access to the internal state of the | @@ -320,6 +326,8 @@ class SparkContext(config: SparkConf) extends Logging { _dagScheduler = ds } + private[spark] def shuffleDriverComponents: ShuffleDriverComponents = _shuffleDriverComponents + /** * A unique identifier for the Spark application. * Its format depends on the scheduler implementation. @@ -337,6 +345,8 @@ class SparkContext(config: SparkConf) extends Logging { private[spark] def executorAllocationManager: Option[ExecutorAllocationManager] = _executorAllocationManager + private[spark] def resourceProfileManager: ResourceProfileManager = _resourceProfileManager + private[spark] def cleaner: Option[ContextCleaner] = _cleaner private[spark] var checkpointDir: Option[String] = None @@ -346,7 +356,7 @@ class SparkContext(config: SparkConf) extends Logging { override protected def childValue(parent: Properties): Properties = { // Note: make a clone such that changes in the parent properties aren't reflected in // the those of the children threads, which has confusing semantics (SPARK-10563). - SerializationUtils.clone(parent) + Utils.cloneProperties(parent) } override protected def initialValue(): Properties = new Properties() } @@ -367,7 +377,7 @@ class SparkContext(config: SparkConf) extends Logging { * @param logLevel The desired log level as a string. 
* Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN */ - def setLogLevel(logLevel: String) { + def setLogLevel(logLevel: String): Unit = { // let's allow lowercase or mixed case too val upperCased = logLevel.toUpperCase(Locale.ROOT) require(SparkContext.VALID_LOG_LEVELS.contains(upperCased), @@ -438,13 +448,14 @@ class SparkContext(config: SparkConf) extends Logging { _eventLogCodec = { val compress = _conf.get(EVENT_LOG_COMPRESS) if (compress && isEventLogEnabled) { - Some(CompressionCodec.getCodecName(_conf)).map(CompressionCodec.getShortName) + Some(_conf.get(EVENT_LOG_COMPRESSION_CODEC)).map(CompressionCodec.getShortName) } else { None } } _listenerBus = new LiveListenerBus(_conf) + _resourceProfileManager = new ResourceProfileManager(_conf) // Initialize the app status store and listener before SparkEnv is created so that it gets // all events. @@ -525,11 +536,19 @@ class SparkContext(config: SparkConf) extends Logging { executorEnvs ++= _conf.getExecutorEnv executorEnvs("SPARK_USER") = sparkUser + _shuffleDriverComponents = ShuffleDataIOUtils.loadShuffleDataIO(config).driver() + _shuffleDriverComponents.initializeApplication().asScala.foreach { case (k, v) => + _conf.set(ShuffleDataIOUtils.SHUFFLE_SPARK_CONF_PREFIX + k, v) + } + // We need to register "HeartbeatReceiver" before "createTaskScheduler" because Executor will // retrieve "HeartbeatReceiver" in the constructor. (SPARK-6640) _heartbeatReceiver = env.rpcEnv.setupEndpoint( HeartbeatReceiver.ENDPOINT_NAME, new HeartbeatReceiver(this)) + // Initialize any plugins before the task scheduler is initialized. 
+ _plugins = PluginContainer(this, _resources.asJava) + // Create and start the scheduler val (sched, ts) = SparkContext.createTaskScheduler(this, master, deployMode) _schedulerBackend = sched @@ -537,9 +556,16 @@ class SparkContext(config: SparkConf) extends Logging { _dagScheduler = new DAGScheduler(this) _heartbeatReceiver.ask[Boolean](TaskSchedulerIsSet) + val _executorMetricsSource = + if (_conf.get(METRICS_EXECUTORMETRICS_SOURCE_ENABLED)) { + Some(new ExecutorMetricsSource) + } else { + None + } + // create and start the heartbeater for collecting memory metrics _heartbeater = new Heartbeater( - () => SparkContext.this.reportHeartBeat(), + () => SparkContext.this.reportHeartBeat(_executorMetricsSource), "driver-heartbeater", conf.get(EXECUTOR_HEARTBEAT_INTERVAL)) _heartbeater.start() @@ -559,7 +585,7 @@ class SparkContext(config: SparkConf) extends Logging { // The metrics system for Driver need to be set spark.app.id to app ID. // So it should start after we get app ID from the task scheduler and set spark.app.id. - _env.metricsSystem.start() + _env.metricsSystem.start(_conf.get(METRICS_STATIC_SOURCES_ENABLED)) // Attach the driver metrics servlet handler to the web ui after the metrics system is started. 
_env.metricsSystem.getServletHandlers.foreach(handler => ui.foreach(_.attachHandler(handler))) @@ -577,7 +603,7 @@ class SparkContext(config: SparkConf) extends Logging { _cleaner = if (_conf.get(CLEANER_REFERENCE_TRACKING)) { - Some(new ContextCleaner(this)) + Some(new ContextCleaner(this, _shuffleDriverComponents)) } else { None } @@ -590,7 +616,7 @@ class SparkContext(config: SparkConf) extends Logging { case b: ExecutorAllocationClient => Some(new ExecutorAllocationManager( schedulerBackend.asInstanceOf[ExecutorAllocationClient], listenerBus, _conf, - cleaner = cleaner)) + cleaner = cleaner, resourceProfileManager = resourceProfileManager)) case _ => None } @@ -608,10 +634,12 @@ class SparkContext(config: SparkConf) extends Logging { _env.metricsSystem.registerSource(_dagScheduler.metricsSource) _env.metricsSystem.registerSource(new BlockManagerSource(_env.blockManager)) _env.metricsSystem.registerSource(new JVMCPUSource()) + _executorMetricsSource.foreach(_.register(_env.metricsSystem)) _executorAllocationManager.foreach { e => _env.metricsSystem.registerSource(e.executorAllocationManagerSource) } appStatusSource.foreach(_env.metricsSystem.registerSource(_)) + _plugins.foreach(_.registerMetrics(applicationId)) // Make sure the context is stopped if the user forgets about it. This avoids leaving // unfinished event logs around after the JVM exits cleanly. It doesn't help if the JVM // is killed, though. @@ -662,7 +690,7 @@ class SparkContext(config: SparkConf) extends Logging { private[spark] def getLocalProperties: Properties = localProperties.get() - private[spark] def setLocalProperties(props: Properties) { + private[spark] def setLocalProperties(props: Properties): Unit = { localProperties.set(props) } @@ -677,7 +705,7 @@ class SparkContext(config: SparkConf) extends Logging { * implementation of thread pools have worker threads spawn other worker threads. * As a result, local properties may propagate unpredictably. 
*/ - def setLocalProperty(key: String, value: String) { + def setLocalProperty(key: String, value: String): Unit = { if (value == null) { localProperties.get.remove(key) } else { @@ -693,7 +721,7 @@ class SparkContext(config: SparkConf) extends Logging { Option(localProperties.get).map(_.getProperty(key)).orNull /** Set a human readable description of the current job. */ - def setJobDescription(value: String) { + def setJobDescription(value: String): Unit = { setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, value) } @@ -721,7 +749,8 @@ class SparkContext(config: SparkConf) extends Logging { * are actually stopped in a timely manner, but is off by default due to HDFS-1208, where HDFS * may respond to Thread.interrupt() by marking nodes as dead. */ - def setJobGroup(groupId: String, description: String, interruptOnCancel: Boolean = false) { + def setJobGroup(groupId: String, + description: String, interruptOnCancel: Boolean = false): Unit = { setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, description) setLocalProperty(SparkContext.SPARK_JOB_GROUP_ID, groupId) // Note: Specifying interruptOnCancel in setJobGroup (rather than cancelJobGroup) avoids @@ -732,7 +761,7 @@ class SparkContext(config: SparkConf) extends Logging { } /** Clear the current thread's job group ID and its description. 
*/ - def clearJobGroup() { + def clearJobGroup(): Unit = { setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, null) setLocalProperty(SparkContext.SPARK_JOB_GROUP_ID, null) setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, null) @@ -1509,17 +1538,17 @@ class SparkContext(config: SparkConf) extends Logging { */ def addFile(path: String, recursive: Boolean): Unit = { val uri = new Path(path).toUri - val schemeCorrectedPath = uri.getScheme match { - case null => new File(path).getCanonicalFile.toURI.toString + val schemeCorrectedURI = uri.getScheme match { + case null => new File(path).getCanonicalFile.toURI case "local" => logWarning("File with 'local' scheme is not supported to add to file server, since " + "it is already available on every node.") return - case _ => path + case _ => uri } - val hadoopPath = new Path(schemeCorrectedPath) - val scheme = new URI(schemeCorrectedPath).getScheme + val hadoopPath = new Path(schemeCorrectedURI) + val scheme = schemeCorrectedURI.getScheme if (!Array("http", "https", "ftp").contains(scheme)) { val fs = hadoopPath.getFileSystem(hadoopConfiguration) val isDir = fs.getFileStatus(hadoopPath).isDirectory @@ -1539,7 +1568,11 @@ class SparkContext(config: SparkConf) extends Logging { val key = if (!isLocal && scheme == "file") { env.rpcEnv.fileServer.addFile(new File(uri.getPath)) } else { - schemeCorrectedPath + if (uri.getScheme == null) { + schemeCorrectedURI.toString + } else { + path + } } val timestamp = System.currentTimeMillis if (addedFiles.putIfAbsent(key, timestamp).isEmpty) { @@ -1560,7 +1593,7 @@ class SparkContext(config: SparkConf) extends Logging { * Register a listener to receive up-calls from events that happen during execution. 
*/ @DeveloperApi - def addSparkListener(listener: SparkListenerInterface) { + def addSparkListener(listener: SparkListenerInterface): Unit = { listenerBus.addToSharedQueue(listener) } @@ -1594,7 +1627,7 @@ class SparkContext(config: SparkConf) extends Logging { /** * Update the cluster manager on our scheduling needs. Three bits of information are included - * to help it make decisions. + * to help it make decisions. This applies to the default ResourceProfile. * @param numExecutors The total number of executors we'd like to have. The cluster manager * shouldn't kill any running executor to reach this number, but, * if all existing executors were to die, this is the number of executors @@ -1610,11 +1643,16 @@ class SparkContext(config: SparkConf) extends Logging { def requestTotalExecutors( numExecutors: Int, localityAwareTasks: Int, - hostToLocalTaskCount: scala.collection.immutable.Map[String, Int] + hostToLocalTaskCount: immutable.Map[String, Int] ): Boolean = { schedulerBackend match { case b: ExecutorAllocationClient => - b.requestTotalExecutors(numExecutors, localityAwareTasks, hostToLocalTaskCount) + // this is being applied to the default resource profile, would need to add api to support + // others + val defaultProfId = resourceProfileManager.defaultResourceProfile.id + b.requestTotalExecutors(immutable.Map(defaultProfId-> numExecutors), + immutable.Map(localityAwareTasks -> defaultProfId), + immutable.Map(defaultProfId -> hostToLocalTaskCount)) case _ => logWarning("Requesting executors is not supported by current scheduler.") false @@ -1789,14 +1827,14 @@ class SparkContext(config: SparkConf) extends Logging { /** * Register an RDD to be persisted in memory and/or disk storage */ - private[spark] def persistRDD(rdd: RDD[_]) { + private[spark] def persistRDD(rdd: RDD[_]): Unit = { persistentRdds(rdd.id) = rdd } /** * Unpersist an RDD from memory and/or disk storage */ - private[spark] def unpersistRDD(rddId: Int, blocking: Boolean) { + private[spark] def 
unpersistRDD(rddId: Int, blocking: Boolean): Unit = { env.blockManager.master.removeRdd(rddId, blocking) persistentRdds.remove(rddId) listenerBus.post(SparkListenerUnpersistRDD(rddId)) @@ -1812,7 +1850,7 @@ class SparkContext(config: SparkConf) extends Logging { * * @note A path can be added only once. Subsequent additions of the same path are ignored. */ - def addJar(path: String) { + def addJar(path: String): Unit = { def addLocalJarFile(file: File): String = { try { if (!file.exists()) { @@ -1832,7 +1870,7 @@ class SparkContext(config: SparkConf) extends Logging { def checkRemoteJarFile(path: String): String = { val hadoopPath = new Path(path) - val scheme = new URI(path).getScheme + val scheme = hadoopPath.toUri.getScheme if (!Array("http", "https", "ftp").contains(scheme)) { try { val fs = hadoopPath.getFileSystem(hadoopConfiguration) @@ -1854,21 +1892,21 @@ class SparkContext(config: SparkConf) extends Logging { } } - if (path == null) { - logWarning("null specified as parameter to addJar") + if (path == null || path.isEmpty) { + logWarning("null or empty path specified as parameter to addJar") } else { val key = if (path.contains("\\")) { // For local paths with backslashes on Windows, URI throws an exception addLocalJarFile(new File(path)) } else { - val uri = new URI(path) + val uri = new Path(path).toUri // SPARK-17650: Make sure this is a valid URL before adding it to the list of dependencies Utils.validateURL(uri) uri.getScheme match { // A JAR file which exists only on the driver node case null => // SPARK-22585 path without schema is not url encoded - addLocalJarFile(new File(uri.getRawPath)) + addLocalJarFile(new File(uri.getPath)) // A JAR file which exists only on the driver node case "file" => addLocalJarFile(new File(uri.getPath)) // A JAR file which exists locally on every worker node @@ -1966,6 +2004,9 @@ class SparkContext(config: SparkConf) extends Logging { _listenerBusStarted = false } } + Utils.tryLogNonFatalError { + 
_plugins.foreach(_.shutdown()) + } Utils.tryLogNonFatalError { _eventLogger.foreach(_.stop()) } @@ -1975,6 +2016,11 @@ class SparkContext(config: SparkConf) extends Logging { } _heartbeater = null } + if (_shuffleDriverComponents != null) { + Utils.tryLogNonFatalError { + _shuffleDriverComponents.cleanupApplication() + } + } if (env != null && _heartbeatReceiver != null) { Utils.tryLogNonFatalError { env.rpcEnv.stop(_heartbeatReceiver) @@ -2000,6 +2046,7 @@ class SparkContext(config: SparkConf) extends Logging { // Clear this `InheritableThreadLocal`, or it will still be inherited in child threads even this // `SparkContext` is stopped. localProperties.remove() + ResourceProfile.clearDefaultProfile() // Unset YARN mode system env variable, to allow switching between cluster types. SparkContext.clearActiveContext() logInfo("Successfully stopped SparkContext") @@ -2019,7 +2066,7 @@ class SparkContext(config: SparkConf) extends Logging { * Set the thread-local property for overriding the call sites * of actions and RDDs. */ - def setCallSite(shortCallSite: String) { + def setCallSite(shortCallSite: String): Unit = { setLocalProperty(CallSite.SHORT_FORM, shortCallSite) } @@ -2027,7 +2074,7 @@ class SparkContext(config: SparkConf) extends Logging { * Set the thread-local property for overriding the call sites * of actions and RDDs. */ - private[spark] def setCallSite(callSite: CallSite) { + private[spark] def setCallSite(callSite: CallSite): Unit = { setLocalProperty(CallSite.SHORT_FORM, callSite.shortForm) setLocalProperty(CallSite.LONG_FORM, callSite.longForm) } @@ -2036,7 +2083,7 @@ class SparkContext(config: SparkConf) extends Logging { * Clear the thread-local property for overriding the call sites * of actions and RDDs. 
*/ - def clearCallSite() { + def clearCallSite(): Unit = { setLocalProperty(CallSite.SHORT_FORM, null) setLocalProperty(CallSite.LONG_FORM, null) } @@ -2156,8 +2203,7 @@ class SparkContext(config: SparkConf) extends Logging { def runJob[T, U: ClassTag]( rdd: RDD[T], processPartition: (TaskContext, Iterator[T]) => U, - resultHandler: (Int, U) => Unit) - { + resultHandler: (Int, U) => Unit): Unit = { runJob[T, U](rdd, processPartition, 0 until rdd.partitions.length, resultHandler) } @@ -2171,8 +2217,7 @@ class SparkContext(config: SparkConf) extends Logging { def runJob[T, U: ClassTag]( rdd: RDD[T], processPartition: Iterator[T] => U, - resultHandler: (Int, U) => Unit) - { + resultHandler: (Int, U) => Unit): Unit = { val processFunc = (context: TaskContext, iter: Iterator[T]) => processPartition(iter) runJob[T, U](rdd, processFunc, 0 until rdd.partitions.length, resultHandler) } @@ -2257,13 +2302,13 @@ class SparkContext(config: SparkConf) extends Logging { * Cancel active jobs for the specified group. See `org.apache.spark.SparkContext.setJobGroup` * for more information. */ - def cancelJobGroup(groupId: String) { + def cancelJobGroup(groupId: String): Unit = { assertNotStopped() dagScheduler.cancelJobGroup(groupId) } /** Cancel all jobs that have been scheduled or are running. */ - def cancelAllJobs() { + def cancelAllJobs(): Unit = { assertNotStopped() dagScheduler.cancelAllJobs() } @@ -2351,7 +2396,7 @@ class SparkContext(config: SparkConf) extends Logging { * @param directory path to the directory where checkpoint files will be stored * (must be HDFS path if running in cluster) */ - def setCheckpointDir(directory: String) { + def setCheckpointDir(directory: String): Unit = { // If we are running on a cluster, log a warning if the directory is local. 
// Otherwise, the driver may attempt to reconstruct the checkpointed RDD from @@ -2423,7 +2468,7 @@ class SparkContext(config: SparkConf) extends Logging { } /** Post the application start event */ - private def postApplicationStart() { + private def postApplicationStart(): Unit = { // Note: this code assumes that the task scheduler has been initialized and has contacted // the cluster manager to get an application ID (in case the cluster manager provides one). listenerBus.post(SparkListenerApplicationStart(appName, Some(applicationId), @@ -2433,12 +2478,12 @@ class SparkContext(config: SparkConf) extends Logging { } /** Post the application end event */ - private def postApplicationEnd() { + private def postApplicationEnd(): Unit = { listenerBus.post(SparkListenerApplicationEnd(System.currentTimeMillis)) } /** Post the environment update event once the task scheduler is ready */ - private def postEnvironmentUpdate() { + private def postEnvironmentUpdate(): Unit = { if (taskScheduler != null) { val schedulingMode = getSchedulingMode.toString val addedJarPaths = addedJars.keys.toSeq @@ -2451,8 +2496,10 @@ class SparkContext(config: SparkConf) extends Logging { } /** Reports heartbeat metrics for the driver. */ - private def reportHeartBeat(): Unit = { + private def reportHeartBeat(executorMetricsSource: Option[ExecutorMetricsSource]): Unit = { val currentMetrics = ExecutorMetrics.getCurrentMetrics(env.memoryManager) + executorMetricsSource.foreach(_.updateMetricsSnapshot(currentMetrics)) + val driverUpdates = new HashMap[(Int, Int), ExecutorMetrics] // In the driver, we do not track per-stage metrics, so use a dummy stage for the key driverUpdates.put(EventLoggingListener.DRIVER_STAGE_KEY, new ExecutorMetrics(currentMetrics)) @@ -2735,75 +2782,34 @@ object SparkContext extends Logging { // When running locally, don't try to re-execute tasks on failure. val MAX_LOCAL_TASK_FAILURES = 1 - // Ensure that executor's resources satisfies one or more tasks requirement. 
- def checkResourcesPerTask(clusterMode: Boolean, executorCores: Option[Int]): Unit = { + // Ensure that default executor's resources satisfies one or more tasks requirement. + // This function is for cluster managers that don't set the executor cores config, for + // others its checked in ResourceProfile. + def checkResourcesPerTask(executorCores: Int): Unit = { val taskCores = sc.conf.get(CPUS_PER_TASK) - val execCores = if (clusterMode) { - executorCores.getOrElse(sc.conf.get(EXECUTOR_CORES)) - } else { - executorCores.get - } - - // Number of cores per executor must meet at least one task requirement. - if (execCores < taskCores) { - throw new SparkException(s"The number of cores per executor (=$execCores) has to be >= " + - s"the task config: ${CPUS_PER_TASK.key} = $taskCores when run on $master.") - } - - // Calculate the max slots each executor can provide based on resources available on each - // executor and resources required by each task. - val taskResourceRequirements = parseResourceRequirements(sc.conf, SPARK_TASK_PREFIX) - val executorResourcesAndAmounts = - parseAllResourceRequests(sc.conf, SPARK_EXECUTOR_PREFIX) - .map(request => (request.id.resourceName, request.amount)).toMap - var numSlots = execCores / taskCores - var limitingResourceName = "CPU" - - taskResourceRequirements.foreach { taskReq => - // Make sure the executor resources were specified through config. - val execAmount = executorResourcesAndAmounts.getOrElse(taskReq.resourceName, - throw new SparkException("The executor resource config: " + - ResourceID(SPARK_EXECUTOR_PREFIX, taskReq.resourceName).amountConf + - " needs to be specified since a task requirement config: " + - ResourceID(SPARK_TASK_PREFIX, taskReq.resourceName).amountConf + - " was specified") - ) - // Make sure the executor resources are large enough to launch at least one task. 
- if (execAmount < taskReq.amount) { - throw new SparkException("The executor resource config: " + - ResourceID(SPARK_EXECUTOR_PREFIX, taskReq.resourceName).amountConf + - s" = $execAmount has to be >= the requested amount in task resource config: " + - ResourceID(SPARK_TASK_PREFIX, taskReq.resourceName).amountConf + - s" = ${taskReq.amount}") - } - // Compare and update the max slots each executor can provide. - val resourceNumSlots = execAmount / taskReq.amount - if (resourceNumSlots < numSlots) { - numSlots = resourceNumSlots - limitingResourceName = taskReq.resourceName - } - } - // There have been checks above to make sure the executor resources were specified and are - // large enough if any task resources were specified. - taskResourceRequirements.foreach { taskReq => - val execAmount = executorResourcesAndAmounts(taskReq.resourceName) - if (taskReq.amount * numSlots < execAmount) { - val message = s"The configuration of resource: ${taskReq.resourceName} " + - s"(exec = ${execAmount}, task = ${taskReq.amount}) will result in wasted " + - s"resources due to resource ${limitingResourceName} limiting the number of " + - s"runnable tasks per executor to: ${numSlots}. Please adjust your configuration." 
- if (Utils.isTesting) { - throw new SparkException(message) - } else { - logWarning(message) - } - } + validateTaskCpusLargeEnough(executorCores, taskCores) + val defaultProf = sc.resourceProfileManager.defaultResourceProfile + // TODO - this is temporary until all of stage level scheduling feature is integrated, + // fail if any other resource limiting due to dynamic allocation and scheduler using + // slots based on cores + val cpuSlots = executorCores/taskCores + val limitingResource = defaultProf.limitingResource(sc.conf) + if (limitingResource.nonEmpty && !limitingResource.equals(ResourceProfile.CPUS) && + defaultProf.maxTasksPerExecutor(sc.conf) < cpuSlots) { + throw new IllegalArgumentException("The number of slots on an executor has to be " + + "limited by the number of cores, otherwise you waste resources and " + + "dynamic allocation doesn't work properly. Your configuration has " + + s"core/task cpu slots = ${cpuSlots} and " + + s"${limitingResource} = " + + s"${defaultProf.maxTasksPerExecutor(sc.conf)}. 
Please adjust your configuration " + + "so that all resources require same number of executor slots.") } + ResourceUtils.warnOnWastedResources(defaultProf, sc.conf, Some(executorCores)) } master match { case "local" => - checkResourcesPerTask(clusterMode = false, Some(1)) + checkResourcesPerTask(1) val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true) val backend = new LocalSchedulerBackend(sc.getConf, scheduler, 1) scheduler.initialize(backend) @@ -2816,7 +2822,7 @@ object SparkContext extends Logging { if (threadCount <= 0) { throw new SparkException(s"Asked to run locally with $threadCount threads") } - checkResourcesPerTask(clusterMode = false, Some(threadCount)) + checkResourcesPerTask(threadCount) val scheduler = new TaskSchedulerImpl(sc, MAX_LOCAL_TASK_FAILURES, isLocal = true) val backend = new LocalSchedulerBackend(sc.getConf, scheduler, threadCount) scheduler.initialize(backend) @@ -2827,14 +2833,13 @@ object SparkContext extends Logging { // local[*, M] means the number of cores on the computer with M failures // local[N, M] means exactly N threads with M failures val threadCount = if (threads == "*") localCpuCount else threads.toInt - checkResourcesPerTask(clusterMode = false, Some(threadCount)) + checkResourcesPerTask(threadCount) val scheduler = new TaskSchedulerImpl(sc, maxFailures.toInt, isLocal = true) val backend = new LocalSchedulerBackend(sc.getConf, scheduler, threadCount) scheduler.initialize(backend) (backend, scheduler) case SPARK_REGEX(sparkUrl) => - checkResourcesPerTask(clusterMode = true, None) val scheduler = new TaskSchedulerImpl(sc) val masterUrls = sparkUrl.split(",").map("spark://" + _) val backend = new StandaloneSchedulerBackend(scheduler, sc, masterUrls) @@ -2842,7 +2847,7 @@ object SparkContext extends Logging { (backend, scheduler) case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) => - checkResourcesPerTask(clusterMode = true, Some(coresPerSlave.toInt)) + 
checkResourcesPerTask(coresPerSlave.toInt) // Check to make sure memory requested <= memoryPerSlave. Otherwise Spark will just hang. val memoryPerSlaveInt = memoryPerSlave.toInt if (sc.executorMemory > memoryPerSlaveInt) { @@ -2851,6 +2856,14 @@ object SparkContext extends Logging { memoryPerSlaveInt, sc.executorMemory)) } + // For host local mode setting the default of SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED + // to false because this mode is intended to be used for testing and in this case all the + // executors are running on the same host. So if host local reading was enabled here then + // testing of the remote fetching would be secondary as setting this config explicitly to + // false would be required in most of the unit test (despite the fact that remote fetching + // is much more frequent in production). + sc.conf.setIfMissing(SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED, false) + val scheduler = new TaskSchedulerImpl(sc) val localCluster = new LocalSparkCluster( numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt, sc.conf) @@ -2863,7 +2876,6 @@ object SparkContext extends Logging { (backend, scheduler) case masterUrl => - checkResourcesPerTask(clusterMode = true, None) val cm = getClusterManager(masterUrl) match { case Some(clusterMgr) => clusterMgr case None => throw new SparkException("Could not parse Master URL: '" + master + "'") diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 419f0ab065150..8ba1739831803 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -22,10 +22,11 @@ import java.net.Socket import java.util.Locale import scala.collection.JavaConverters._ +import scala.collection.concurrent import scala.collection.mutable import scala.util.Properties -import com.google.common.collect.MapMaker +import com.google.common.cache.CacheBuilder import org.apache.hadoop.conf.Configuration import 
org.apache.spark.annotation.DeveloperApi @@ -70,16 +71,17 @@ class SparkEnv ( val outputCommitCoordinator: OutputCommitCoordinator, val conf: SparkConf) extends Logging { - private[spark] var isStopped = false + @volatile private[spark] var isStopped = false private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() // A general, soft-reference map for metadata needed during HadoopRDD split computation // (e.g., HadoopFileRDD uses this to cache JobConfs and InputFormats). - private[spark] val hadoopJobMetadata = new MapMaker().softValues().makeMap[String, Any]() + private[spark] val hadoopJobMetadata = + CacheBuilder.newBuilder().softValues().build[String, AnyRef]().asMap() private[spark] var driverTmpDir: Option[String] = None - private[spark] def stop() { + private[spark] def stop(): Unit = { if (!isStopped) { isStopped = true @@ -119,7 +121,8 @@ class SparkEnv ( } private[spark] - def destroyPythonWorker(pythonExec: String, envVars: Map[String, String], worker: Socket) { + def destroyPythonWorker(pythonExec: String, + envVars: Map[String, String], worker: Socket): Unit = { synchronized { val key = (pythonExec, envVars) pythonWorkers.get(key).foreach(_.stopWorker(worker)) @@ -127,7 +130,8 @@ class SparkEnv ( } private[spark] - def releasePythonWorker(pythonExec: String, envVars: Map[String, String], worker: Socket) { + def releasePythonWorker(pythonExec: String, + envVars: Map[String, String], worker: Socket): Unit = { synchronized { val key = (pythonExec, envVars) pythonWorkers.get(key).foreach(_.releaseWorker(worker)) @@ -141,7 +145,7 @@ object SparkEnv extends Logging { private[spark] val driverSystemName = "sparkDriver" private[spark] val executorSystemName = "sparkExecutor" - def set(e: SparkEnv) { + def set(e: SparkEnv): Unit = { env = e } @@ -193,6 +197,7 @@ object SparkEnv extends Logging { private[spark] def createExecutorEnv( conf: SparkConf, executorId: String, + bindAddress: String, hostname: String, numCores: 
Int, ioEncryptionKey: Option[Array[Byte]], @@ -200,7 +205,7 @@ object SparkEnv extends Logging { val env = create( conf, executorId, - hostname, + bindAddress, hostname, None, isLocal, @@ -211,6 +216,17 @@ object SparkEnv extends Logging { env } + private[spark] def createExecutorEnv( + conf: SparkConf, + executorId: String, + hostname: String, + numCores: Int, + ioEncryptionKey: Option[Array[Byte]], + isLocal: Boolean): SparkEnv = { + createExecutorEnv(conf, executorId, hostname, + hostname, numCores, ioEncryptionKey, isLocal) + } + /** * Helper method to create a SparkEnv for a driver or an executor. */ @@ -337,19 +353,26 @@ object SparkEnv extends Logging { None } - val blockManagerMaster = new BlockManagerMaster(registerOrLookupEndpoint( - BlockManagerMaster.DRIVER_ENDPOINT_NAME, - new BlockManagerMasterEndpoint( - rpcEnv, - isLocal, - conf, - listenerBus, - if (conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)) { - externalShuffleClient - } else { - None - })), - conf, isDriver) + // Mapping from block manager id to the block manager's information. 
+ val blockManagerInfo = new concurrent.TrieMap[BlockManagerId, BlockManagerInfo]() + val blockManagerMaster = new BlockManagerMaster( + registerOrLookupEndpoint( + BlockManagerMaster.DRIVER_ENDPOINT_NAME, + new BlockManagerMasterEndpoint( + rpcEnv, + isLocal, + conf, + listenerBus, + if (conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)) { + externalShuffleClient + } else { + None + }, blockManagerInfo)), + registerOrLookupEndpoint( + BlockManagerMaster.DRIVER_HEARTBEAT_ENDPOINT_NAME, + new BlockManagerMasterHeartbeatEndpoint(rpcEnv, isLocal, blockManagerInfo)), + conf, + isDriver) val blockTransferService = new NettyBlockTransferService(conf, securityManager, bindAddress, advertiseAddress, @@ -381,7 +404,7 @@ object SparkEnv extends Logging { conf.set(EXECUTOR_ID, executorId) val ms = MetricsSystem.createMetricsSystem(MetricsSystemInstances.EXECUTOR, conf, securityManager) - ms.start() + ms.start(conf.get(METRICS_STATIC_SOURCES_ENABLED)) ms } diff --git a/core/src/main/scala/org/apache/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala index 2299c54e2624b..fd41facf95c76 100644 --- a/core/src/main/scala/org/apache/spark/TaskContext.scala +++ b/core/src/main/scala/org/apache/spark/TaskContext.scala @@ -185,6 +185,14 @@ abstract class TaskContext extends Serializable { @Evolving def resources(): Map[String, ResourceInformation] + /** + * (java-specific) Resources allocated to the task. The key is the resource name and the value + * is information about the resource. Please refer to + * [[org.apache.spark.resource.ResourceInformation]] for specifics. 
+ */ + @Evolving + def resourcesJMap(): java.util.Map[String, ResourceInformation] + @DeveloperApi def taskMetrics(): TaskMetrics diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala index 516fb95593324..08a58a029528b 100644 --- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala +++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala @@ -20,6 +20,7 @@ package org.apache.spark import java.util.Properties import javax.annotation.concurrent.GuardedBy +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.executor.TaskMetrics @@ -101,6 +102,10 @@ private[spark] class TaskContextImpl( this } + override def resourcesJMap(): java.util.Map[String, ResourceInformation] = { + resources.asJava + } + @GuardedBy("this") private[spark] override def markTaskFailed(error: Throwable): Unit = synchronized { if (failed) return diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 19f71a1dec296..b13028f868072 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -83,14 +83,15 @@ case object Resubmitted extends TaskFailedReason { case class FetchFailed( bmAddress: BlockManagerId, // Note that bmAddress can be null shuffleId: Int, - mapId: Int, + mapId: Long, + mapIndex: Int, reduceId: Int, message: String) extends TaskFailedReason { override def toErrorString: String = { val bmAddressString = if (bmAddress == null) "null" else bmAddress.toString - s"FetchFailed($bmAddressString, shuffleId=$shuffleId, mapId=$mapId, reduceId=$reduceId, " + - s"message=\n$message\n)" + s"FetchFailed($bmAddressString, shuffleId=$shuffleId, mapIndex=$mapIndex, " + + s"mapId=$mapId, reduceId=$reduceId, message=\n$message\n)" } /** diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/ShowTablesStatement.scala b/core/src/main/scala/org/apache/spark/TaskOutputFileAlreadyExistException.scala similarity index 78% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/ShowTablesStatement.scala rename to core/src/main/scala/org/apache/spark/TaskOutputFileAlreadyExistException.scala index d75c4085a974b..68054625bac21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/ShowTablesStatement.scala +++ b/core/src/main/scala/org/apache/spark/TaskOutputFileAlreadyExistException.scala @@ -15,10 +15,9 @@ * limitations under the License. */ -package org.apache.spark.sql.catalyst.plans.logical.sql +package org.apache.spark /** - * A SHOW TABLES statement, as parsed from SQL. + * Exception thrown when a task cannot write to output file due to the file already exists. */ -case class ShowTablesStatement(namespace: Option[Seq[String]], pattern: Option[String]) - extends ParsedStatement +private[spark] class TaskOutputFileAlreadyExistException(error: Throwable) extends Exception(error) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 41ae3ae3b758a..d459627930f4c 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -24,9 +24,9 @@ import java.nio.file.{Files => JavaFiles} import java.nio.file.attribute.PosixFilePermission.{OWNER_EXECUTE, OWNER_READ, OWNER_WRITE} import java.security.SecureRandom import java.security.cert.X509Certificate -import java.util.{Arrays, EnumSet, Properties} +import java.util.{Arrays, EnumSet, Locale, Properties} import java.util.concurrent.{TimeoutException, TimeUnit} -import java.util.jar.{JarEntry, JarOutputStream} +import java.util.jar.{JarEntry, JarOutputStream, Manifest} import javax.net.ssl._ import javax.tools.{JavaFileObject, 
SimpleJavaFileObject, ToolProvider} @@ -42,7 +42,6 @@ import org.json4s.JsonAST.JValue import org.json4s.jackson.JsonMethods.{compact, render} import org.apache.spark.executor.TaskMetrics -import org.apache.spark.internal.config._ import org.apache.spark.scheduler._ import org.apache.spark.util.Utils @@ -98,9 +97,23 @@ private[spark] object TestUtils { * Create a jar file that contains this set of files. All files will be located in the specified * directory or at the root of the jar. */ - def createJar(files: Seq[File], jarFile: File, directoryPrefix: Option[String] = None): URL = { + def createJar( + files: Seq[File], + jarFile: File, + directoryPrefix: Option[String] = None, + mainClass: Option[String] = None): URL = { + val manifest = mainClass match { + case Some(mc) => + val m = new Manifest() + m.getMainAttributes.putValue("Manifest-Version", "1.0") + m.getMainAttributes.putValue("Main-Class", mc) + m + case None => + new Manifest() + } + val jarFileStream = new FileOutputStream(jarFile) - val jarStream = new JarOutputStream(jarFileStream, new java.util.jar.Manifest()) + val jarStream = new JarOutputStream(jarFileStream, manifest) for (file <- files) { // The `name` for the argument in `JarEntry` should use / for its separator. This is @@ -201,12 +214,20 @@ private[spark] object TestUtils { * Asserts that exception message contains the message. Please note this checks all * exceptions in the tree. 
*/ - def assertExceptionMsg(exception: Throwable, msg: String): Unit = { + def assertExceptionMsg(exception: Throwable, msg: String, ignoreCase: Boolean = false): Unit = { + def contain(msg1: String, msg2: String): Boolean = { + if (ignoreCase) { + msg1.toLowerCase(Locale.ROOT).contains(msg2.toLowerCase(Locale.ROOT)) + } else { + msg1.contains(msg2) + } + } + var e = exception - var contains = e.getMessage.contains(msg) + var contains = contain(e.getMessage, msg) while (e.getCause != null && !contains) { e = e.getCause - contains = e.getMessage.contains(msg) + contains = contain(e.getMessage, msg) } assert(contains, s"Exception tree doesn't contain the expected message: $msg") } @@ -226,6 +247,16 @@ private[spark] object TestUtils { url: URL, method: String = "GET", headers: Seq[(String, String)] = Nil): Int = { + withHttpConnection(url, method, headers = headers) { connection => + connection.getResponseCode() + } + } + + def withHttpConnection[T]( + url: URL, + method: String = "GET", + headers: Seq[(String, String)] = Nil) + (fn: HttpURLConnection => T): T = { val connection = url.openConnection().asInstanceOf[HttpURLConnection] connection.setRequestMethod(method) headers.foreach { case (k, v) => connection.setRequestProperty(k, v) } @@ -235,8 +266,10 @@ private[spark] object TestUtils { val sslCtx = SSLContext.getInstance("SSL") val trustManager = new X509TrustManager { override def getAcceptedIssuers(): Array[X509Certificate] = null - override def checkClientTrusted(x509Certificates: Array[X509Certificate], s: String) {} - override def checkServerTrusted(x509Certificates: Array[X509Certificate], s: String) {} + override def checkClientTrusted(x509Certificates: Array[X509Certificate], + s: String): Unit = {} + override def checkServerTrusted(x509Certificates: Array[X509Certificate], + s: String): Unit = {} } val verifier = new HostnameVerifier() { override def verify(hostname: String, session: SSLSession): Boolean = true @@ -248,7 +281,7 @@ private[spark] object 
TestUtils { try { connection.connect() - connection.getResponseCode() + fn(connection) } finally { connection.disconnect() } @@ -264,7 +297,7 @@ private[spark] object TestUtils { try { body(listener) } finally { - sc.listenerBus.waitUntilEmpty(TimeUnit.SECONDS.toMillis(10)) + sc.listenerBus.waitUntilEmpty() sc.listenerBus.removeListener(listener) } } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 317f3c51d0154..aa01374a2f2e8 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -791,7 +791,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[F], - conf: JobConf) { + conf: JobConf): Unit = { rdd.saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass, conf) } @@ -800,7 +800,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) path: String, keyClass: Class[_], valueClass: Class[_], - outputFormatClass: Class[F]) { + outputFormatClass: Class[F]): Unit = { rdd.saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass) } @@ -810,7 +810,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[F], - codec: Class[_ <: CompressionCodec]) { + codec: Class[_ <: CompressionCodec]): Unit = { rdd.saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass, codec) } @@ -820,7 +820,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[F], - conf: Configuration) { + conf: Configuration): Unit = { rdd.saveAsNewAPIHadoopFile(path, keyClass, valueClass, outputFormatClass, conf) } @@ -828,7 +828,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * Output the RDD to any Hadoop-supported storage system, using * a Configuration object for that storage system. 
*/ - def saveAsNewAPIHadoopDataset(conf: Configuration) { + def saveAsNewAPIHadoopDataset(conf: Configuration): Unit = { rdd.saveAsNewAPIHadoopDataset(conf) } @@ -837,7 +837,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) path: String, keyClass: Class[_], valueClass: Class[_], - outputFormatClass: Class[F]) { + outputFormatClass: Class[F]): Unit = { rdd.saveAsNewAPIHadoopFile(path, keyClass, valueClass, outputFormatClass) } @@ -847,7 +847,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) * (e.g. a table name to write to) in the same way as it would be configured for a Hadoop * MapReduce job. */ - def saveAsHadoopDataset(conf: JobConf) { + def saveAsHadoopDataset(conf: JobConf): Unit = { rdd.saveAsHadoopDataset(conf) } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 5ba821935ac69..1ca5262742665 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -347,7 +347,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Applies a function f to all elements of this RDD. */ - def foreach(f: VoidFunction[T]) { + def foreach(f: VoidFunction[T]): Unit = { rdd.foreach(x => f.call(x)) } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 330c2f6e6117e..149def29b8fbd 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -546,7 +546,7 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable { def broadcast[T](value: T): Broadcast[T] = sc.broadcast(value)(fakeClassTag) /** Shut down the SparkContext. 
*/ - def stop() { + def stop(): Unit = { sc.stop() } @@ -567,7 +567,7 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable { * * @note A path can be added only once. Subsequent additions of the same path are ignored. */ - def addFile(path: String) { + def addFile(path: String): Unit = { sc.addFile(path) } @@ -593,7 +593,7 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable { * * @note A path can be added only once. Subsequent additions of the same path are ignored. */ - def addJar(path: String) { + def addJar(path: String): Unit = { sc.addJar(path) } @@ -609,9 +609,9 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable { /** * Set the directory under which RDDs are going to be checkpointed. The directory must - * be a HDFS path if running on a cluster. + * be an HDFS path if running on a cluster. */ - def setCheckpointDir(dir: String) { + def setCheckpointDir(dir: String): Unit = { sc.setCheckpointDir(dir) } @@ -631,14 +631,14 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable { /** * Pass-through to SparkContext.setCallSite. For API support only. */ - def setCallSite(site: String) { + def setCallSite(site: String): Unit = { sc.setCallSite(site) } /** * Pass-through to SparkContext.setCallSite. For API support only. */ - def clearCallSite() { + def clearCallSite(): Unit = { sc.clearCallSite() } @@ -669,7 +669,7 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable { * @param logLevel The desired log level as a string. 
* Valid log levels include: ALL, DEBUG, ERROR, FATAL, INFO, OFF, TRACE, WARN */ - def setLogLevel(logLevel: String) { + def setLogLevel(logLevel: String): Unit = { sc.setLogLevel(logLevel) } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala index fd96052f95d3f..e9c77f4086d0d 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala @@ -81,7 +81,7 @@ private[spark] object JavaUtils { } } - override def remove() { + override def remove(): Unit = { prev match { case Some(k) => underlying match { diff --git a/core/src/main/scala/org/apache/spark/api/python/Py4JServer.scala b/core/src/main/scala/org/apache/spark/api/python/Py4JServer.scala new file mode 100644 index 0000000000000..db440b1178920 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/api/python/Py4JServer.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.api.python + +import java.net.InetAddress +import java.util.Locale + +import org.apache.spark.SparkConf +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +/** + * A wrapper for both GatewayServer, and ClientServer to pin Python thread to JVM thread. + */ +private[spark] class Py4JServer(sparkConf: SparkConf) extends Logging { + private[spark] val secret: String = Utils.createSecret(sparkConf) + + // Launch a Py4J gateway or client server for the process to connect to; this will let it see our + // Java system properties and such + private val localhost = InetAddress.getLoopbackAddress() + private[spark] val server = if (sys.env.getOrElse( + "PYSPARK_PIN_THREAD", "false").toLowerCase(Locale.ROOT) == "true") { + new py4j.ClientServer.ClientServerBuilder() + .authToken(secret) + .javaPort(0) + .javaAddress(localhost) + .build() + } else { + new py4j.GatewayServer.GatewayServerBuilder() + .authToken(secret) + .javaPort(0) + .javaAddress(localhost) + .callbackClient(py4j.GatewayServer.DEFAULT_PYTHON_PORT, localhost, secret) + .build() + } + + def start(): Unit = server match { + case clientServer: py4j.ClientServer => clientServer.startServer() + case gatewayServer: py4j.GatewayServer => gatewayServer.start() + case other => throw new RuntimeException(s"Unexpected Py4J server ${other.getClass}") + } + + def getListeningPort: Int = server match { + case clientServer: py4j.ClientServer => clientServer.getJavaServer.getListeningPort + case gatewayServer: py4j.GatewayServer => gatewayServer.getListeningPort + case other => throw new RuntimeException(s"Unexpected Py4J server ${other.getClass}") + } + + def shutdown(): Unit = server match { + case clientServer: py4j.ClientServer => clientServer.shutdown() + case gatewayServer: py4j.GatewayServer => gatewayServer.shutdown() + case other => throw new RuntimeException(s"Unexpected Py4J server ${other.getClass}") + } +} diff --git 
a/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala b/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala index 9ddc4a4910180..ed70e26e2520d 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala @@ -18,18 +18,14 @@ package org.apache.spark.api.python import java.io.{DataOutputStream, File, FileOutputStream} -import java.net.InetAddress import java.nio.charset.StandardCharsets.UTF_8 import java.nio.file.Files -import py4j.GatewayServer - import org.apache.spark.SparkConf import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils /** - * Process that starts a Py4J GatewayServer on an ephemeral port. + * Process that starts a Py4J server on an ephemeral port. * * This process is launched (via SparkSubmit) by the PySpark driver (see java_gateway.py). */ @@ -37,23 +33,13 @@ private[spark] object PythonGatewayServer extends Logging { initializeLogIfNecessary(true) def main(args: Array[String]): Unit = { - val secret = Utils.createSecret(new SparkConf()) - - // Start a GatewayServer on an ephemeral port. Make sure the callback client is configured - // with the same secret, in case the app needs callbacks from the JVM to the underlying - // python processes. 
- val localhost = InetAddress.getLoopbackAddress() - val gatewayServer: GatewayServer = new GatewayServer.GatewayServerBuilder() - .authToken(secret) - .javaPort(0) - .javaAddress(localhost) - .callbackClient(GatewayServer.DEFAULT_PYTHON_PORT, localhost, secret) - .build() + val sparkConf = new SparkConf() + val gatewayServer: Py4JServer = new Py4JServer(sparkConf) gatewayServer.start() val boundPort: Int = gatewayServer.getListeningPort if (boundPort == -1) { - logError("GatewayServer failed to bind; exiting") + logError(s"${gatewayServer.server.getClass} failed to bind; exiting") System.exit(1) } else { logDebug(s"Started PythonGatewayServer on port $boundPort") @@ -68,7 +54,7 @@ private[spark] object PythonGatewayServer extends Logging { val dos = new DataOutputStream(new FileOutputStream(tmpPath)) dos.writeInt(boundPort) - val secretBytes = secret.getBytes(UTF_8) + val secretBytes = gatewayServer.secret.getBytes(UTF_8) dos.writeInt(secretBytes.length) dos.write(secretBytes, 0, secretBytes.length) dos.close() diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 4d76ff76e6752..6dc1721f56adf 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -24,6 +24,7 @@ import java.util.{ArrayList => JArrayList, List => JList, Map => JMap} import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.concurrent.duration.Duration import scala.reflect.ClassTag import org.apache.hadoop.conf.Configuration @@ -179,15 +180,22 @@ private[spark] object PythonRDD extends Logging { * data collected from this job, the secret for authentication, and a socket auth * server object that can be used to join the JVM serving thread in Python. 
*/ - def toLocalIteratorAndServe[T](rdd: RDD[T]): Array[Any] = { + def toLocalIteratorAndServe[T](rdd: RDD[T], prefetchPartitions: Boolean = false): Array[Any] = { val handleFunc = (sock: Socket) => { val out = new DataOutputStream(sock.getOutputStream) val in = new DataInputStream(sock.getInputStream) Utils.tryWithSafeFinallyAndFailureCallbacks(block = { // Collects a partition on each iteration val collectPartitionIter = rdd.partitions.indices.iterator.map { i => - rdd.sparkContext.runJob(rdd, (iter: Iterator[Any]) => iter.toArray, Seq(i)).head + var result: Array[Any] = null + rdd.sparkContext.submitJob( + rdd, + (iter: Iterator[Any]) => iter.toArray, + Seq(i), // The partition we are evaluating + (_, res: Array[Any]) => result = res, + result) } + val prefetchIter = collectPartitionIter.buffered // Write data until iteration is complete, client stops iteration, or error occurs var complete = false @@ -196,10 +204,15 @@ private[spark] object PythonRDD extends Logging { // Read request for data, value of zero will stop iteration or non-zero to continue if (in.readInt() == 0) { complete = true - } else if (collectPartitionIter.hasNext) { + } else if (prefetchIter.hasNext) { // Client requested more data, attempt to collect the next partition - val partitionArray = collectPartitionIter.next() + val partitionFuture = prefetchIter.next() + // Cause the next job to be submitted if prefetchPartitions is enabled. 
+ if (prefetchPartitions) { + prefetchIter.headOption + } + val partitionArray = ThreadUtils.awaitResult(partitionFuture, Duration.Inf) // Send response there is a partition to read out.writeInt(1) @@ -245,7 +258,7 @@ private[spark] object PythonRDD extends Logging { new PythonBroadcast(path) } - def writeIteratorToStream[T](iter: Iterator[T], dataOut: DataOutputStream) { + def writeIteratorToStream[T](iter: Iterator[T], dataOut: DataOutputStream): Unit = { def write(obj: Any): Unit = obj match { case null => @@ -431,7 +444,7 @@ private[spark] object PythonRDD extends Logging { } } - def writeUTF(str: String, dataOut: DataOutputStream) { + def writeUTF(str: String, dataOut: DataOutputStream): Unit = { val bytes = str.getBytes(StandardCharsets.UTF_8) dataOut.writeInt(bytes.length) dataOut.write(bytes) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index d2a10df7acbd3..658e0d593a167 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -48,6 +48,7 @@ private[spark] object PythonEvalType { val SQL_WINDOW_AGG_PANDAS_UDF = 203 val SQL_SCALAR_PANDAS_ITER_UDF = 204 val SQL_MAP_PANDAS_ITER_UDF = 205 + val SQL_COGROUPED_MAP_PANDAS_UDF = 206 def toString(pythonEvalType: Int): String = pythonEvalType match { case NON_UDF => "NON_UDF" @@ -58,6 +59,7 @@ private[spark] object PythonEvalType { case SQL_WINDOW_AGG_PANDAS_UDF => "SQL_WINDOW_AGG_PANDAS_UDF" case SQL_SCALAR_PANDAS_ITER_UDF => "SQL_SCALAR_PANDAS_ITER_UDF" case SQL_MAP_PANDAS_ITER_UDF => "SQL_MAP_PANDAS_ITER_UDF" + case SQL_COGROUPED_MAP_PANDAS_UDF => "SQL_COGROUPED_MAP_PANDAS_UDF" } } @@ -192,7 +194,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( def exception: Option[Throwable] = Option(_exception) /** Terminates the writer thread, ignoring any exceptions that may occur due to cleanup. 
*/ - def shutdownOnTaskCompletion() { + def shutdownOnTaskCompletion(): Unit = { assert(context.isCompleted) this.interrupt() } @@ -410,7 +412,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( } } - def writeUTF(str: String, dataOut: DataOutputStream) { + def writeUTF(str: String, dataOut: DataOutputStream): Unit = { val bytes = str.getBytes(UTF_8) dataOut.writeInt(bytes.length) dataOut.write(bytes) @@ -529,7 +531,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( setDaemon(true) - override def run() { + override def run(): Unit = { // Kill the worker if it is interrupted, checking until task completion. // TODO: This has a race condition if interruption occurs, as completed may still become true. while (!context.isInterrupted && !context.isCompleted) { @@ -609,7 +611,7 @@ private[spark] class PythonRunner(funcs: Seq[ChainedPythonFunctions]) val obj = new Array[Byte](length) stream.readFully(obj) obj - case 0 => Array.empty[Byte] + case 0 => Array.emptyByteArray case SpecialLengths.TIMING_DATA => handleTimingData() read() diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala index 6c37844a088ce..df236ba8926c1 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala @@ -189,7 +189,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String null } - private def startDaemon() { + private def startDaemon(): Unit = { self.synchronized { // Is it already running? 
if (daemon != null) { @@ -212,8 +212,13 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String try { daemonPort = in.readInt() } catch { + case _: EOFException if daemon.isAlive => + throw new SparkException("EOFException occurred while reading the port number " + + s"from $daemonModule's stdout") case _: EOFException => - throw new SparkException(s"No port number in $daemonModule's stdout") + throw new SparkException( + s"EOFException occurred while reading the port number from $daemonModule's" + + s" stdout and terminated with code: ${daemon.exitValue}.") } // test that the returned port number is within a valid range. @@ -271,7 +276,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String /** * Redirect the given streams to our stderr in separate threads. */ - private def redirectStreamsToStderr(stdout: InputStream, stderr: InputStream) { + private def redirectStreamsToStderr(stdout: InputStream, stderr: InputStream): Unit = { try { new RedirectThread(stdout, System.err, "stdout reader for " + pythonExec).start() new RedirectThread(stderr, System.err, "stderr reader for " + pythonExec).start() @@ -288,7 +293,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String setDaemon(true) - override def run() { + override def run(): Unit = { while (true) { self.synchronized { if (IDLE_WORKER_TIMEOUT_NS < System.nanoTime() - lastActivityNs) { @@ -301,7 +306,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String } } - private def cleanupIdleWorkers() { + private def cleanupIdleWorkers(): Unit = { while (idleWorkers.nonEmpty) { val worker = idleWorkers.dequeue() try { @@ -314,7 +319,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String } } - private def stopDaemon() { + private def stopDaemon(): Unit = { self.synchronized { if (useDaemon) { cleanupIdleWorkers() @@ -332,11 +337,11 @@ private[spark] class 
PythonWorkerFactory(pythonExec: String, envVars: Map[String } } - def stop() { + def stop(): Unit = { stopDaemon() } - def stopWorker(worker: Socket) { + def stopWorker(worker: Socket): Unit = { self.synchronized { if (useDaemon) { if (daemon != null) { @@ -355,7 +360,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String worker.close() } - def releaseWorker(worker: Socket) { + def releaseWorker(worker: Socket): Unit = { if (useDaemon) { self.synchronized { lastActivityNs = System.nanoTime() diff --git a/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala index 86965dbc2e778..4e790b364e1d2 100644 --- a/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala +++ b/core/src/main/scala/org/apache/spark/api/python/WriteInputFormatTestDataGenerator.scala @@ -37,11 +37,11 @@ case class TestWritable(var str: String, var int: Int, var double: Double) exten def this() = this("", 0, 0.0) def getStr: String = str - def setStr(str: String) { this.str = str } + def setStr(str: String): Unit = { this.str = str } def getInt: Int = int - def setInt(int: Int) { this.int = int } + def setInt(int: Int): Unit = { this.int = int } def getDouble: Double = double - def setDouble(double: Double) { this.double = double } + def setDouble(double: Double): Unit = { this.double = double } def write(out: DataOutput): Unit = { out.writeUTF(str) @@ -106,13 +106,13 @@ private[python] class WritableToDoubleArrayConverter extends Converter[Any, Arra */ object WriteInputFormatTestDataGenerator { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val path = args(0) val sc = new JavaSparkContext("local[4]", "test-writables") generateData(path, sc) } - def generateData(path: String, jsc: JavaSparkContext) { + def generateData(path: String, jsc: JavaSparkContext): Unit = { val sc = jsc.sc val 
basePath = s"$path/sftestdata/" diff --git a/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala b/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala index f96c5215cf0af..fdfe5f5b41d0a 100644 --- a/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/r/BaseRRunner.scala @@ -82,12 +82,7 @@ private[spark] abstract class BaseRRunner[IN, OUT]( serverSocket.close() } - try { - newReaderIterator(dataStream, errThread) - } catch { - case e: Exception => - throw new SparkException("R computation failed with\n " + errThread.getLines(), e) - } + newReaderIterator(dataStream, errThread) } /** @@ -138,6 +133,16 @@ private[spark] abstract class BaseRRunner[IN, OUT]( * and then returns null. */ protected def read(): OUT + + protected val handleException: PartialFunction[Throwable, OUT] = { + case e: Exception => + var msg = "R unexpectedly exited." + val lines = errThread.getLines() + if (lines.trim().nonEmpty) { + msg += s"\nR worker produced errors: $lines\n" + } + throw new SparkException(msg, e) + } } /** @@ -230,7 +235,7 @@ private[spark] class BufferedStreamThread( errBufferSize: Int) extends Thread(name) with Logging { val lines = new Array[String](errBufferSize) var lineIdx = 0 - override def run() { + override def run(): Unit = { for (line <- Source.fromInputStream(in).getLines) { synchronized { lines(lineIdx) = line diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala index 0327386b45ed5..20ab6fc2f348d 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala @@ -125,10 +125,7 @@ private[spark] class RRunner[IN, OUT]( eos = true null.asInstanceOf[OUT] } - } catch { - case eof: EOFException => - throw new SparkException("R worker exited unexpectedly (cranshed)", eof) - } + } catch handleException } } } diff --git 
a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala index 0e81ad198db67..9ef6c7c5906a2 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala @@ -74,7 +74,7 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Lo * Asynchronously delete cached copies of this broadcast on the executors. * If the broadcast is used after this is called, it will need to be re-sent to each executor. */ - def unpersist() { + def unpersist(): Unit = { unpersist(blocking = false) } @@ -83,7 +83,7 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Lo * this is called, it will need to be re-sent to each executor. * @param blocking Whether to block until unpersisting has completed */ - def unpersist(blocking: Boolean) { + def unpersist(blocking: Boolean): Unit = { assertValid() doUnpersist(blocking) } @@ -93,7 +93,7 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Lo * Destroy all data and metadata related to this broadcast variable. Use this with caution; * once a broadcast variable has been destroyed, it cannot be used again. */ - def destroy() { + def destroy(): Unit = { destroy(blocking = false) } @@ -102,7 +102,7 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Lo * once a broadcast variable has been destroyed, it cannot be used again. * @param blocking Whether to block until destroy has completed */ - private[spark] def destroy(blocking: Boolean) { + private[spark] def destroy(blocking: Boolean): Unit = { assertValid() _isValid = false _destroySite = Utils.getCallSite().shortForm @@ -128,17 +128,17 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Lo * Actually unpersist the broadcasted value on the executors. 
Concrete implementations of * Broadcast class must define their own logic to unpersist their own data. */ - protected def doUnpersist(blocking: Boolean) + protected def doUnpersist(blocking: Boolean): Unit /** * Actually destroy all data and metadata related to this broadcast variable. * Implementation of Broadcast class must define their own logic to destroy their own * state. */ - protected def doDestroy(blocking: Boolean) + protected def doDestroy(blocking: Boolean): Unit /** Check if this broadcast is valid. If not valid, exception is thrown. */ - protected def assertValid() { + protected def assertValid(): Unit = { if (!_isValid) { throw new SparkException( "Attempted to use %s after it was destroyed (%s) ".format(toString, _destroySite)) diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala index 9fa47451c1831..c93cadf1ab3e8 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala @@ -40,7 +40,7 @@ private[spark] class BroadcastManager( initialize() // Called by SparkContext or Executor before using Broadcast - private def initialize() { + private def initialize(): Unit = { synchronized { if (!initialized) { broadcastFactory = new TorrentBroadcastFactory @@ -50,7 +50,7 @@ private[spark] class BroadcastManager( } } - def stop() { + def stop(): Unit = { broadcastFactory.stop() } @@ -77,7 +77,7 @@ private[spark] class BroadcastManager( broadcastFactory.newBroadcast[T](value_, isLocal, bid) } - def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean) { + def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean): Unit = { broadcastFactory.unbroadcast(id, removeFromDriver, blocking) } } diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala 
b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 1379314ba1b53..77fbbc08c2103 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -73,7 +73,7 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) /** Size of each block. Default value is 4MB. This value is only read by the broadcaster. */ @transient private var blockSize: Int = _ - private def setConf(conf: SparkConf) { + private def setConf(conf: SparkConf): Unit = { compressionCodec = if (conf.get(config.BROADCAST_COMPRESS)) { Some(CompressionCodec.createCodec(conf)) } else { @@ -196,7 +196,7 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) /** * Remove all persisted state associated with this Torrent broadcast on the executors. */ - override protected def doUnpersist(blocking: Boolean) { + override protected def doUnpersist(blocking: Boolean): Unit = { TorrentBroadcast.unpersist(id, removeFromDriver = false, blocking) } @@ -204,7 +204,7 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) * Remove all persisted state associated with this Torrent broadcast on the executors * and driver. 
*/ - override protected def doDestroy(blocking: Boolean) { + override protected def doDestroy(blocking: Boolean): Unit = { TorrentBroadcast.unpersist(id, removeFromDriver = true, blocking) } diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala index b11f9ba171b84..65fb5186afae1 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala @@ -28,20 +28,21 @@ import org.apache.spark.{SecurityManager, SparkConf} */ private[spark] class TorrentBroadcastFactory extends BroadcastFactory { - override def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) { } + override def initialize(isDriver: Boolean, conf: SparkConf, + securityMgr: SecurityManager): Unit = { } override def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean, id: Long): Broadcast[T] = { new TorrentBroadcast[T](value_, id) } - override def stop() { } + override def stop(): Unit = { } /** * Remove all persisted state associated with the torrent broadcast with the given ID. * @param removeFromDriver Whether to remove state from the driver. 
* @param blocking Whether to block until unbroadcasted */ - override def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean) { + override def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean): Unit = { TorrentBroadcast.unpersist(id, removeFromDriver, blocking) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index 648a8b1c763db..7022b986ea025 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -219,7 +219,7 @@ private class ClientEndpoint( * Executable utility for starting and terminating drivers inside of a standalone cluster. */ object Client { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // scalastyle:off println if (!sys.props.contains("SPARK_SUBMIT")) { println("WARNING: This client is deprecated and will be removed in a future version of Spark") diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala index a86ee66fb72b9..9d6bbf91168da 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -100,7 +100,7 @@ private[deploy] class ClientArguments(args: Array[String]) { /** * Print usage and exit JVM with the given exit code. */ - private def printUsageAndExit(exitCode: Int) { + private def printUsageAndExit(exitCode: Int): Unit = { // TODO: It wouldn't be too hard to allow users to submit their app and dependency jars // separately similar to in the YARN client. 
val usage = diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index fba371dcfb761..18305ad3746a6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -60,6 +60,15 @@ private[deploy] object DeployMessages { assert (port > 0) } + /** + * @param id the worker id + * @param worker the worker endpoint ref + */ + case class WorkerDecommission( + id: String, + worker: RpcEndpointRef) + extends DeployMessage + case class ExecutorStateChanged( appId: String, execId: Int, @@ -149,6 +158,8 @@ private[deploy] object DeployMessages { case object ReregisterWithMaster // used when a worker attempts to reconnect to a master + case object DecommissionSelf // Mark as decommissioned. May be Master to Worker in the future. + // AppClient to Master case class RegisterApplication(appDescription: ApplicationDescription, driver: RpcEndpointRef) diff --git a/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala index 69c98e28931d7..0751bcf221f86 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala @@ -19,9 +19,13 @@ package org.apache.spark.deploy private[deploy] object ExecutorState extends Enumeration { - val LAUNCHING, RUNNING, KILLED, FAILED, LOST, EXITED = Value + val LAUNCHING, RUNNING, KILLED, FAILED, LOST, EXITED, DECOMMISSIONED = Value type ExecutorState = Value - def isFinished(state: ExecutorState): Boolean = Seq(KILLED, FAILED, LOST, EXITED).contains(state) + // DECOMMISSIONED isn't listed as finished since we don't want to remove the executor from + // the worker and the executor still exists - but we do want to avoid scheduling new tasks on it. 
+ private val finishedStates = Seq(KILLED, FAILED, LOST, EXITED) + + def isFinished(state: ExecutorState): Boolean = finishedStates.contains(state) } diff --git a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala index 64277e8de2a4d..ebfff89308886 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala @@ -87,14 +87,14 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana } /** Starts the external shuffle service if the user has configured us to. */ - def startIfEnabled() { + def startIfEnabled(): Unit = { if (enabled) { start() } } /** Start the external shuffle service */ - def start() { + def start(): Unit = { require(server == null, "Shuffle server already started") val authEnabled = securityManager.isAuthenticationEnabled() logInfo(s"Starting shuffle service on port $port (auth enabled = $authEnabled)") @@ -125,7 +125,7 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana blockHandler.executorRemoved(executorId, appId) } - def stop() { + def stop(): Unit = { if (server != null) { server.close() server = null diff --git a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala index 99f841234005e..6ff68b694f8f3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala @@ -78,7 +78,7 @@ private object FaultToleranceTest extends App with Logging { System.setProperty(config.DRIVER_HOST_ADDRESS.key, "172.17.42.1") // default docker host ip - private def afterEach() { + private def afterEach(): Unit = { if (sc != null) { sc.stop() sc = null @@ -180,7 +180,7 @@ private object FaultToleranceTest extends App with Logging { } } 
- private def test(name: String)(fn: => Unit) { + private def test(name: String)(fn: => Unit): Unit = { try { fn numPassed += 1 @@ -198,12 +198,12 @@ private object FaultToleranceTest extends App with Logging { afterEach() } - private def addMasters(num: Int) { + private def addMasters(num: Int): Unit = { logInfo(s">>>>> ADD MASTERS $num <<<<<") (1 to num).foreach { _ => masters += SparkDocker.startMaster(dockerMountDir) } } - private def addWorkers(num: Int) { + private def addWorkers(num: Int): Unit = { logInfo(s">>>>> ADD WORKERS $num <<<<<") val masterUrls = getMasterUrls(masters) (1 to num).foreach { _ => workers += SparkDocker.startWorker(dockerMountDir, masterUrls) } @@ -239,7 +239,7 @@ private object FaultToleranceTest extends App with Logging { private def delay(secs: Duration = 5.seconds) = Thread.sleep(secs.toMillis) - private def terminateCluster() { + private def terminateCluster(): Unit = { logInfo(">>>>> TERMINATE CLUSTER <<<<<") masters.foreach(_.kill()) workers.foreach(_.kill()) @@ -326,7 +326,7 @@ private object FaultToleranceTest extends App with Logging { } } - private def assertTrue(bool: Boolean, message: String = "") { + private def assertTrue(bool: Boolean, message: String = ""): Unit = { if (!bool) { throw new IllegalStateException("Assertion failed: " + message) } @@ -346,7 +346,7 @@ private class TestMasterInfo(val ip: String, val dockerId: DockerId, val logFile logDebug("Created master: " + this) - def readState() { + def readState(): Unit = { try { val masterStream = new InputStreamReader( new URL("http://%s:8080/json".format(ip)).openStream, StandardCharsets.UTF_8) @@ -372,7 +372,7 @@ private class TestMasterInfo(val ip: String, val dockerId: DockerId, val logFile } } - def kill() { Docker.kill(dockerId) } + def kill(): Unit = { Docker.kill(dockerId) } override def toString: String = "[ip=%s, id=%s, logFile=%s, state=%s]". 
@@ -386,7 +386,7 @@ private class TestWorkerInfo(val ip: String, val dockerId: DockerId, val logFile logDebug("Created worker: " + this) - def kill() { Docker.kill(dockerId) } + def kill(): Unit = { Docker.kill(dockerId) } override def toString: String = "[ip=%s, id=%s, logFile=%s]".format(ip, dockerId, logFile.getAbsolutePath) diff --git a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala index f1b58eb33a1b7..fc849d7f4372f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala @@ -72,7 +72,7 @@ class LocalSparkCluster( masters } - def stop() { + def stop(): Unit = { logInfo("Shutting down local Spark cluster.") // Stop the workers before the master so they don't get upset that it disconnected workerRpcEnvs.foreach(_.shutdown()) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 8055a6270dac8..574ce60b19b4e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy import java.io.File -import java.net.{InetAddress, URI} +import java.net.URI import java.nio.file.Files import scala.collection.JavaConverters._ @@ -26,7 +26,7 @@ import scala.collection.mutable.ArrayBuffer import scala.util.Try import org.apache.spark.{SparkConf, SparkUserAppException} -import org.apache.spark.api.python.PythonUtils +import org.apache.spark.api.python.{Py4JServer, PythonUtils} import org.apache.spark.internal.config._ import org.apache.spark.util.{RedirectThread, Utils} @@ -35,12 +35,11 @@ import org.apache.spark.util.{RedirectThread, Utils} * subprocess and then has it connect back to the JVM to access system properties, etc. 
*/ object PythonRunner { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val pythonFile = args(0) val pyFiles = args(1) val otherArgs = args.slice(2, args.length) val sparkConf = new SparkConf() - val secret = Utils.createSecret(sparkConf) val pythonExec = sparkConf.get(PYSPARK_DRIVER_PYTHON) .orElse(sparkConf.get(PYSPARK_PYTHON)) .orElse(sys.env.get("PYSPARK_DRIVER_PYTHON")) @@ -51,15 +50,8 @@ object PythonRunner { val formattedPythonFile = formatPath(pythonFile) val formattedPyFiles = resolvePyFiles(formatPaths(pyFiles)) - // Launch a Py4J gateway server for the process to connect to; this will let it see our - // Java system properties and such - val localhost = InetAddress.getLoopbackAddress() - val gatewayServer = new py4j.GatewayServer.GatewayServerBuilder() - .authToken(secret) - .javaPort(0) - .javaAddress(localhost) - .callbackClient(py4j.GatewayServer.DEFAULT_PYTHON_PORT, localhost, secret) - .build() + val gatewayServer = new Py4JServer(sparkConf) + val thread = new Thread(() => Utils.logUncaughtExceptions { gatewayServer.start() }) thread.setName("py4j-gateway-init") thread.setDaemon(true) @@ -86,7 +78,7 @@ object PythonRunner { // This is equivalent to setting the -u flag; we use it because ipython doesn't support -u: env.put("PYTHONUNBUFFERED", "YES") // value is needed to be set to a non-empty string env.put("PYSPARK_GATEWAY_PORT", "" + gatewayServer.getListeningPort) - env.put("PYSPARK_GATEWAY_SECRET", secret) + env.put("PYSPARK_GATEWAY_SECRET", gatewayServer.secret) // pass conf spark.pyspark.python to python process, the only way to pass info to // python process is through environment variable. 
sparkConf.get(PYSPARK_PYTHON).foreach(env.put("PYSPARK_PYTHON", _)) diff --git a/core/src/main/scala/org/apache/spark/deploy/RRunner.scala b/core/src/main/scala/org/apache/spark/deploy/RRunner.scala index 60ba0470a628a..b32f9ea3b4747 100644 --- a/core/src/main/scala/org/apache/spark/deploy/RRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/RRunner.scala @@ -73,7 +73,7 @@ object RRunner { @volatile var sparkRBackendSecret: String = null val initialized = new Semaphore(0) val sparkRBackendThread = new Thread("SparkR backend") { - override def run() { + override def run(): Unit = { val (port, authHelper) = sparkRBackend.init() sparkRBackendPort = port sparkRBackendSecret = authHelper.secret diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkCuratorUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkCuratorUtil.scala index 8118c01eb712f..b89ae1b35e693 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkCuratorUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkCuratorUtil.scala @@ -45,7 +45,7 @@ private[spark] object SparkCuratorUtil extends Logging { zk } - def mkdir(zk: CuratorFramework, path: String) { + def mkdir(zk: CuratorFramework, path: String): Unit = { if (zk.checkExists().forPath(path) == null) { try { zk.create().creatingParentsIfNeeded().forPath(path) @@ -57,7 +57,7 @@ private[spark] object SparkCuratorUtil extends Logging { } } - def deleteRecursive(zk: CuratorFramework, path: String) { + def deleteRecursive(zk: CuratorFramework, path: String): Unit = { if (zk.checkExists().forPath(path) != null) { for (child <- zk.getChildren.forPath(path).asScala) { zk.delete().forPath(path + "/" + child) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 11420bb985520..1180501e8c738 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ 
b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -57,7 +57,7 @@ private[spark] class SparkHadoopUtil extends Logging { * you need to look https://issues.apache.org/jira/browse/HDFS-3545 and possibly * do a FileSystem.closeAllForUGI in order to avoid leaking Filesystems */ - def runAsSparkUser(func: () => Unit) { + def runAsSparkUser(func: () => Unit): Unit = { createSparkUser().doAs(new PrivilegedExceptionAction[Unit] { def run: Unit = func() }) @@ -71,7 +71,7 @@ private[spark] class SparkHadoopUtil extends Logging { ugi } - def transferCredentials(source: UserGroupInformation, dest: UserGroupInformation) { + def transferCredentials(source: UserGroupInformation, dest: UserGroupInformation): Unit = { dest.addCredentials(source.getCredentials()) } @@ -79,8 +79,10 @@ private[spark] class SparkHadoopUtil extends Logging { * Appends S3-specific, spark.hadoop.*, and spark.buffer.size configurations to a Hadoop * configuration. */ - def appendS3AndSparkHadoopConfigurations(conf: SparkConf, hadoopConf: Configuration): Unit = { - SparkHadoopUtil.appendS3AndSparkHadoopConfigurations(conf, hadoopConf) + def appendS3AndSparkHadoopHiveConfigurations( + conf: SparkConf, + hadoopConf: Configuration): Unit = { + SparkHadoopUtil.appendS3AndSparkHadoopHiveConfigurations(conf, hadoopConf) } /** @@ -103,6 +105,15 @@ private[spark] class SparkHadoopUtil extends Logging { } } + def appendSparkHiveConfigs( + srcMap: Map[String, String], + destMap: HashMap[String, String]): Unit = { + // Copy any "spark.hive.foo=bar" system properties into destMap as "hive.foo=bar" + for ((key, value) <- srcMap if key.startsWith("spark.hive.")) { + destMap.put(key.substring("spark.".length), value) + } + } + /** * Return an appropriate (subclass) of Configuration. Creating config can initialize some Hadoop * subsystems. 
@@ -140,7 +151,7 @@ private[spark] class SparkHadoopUtil extends Logging { * Add or overwrite current user's credentials with serialized delegation tokens, * also confirms correct hadoop configuration is set. */ - private[spark] def addDelegationTokens(tokens: Array[Byte], sparkConf: SparkConf) { + private[spark] def addDelegationTokens(tokens: Array[Byte], sparkConf: SparkConf): Unit = { UserGroupInformation.setConfiguration(newConfiguration(sparkConf)) val creds = deserialize(tokens) logInfo("Updating delegation tokens for current user.") @@ -413,11 +424,11 @@ private[spark] object SparkHadoopUtil { */ private[spark] def newConfiguration(conf: SparkConf): Configuration = { val hadoopConf = new Configuration() - appendS3AndSparkHadoopConfigurations(conf, hadoopConf) + appendS3AndSparkHadoopHiveConfigurations(conf, hadoopConf) hadoopConf } - private def appendS3AndSparkHadoopConfigurations( + private def appendS3AndSparkHadoopHiveConfigurations( conf: SparkConf, hadoopConf: Configuration): Unit = { // Note: this null check is around more than just access to the "conf" object to maintain @@ -440,6 +451,7 @@ private[spark] object SparkHadoopUtil { } } appendSparkHadoopConfigs(conf, hadoopConf) + appendSparkHiveConfigs(conf, hadoopConf) val bufferSize = conf.get(BUFFER_SIZE).toString hadoopConf.set("io.file.buffer.size", bufferSize) } @@ -452,37 +464,48 @@ private[spark] object SparkHadoopUtil { } } + private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = { + // Copy any "spark.hive.foo=bar" spark properties into conf as "hive.foo=bar" + for ((key, value) <- conf.getAll if key.startsWith("spark.hive.")) { + hadoopConf.set(key.substring("spark.".length), value) + } + } + // scalastyle:off line.size.limit /** - * Create a path that uses replication instead of erasure coding (ec), regardless of the default - * configuration in hdfs for the given path. 
This can be helpful as hdfs ec doesn't support - * hflush(), hsync(), or append() + * Create a file on the given file system, optionally making sure erasure coding is disabled. + * + * Disabling EC can be helpful as HDFS EC doesn't support hflush(), hsync(), or append(). * https://hadoop.apache.org/docs/r3.0.0/hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html#Limitations */ // scalastyle:on line.size.limit - def createNonECFile(fs: FileSystem, path: Path): FSDataOutputStream = { - try { - // Use reflection as this uses APIs only available in Hadoop 3 - val builderMethod = fs.getClass().getMethod("createFile", classOf[Path]) - // the builder api does not resolve relative paths, nor does it create parent dirs, while - // the old api does. - if (!fs.mkdirs(path.getParent())) { - throw new IOException(s"Failed to create parents of $path") + def createFile(fs: FileSystem, path: Path, allowEC: Boolean): FSDataOutputStream = { + if (allowEC) { + fs.create(path) + } else { + try { + // Use reflection as this uses APIs only available in Hadoop 3 + val builderMethod = fs.getClass().getMethod("createFile", classOf[Path]) + // the builder api does not resolve relative paths, nor does it create parent dirs, while + // the old api does. + if (!fs.mkdirs(path.getParent())) { + throw new IOException(s"Failed to create parents of $path") + } + val qualifiedPath = fs.makeQualified(path) + val builder = builderMethod.invoke(fs, qualifiedPath) + val builderCls = builder.getClass() + // this may throw a NoSuchMethodException if the path is not on hdfs + val replicateMethod = builderCls.getMethod("replicate") + val buildMethod = builderCls.getMethod("build") + val b2 = replicateMethod.invoke(builder) + buildMethod.invoke(b2).asInstanceOf[FSDataOutputStream] + } catch { + case _: NoSuchMethodException => + // No createFile() method, we're using an older hdfs client, which doesn't give us control + // over EC vs. replication. 
Older hdfs doesn't have EC anyway, so just create a file with + // old apis. + fs.create(path) } - val qualifiedPath = fs.makeQualified(path) - val builder = builderMethod.invoke(fs, qualifiedPath) - val builderCls = builder.getClass() - // this may throw a NoSuchMethodException if the path is not on hdfs - val replicateMethod = builderCls.getMethod("replicate") - val buildMethod = builderCls.getMethod("build") - val b2 = replicateMethod.invoke(builder) - buildMethod.invoke(b2).asInstanceOf[FSDataOutputStream] - } catch { - case _: NoSuchMethodException => - // No createFile() method, we're using an older hdfs client, which doesn't give us control - // over EC vs. replication. Older hdfs doesn't have EC anyway, so just create a file with - // old apis. - fs.create(path) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 12a8473b22025..8a03af5e38c9b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -23,11 +23,12 @@ import java.net.{URI, URL} import java.security.PrivilegedExceptionAction import java.text.ParseException import java.util.{ServiceLoader, UUID} +import java.util.jar.JarInputStream import scala.annotation.tailrec import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import scala.util.{Properties, Try} +import scala.util.{Failure, Properties, Success, Try} import org.apache.commons.io.FilenameUtils import org.apache.commons.lang3.StringUtils @@ -229,10 +230,6 @@ private[spark] class SparkSubmit extends Logging { // Set the cluster manager val clusterManager: Int = args.master match { case "yarn" => YARN - case "yarn-client" | "yarn-cluster" => - logWarning(s"Master ${args.master} is deprecated since 2.0." 
+ - " Please use master \"yarn\" with specified deploy mode instead.") - YARN case m if m.startsWith("spark") => STANDALONE case m if m.startsWith("mesos") => MESOS case m if m.startsWith("k8s") => KUBERNETES @@ -251,22 +248,7 @@ private[spark] class SparkSubmit extends Logging { -1 } - // Because the deprecated way of specifying "yarn-cluster" and "yarn-client" encapsulate both - // the master and deploy mode, we have some logic to infer the master and deploy mode - // from each other if only one is specified, or exit early if they are at odds. if (clusterManager == YARN) { - (args.master, args.deployMode) match { - case ("yarn-cluster", null) => - deployMode = CLUSTER - args.master = "yarn" - case ("yarn-cluster", "client") => - error("Client deploy mode is not compatible with master \"yarn-cluster\"") - case ("yarn-client", "cluster") => - error("Cluster deploy mode is not compatible with master \"yarn-client\"") - case (_, mode) => - args.master = "yarn" - } - // Make sure YARN is included in our build if we're trying to use it if (!Utils.classIsLoadable(YARN_CLUSTER_SUBMIT_CLASS) && !Utils.isTesting) { error( @@ -456,6 +438,32 @@ private[spark] class SparkSubmit extends Logging { }.orNull } + // At this point, we have attempted to download all remote resources. + // Now we try to resolve the main class if our primary resource is a JAR. + if (args.mainClass == null && !args.isPython && !args.isR) { + try { + val uri = new URI( + Option(localPrimaryResource).getOrElse(args.primaryResource) + ) + val fs = FileSystem.get(uri, hadoopConf) + + Utils.tryWithResource(new JarInputStream(fs.open(new Path(uri)))) { jar => + args.mainClass = jar.getManifest.getMainAttributes.getValue("Main-Class") + } + } catch { + case e: Throwable => + error( + s"Failed to get main class in JAR with error '${e.getMessage}'. " + + " Please specify one with --class." + ) + } + + if (args.mainClass == null) { + // If we still can't figure out the main class at this point, blow up. 
+ error("No main class set in JAR; please specify one with --class.") + } + } + // If we're running a python app, set the main class to our specific python runner if (args.isPython && deployMode == CLIENT) { if (args.primaryResource == PYSPARK_SHELL) { @@ -1047,7 +1055,7 @@ object SparkSubmit extends CommandLineUtils with Logging { * Return whether the given primary resource requires running R. */ private[deploy] def isR(res: String): Boolean = { - res != null && res.endsWith(".R") || res == SPARKR_SHELL + res != null && (res.endsWith(".R") || res.endsWith(".r")) || res == SPARKR_SHELL } private[deploy] def isInternal(res: String): Boolean = { diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index ed1324baed0f1..3f7cfea778ac6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -19,10 +19,8 @@ package org.apache.spark.deploy import java.io.{ByteArrayOutputStream, File, PrintStream} import java.lang.reflect.InvocationTargetException -import java.net.URI import java.nio.charset.StandardCharsets import java.util.{List => JList} -import java.util.jar.JarFile import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap} @@ -139,10 +137,10 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S * Remove keys that don't start with "spark." from `sparkProperties`. 
*/ private def ignoreNonSparkProperties(): Unit = { - sparkProperties.foreach { case (k, v) => + sparkProperties.keys.foreach { k => if (!k.startsWith("spark.")) { sparkProperties -= k - logWarning(s"Ignoring non-spark config property: $k=$v") + logWarning(s"Ignoring non-Spark config property: $k") } } } @@ -211,29 +209,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S dynamicAllocationEnabled = sparkProperties.get(DYN_ALLOCATION_ENABLED.key).exists("true".equalsIgnoreCase) - // Try to set main class from JAR if no --class argument is given - if (mainClass == null && !isPython && !isR && primaryResource != null) { - val uri = new URI(primaryResource) - val uriScheme = uri.getScheme() - - uriScheme match { - case "file" => - try { - Utils.tryWithResource(new JarFile(uri.getPath)) { jar => - // Note that this might still return null if no main-class is set; we catch that later - mainClass = jar.getManifest.getMainAttributes.getValue("Main-Class") - } - } catch { - case _: Exception => - error(s"Cannot load main class from JAR $primaryResource") - } - case _ => - error( - s"Cannot load main class from JAR $primaryResource with URI $uriScheme. " + - "Please specify a class through --class.") - } - } - // Global defaults. These should be keep to minimum to avoid confusing behavior. 
master = Option(master).getOrElse("local[*]") @@ -269,9 +244,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S if (primaryResource == null) { error("Must specify a primary resource (JAR or Python or R file)") } - if (mainClass == null && SparkSubmit.isUserJar(primaryResource)) { - error("No main class set in JAR; please specify one with --class") - } if (driverMemory != null && Try(JavaUtils.byteStringAsBytes(driverMemory)).getOrElse(-1L) <= 0) { error("Driver memory must be a positive number") diff --git a/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala b/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala index d6f9618af4aac..65bf4351ebfd9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala @@ -208,7 +208,7 @@ private[spark] object StandaloneResourceUtils extends Logging { } val newAllocation = { val allocations = newAssignments.map { case (rName, addresses) => - ResourceAllocation(ResourceID(componentName, rName), addresses) + ResourceAllocation(new ResourceID(componentName, rName), addresses) }.toSeq StandaloneResourceAllocation(pid, allocations) } @@ -348,7 +348,7 @@ private[spark] object StandaloneResourceUtils extends Logging { val compShortName = componentName.substring(componentName.lastIndexOf(".") + 1) val tmpFile = Utils.tempFileWith(dir) val allocations = resources.map { case (rName, rInfo) => - ResourceAllocation(ResourceID(componentName, rName), rInfo.addresses) + ResourceAllocation(new ResourceID(componentName, rName), rInfo.addresses) }.toSeq try { writeResourceAllocationJson(componentName, allocations, tmpFile) diff --git a/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala index 34ade4ce6f39b..eedf5e969e291 100644 --- 
a/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala @@ -120,7 +120,7 @@ private[spark] class StandaloneAppClient( * * nthRetry means this is the nth attempt to register with master. */ - private def registerWithMaster(nthRetry: Int) { + private def registerWithMaster(nthRetry: Int): Unit = { registerMasterFutures.set(tryRegisterAllMasters()) registrationRetryTimer.set(registrationRetryThread.schedule(new Runnable { override def run(): Unit = { @@ -180,6 +180,8 @@ private[spark] class StandaloneAppClient( logInfo("Executor updated: %s is now %s%s".format(fullId, state, messageText)) if (ExecutorState.isFinished(state)) { listener.executorRemoved(fullId, message.getOrElse(""), exitStatus, workerLost) + } else if (state == ExecutorState.DECOMMISSIONED) { + listener.executorDecommissioned(fullId, message.getOrElse("")) } case WorkerRemoved(id, host, message) => @@ -246,14 +248,14 @@ private[spark] class StandaloneAppClient( /** * Notify the listener that we disconnected, if we hadn't already done so before. */ - def markDisconnected() { + def markDisconnected(): Unit = { if (!alreadyDisconnected) { listener.disconnected() alreadyDisconnected = true } } - def markDead(reason: String) { + def markDead(reason: String): Unit = { if (!alreadyDead.get) { listener.dead(reason) alreadyDead.set(true) @@ -271,12 +273,12 @@ private[spark] class StandaloneAppClient( } - def start() { + def start(): Unit = { // Just launch an rpcEndpoint; it will call back into the listener. 
endpoint.set(rpcEnv.setupEndpoint("AppClient", new ClientEndpoint(rpcEnv))) } - def stop() { + def stop(): Unit = { if (endpoint.get != null) { try { val timeout = RpcUtils.askRpcTimeout(conf) diff --git a/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClientListener.scala b/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClientListener.scala index d8bc1a883def1..2e38a6847891d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClientListener.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClientListener.scala @@ -39,5 +39,7 @@ private[spark] trait StandaloneAppClientListener { def executorRemoved( fullId: String, message: String, exitStatus: Option[Int], workerLost: Boolean): Unit + def executorDecommissioned(fullId: String, message: String): Unit + def workerRemoved(workerId: String, host: String, message: String): Unit } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala index 8c63fa65b40fd..fb2a67c2ab103 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala @@ -209,9 +209,8 @@ private[history] class ApplicationCache( /** * Register a filter for the web UI which checks for updates to the given app/attempt - * @param ui Spark UI to attach filters to - * @param appId application ID - * @param attemptId attempt ID + * @param key consisted of appId and attemptId + * @param loadedUI Spark UI to attach filters to */ private def registerFilter(key: CacheKey, loadedUI: LoadedAppUI): Unit = { require(loadedUI != null) @@ -231,7 +230,7 @@ private[history] class ApplicationCache( /** * An entry in the cache. 
* - * @param ui Spark UI + * @param loadedUI Spark UI * @param completed Flag to indicated that the application has completed (and so * does not need refreshing). */ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index f1c06205bf04c..472b52957ed7f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -114,6 +114,12 @@ private[history] abstract class ApplicationHistoryProvider { */ def stop(): Unit = { } + /** + * Called when the server is starting up. Implement this function to init the provider and start + * background threads. With this function we can start provider later after it is created. + */ + def start(): Unit = { } + /** * Returns configuration data to be shown in the History Server home page. * diff --git a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala new file mode 100644 index 0000000000000..b18bf2665d6ce --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import scala.collection.mutable + +import org.apache.spark.SparkContext +import org.apache.spark.deploy.history.EventFilter.FilterStatistics +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler._ +import org.apache.spark.storage.BlockManagerId + +/** + * This class tracks both live jobs and live executors, and pass the list to the + * [[BasicEventFilter]] to help BasicEventFilter to reject finished jobs (+ stages/tasks/RDDs) + * and dead executors. + */ +private[spark] class BasicEventFilterBuilder extends SparkListener with EventFilterBuilder { + private val liveJobToStages = new mutable.HashMap[Int, Set[Int]] + private val stageToTasks = new mutable.HashMap[Int, mutable.Set[Long]] + private val stageToRDDs = new mutable.HashMap[Int, Set[Int]] + private val _liveExecutors = new mutable.HashSet[String] + + private var totalJobs: Long = 0L + private var totalStages: Long = 0L + private var totalTasks: Long = 0L + + private[history] def liveJobs: Set[Int] = liveJobToStages.keySet.toSet + private[history] def liveStages: Set[Int] = stageToRDDs.keySet.toSet + private[history] def liveTasks: Set[Long] = stageToTasks.values.flatten.toSet + private[history] def liveRDDs: Set[Int] = stageToRDDs.values.flatten.toSet + private[history] def liveExecutors: Set[String] = _liveExecutors.toSet + + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + totalJobs += 1 + totalStages += jobStart.stageIds.length + liveJobToStages += jobStart.jobId -> jobStart.stageIds.toSet + } + + 
override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { + val stages = liveJobToStages.getOrElse(jobEnd.jobId, Seq.empty[Int]) + liveJobToStages -= jobEnd.jobId + stageToTasks --= stages + stageToRDDs --= stages + } + + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + val stageId = stageSubmitted.stageInfo.stageId + stageToRDDs.put(stageId, stageSubmitted.stageInfo.rddInfos.map(_.id).toSet) + stageToTasks.getOrElseUpdate(stageId, new mutable.HashSet[Long]()) + } + + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + totalTasks += 1 + stageToTasks.get(taskStart.stageId).foreach { tasks => + tasks += taskStart.taskInfo.taskId + } + } + + override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = { + _liveExecutors += executorAdded.executorId + } + + override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = { + _liveExecutors -= executorRemoved.executorId + } + + override def createFilter(): EventFilter = { + val stats = FilterStatistics(totalJobs, liveJobs.size, totalStages, + liveStages.size, totalTasks, liveTasks.size) + + new BasicEventFilter(stats, liveJobs, liveStages, liveTasks, liveRDDs, liveExecutors) + } +} + +/** + * This class provides the functionality to reject events which are related to the finished + * jobs based on the given information. This class only deals with job related events, and provides + * a PartialFunction which returns false for rejected events for finished jobs, returns true + * otherwise. 
+ */ +private[spark] abstract class JobEventFilter( + stats: Option[FilterStatistics], + liveJobs: Set[Int], + liveStages: Set[Int], + liveTasks: Set[Long], + liveRDDs: Set[Int]) extends EventFilter with Logging { + + logDebug(s"jobs : $liveJobs") + logDebug(s"stages : $liveStages") + logDebug(s"tasks : $liveTasks") + logDebug(s"RDDs : $liveRDDs") + + override def statistics(): Option[FilterStatistics] = stats + + protected val acceptFnForJobEvents: PartialFunction[SparkListenerEvent, Boolean] = { + case e: SparkListenerStageCompleted => + liveStages.contains(e.stageInfo.stageId) + case e: SparkListenerStageSubmitted => + liveStages.contains(e.stageInfo.stageId) + case e: SparkListenerTaskStart => + liveTasks.contains(e.taskInfo.taskId) + case e: SparkListenerTaskGettingResult => + liveTasks.contains(e.taskInfo.taskId) + case e: SparkListenerTaskEnd => + liveTasks.contains(e.taskInfo.taskId) + case e: SparkListenerJobStart => + liveJobs.contains(e.jobId) + case e: SparkListenerJobEnd => + liveJobs.contains(e.jobId) + case e: SparkListenerUnpersistRDD => + liveRDDs.contains(e.rddId) + case e: SparkListenerExecutorMetricsUpdate => + e.accumUpdates.exists { case (taskId, stageId, _, _) => + liveTasks.contains(taskId) || liveStages.contains(stageId) + } + case e: SparkListenerSpeculativeTaskSubmitted => + liveStages.contains(e.stageId) + } +} + +/** + * This class rejects events which are related to the finished jobs or dead executors, + * based on the given information. The events which are not related to the job and executor + * will be considered as "Don't mind". 
+ */ +private[spark] class BasicEventFilter( + stats: FilterStatistics, + liveJobs: Set[Int], + liveStages: Set[Int], + liveTasks: Set[Long], + liveRDDs: Set[Int], + liveExecutors: Set[String]) + extends JobEventFilter( + Some(stats), + liveJobs, + liveStages, + liveTasks, + liveRDDs) with Logging { + + logDebug(s"live executors : $liveExecutors") + + private val _acceptFn: PartialFunction[SparkListenerEvent, Boolean] = { + case e: SparkListenerExecutorAdded => liveExecutors.contains(e.executorId) + case e: SparkListenerExecutorRemoved => liveExecutors.contains(e.executorId) + case e: SparkListenerExecutorBlacklisted => liveExecutors.contains(e.executorId) + case e: SparkListenerExecutorUnblacklisted => liveExecutors.contains(e.executorId) + case e: SparkListenerStageExecutorMetrics => liveExecutors.contains(e.execId) + case e: SparkListenerBlockManagerAdded => acceptBlockManagerEvent(e.blockManagerId) + case e: SparkListenerBlockManagerRemoved => acceptBlockManagerEvent(e.blockManagerId) + case e: SparkListenerBlockUpdated => acceptBlockManagerEvent(e.blockUpdatedInfo.blockManagerId) + } + + private def acceptBlockManagerEvent(blockManagerId: BlockManagerId): Boolean = { + blockManagerId.isDriver || liveExecutors.contains(blockManagerId.executorId) + } + + override def acceptFn(): PartialFunction[SparkListenerEvent, Boolean] = { + _acceptFn.orElse(acceptFnForJobEvents) + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventFilter.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventFilter.scala new file mode 100644 index 0000000000000..a5f2394960b70 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventFilter.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import scala.io.{Codec, Source} +import scala.util.control.NonFatal + +import org.apache.hadoop.fs.{FileSystem, Path} +import org.json4s.jackson.JsonMethods.parse + +import org.apache.spark.deploy.history.EventFilter.FilterStatistics +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler._ +import org.apache.spark.util.{JsonProtocol, Utils} + +/** + * EventFilterBuilder provides the interface to gather the information from events being received + * by [[SparkListenerInterface]], and create a new [[EventFilter]] instance which leverages + * information gathered to decide whether the event should be accepted or not. + */ +private[spark] trait EventFilterBuilder extends SparkListenerInterface { + def createFilter(): EventFilter +} + +/** [[EventFilter]] decides whether the given event should be accepted or rejected. */ +private[spark] trait EventFilter { + /** + * Provide statistic information of event filter, which would be used for measuring the score + * of compaction. + * + * To simplify the condition, currently the fields of statistic are static, since major kinds of + * events compaction would filter out are job related event types. If the filter doesn't track + * with job related events, return None instead. 
+ */ + def statistics(): Option[FilterStatistics] + + /** + * Classify whether the event is accepted or rejected by this filter. + * + * The method should return the partial function which matches the events where the filter can + * decide whether the event should be accepted or rejected. Otherwise it should leave the events + * be unmatched. + */ + def acceptFn(): PartialFunction[SparkListenerEvent, Boolean] +} + +private[spark] object EventFilter extends Logging { + case class FilterStatistics( + totalJobs: Long, + liveJobs: Long, + totalStages: Long, + liveStages: Long, + totalTasks: Long, + liveTasks: Long) + + def applyFilterToFile( + fs: FileSystem, + filters: Seq[EventFilter], + path: Path, + onAccepted: (String, SparkListenerEvent) => Unit, + onRejected: (String, SparkListenerEvent) => Unit, + onUnidentified: String => Unit): Unit = { + Utils.tryWithResource(EventLogFileReader.openEventLog(path, fs)) { in => + val lines = Source.fromInputStream(in)(Codec.UTF8).getLines() + + lines.zipWithIndex.foreach { case (line, lineNum) => + try { + val event = try { + Some(JsonProtocol.sparkEventFromJson(parse(line))) + } catch { + // ignore any exception occurred from unidentified json + case NonFatal(_) => + onUnidentified(line) + None + } + + event.foreach { e => + val results = filters.flatMap(_.acceptFn().lift.apply(e)) + if (results.nonEmpty && results.forall(_ == false)) { + onRejected(line, e) + } else { + onAccepted(line, e) + } + } + } catch { + case e: Exception => + logError(s"Exception parsing Spark event log: ${path.getName}", e) + logError(s"Malformed line #$lineNum: $line\n") + throw e + } + } + } + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileCompactor.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileCompactor.scala new file mode 100644 index 0000000000000..8558f765175fc --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileCompactor.scala @@ -0,0 +1,225 @@ +/* + 
* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import java.io.IOException +import java.net.URI +import java.util.ServiceLoader + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.history.EventFilter.FilterStatistics +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler.ReplayListenerBus +import org.apache.spark.util.Utils + +/** + * This class compacts the old event log files into one compact file, via two phases reading: + * + * 1) Initialize available [[EventFilterBuilder]] instances, and replay the old event log files with + * builders, so that these builders can gather the information to create [[EventFilter]] instances. + * 2) Initialize [[EventFilter]] instances from [[EventFilterBuilder]] instances, and replay the + * old event log files with filters. Rewrite the events to the compact file which the filters decide + * to accept. 
+ * + * This class will calculate the score based on statistic from [[EventFilter]] instances, which + * represents approximate rate of filtered-out events. Score is being calculated via applying + * heuristic; task events tend to take most size in event log. + */ +class EventLogFileCompactor( + sparkConf: SparkConf, + hadoopConf: Configuration, + fs: FileSystem, + maxFilesToRetain: Int, + compactionThresholdScore: Double) extends Logging { + + require(maxFilesToRetain > 0, "Max event log files to retain should be higher than 0.") + + /** + * Compacts the old event log files into one compact file, and clean old event log files being + * compacted away. + * + * This method assumes caller will provide the sorted list of files which are sorted by + * the index of event log file, with at most one compact file placed first if it exists. + * + * When compacting the files, the range of compaction for given file list is determined as: + * (first ~ the file where there're `maxFilesToRetain` files on the right side) + * + * This method skips compaction for some circumstances described below: + * - not enough files on the range of compaction + * - score is lower than the threshold of compaction (meaning compaction won't help much) + * + * If this method returns the compaction result as SUCCESS, caller needs to re-read the list + * of event log files, as new compact file is available as well as old event log files are + * removed. 
+ */ + def compact(eventLogFiles: Seq[FileStatus]): CompactionResult = { + assertPrecondition(eventLogFiles) + + if (eventLogFiles.length < maxFilesToRetain) { + return CompactionResult(CompactionResultCode.NOT_ENOUGH_FILES, None) + } + + val filesToCompact = findFilesToCompact(eventLogFiles) + if (filesToCompact.isEmpty) { + CompactionResult(CompactionResultCode.NOT_ENOUGH_FILES, None) + } else { + val builders = initializeBuilders(fs, filesToCompact.map(_.getPath)) + + val filters = builders.map(_.createFilter()) + val minScore = filters.flatMap(_.statistics()).map(calculateScore).min + + if (minScore < compactionThresholdScore) { + CompactionResult(CompactionResultCode.LOW_SCORE_FOR_COMPACTION, None) + } else { + rewrite(filters, filesToCompact) + cleanupCompactedFiles(filesToCompact) + CompactionResult(CompactionResultCode.SUCCESS, Some( + RollingEventLogFilesWriter.getEventLogFileIndex(filesToCompact.last.getPath.getName))) + } + } + } + + private def assertPrecondition(eventLogFiles: Seq[FileStatus]): Unit = { + val idxCompactedFiles = eventLogFiles.zipWithIndex.filter { case (file, _) => + EventLogFileWriter.isCompacted(file.getPath) + } + require(idxCompactedFiles.size < 2 && idxCompactedFiles.headOption.forall(_._2 == 0), + "The number of compact files should be at most 1, and should be placed first if exists.") + } + + /** + * Loads all available EventFilterBuilders in classloader via ServiceLoader, and initializes + * them via replaying events in given files. 
+ */ + private def initializeBuilders(fs: FileSystem, files: Seq[Path]): Seq[EventFilterBuilder] = { + val bus = new ReplayListenerBus() + + val builders = ServiceLoader.load(classOf[EventFilterBuilder], + Utils.getContextOrSparkClassLoader).asScala.toSeq + builders.foreach(bus.addListener) + + files.foreach { log => + Utils.tryWithResource(EventLogFileReader.openEventLog(log, fs)) { in => + bus.replay(in, log.getName) + } + } + + builders + } + + private def calculateScore(stats: FilterStatistics): Double = { + // For now it's simply measuring how many task events will be filtered out (rejected) + // but it can be sophisticated later once we get more heuristic information and found + // the case where this simple calculation doesn't work. + (stats.totalTasks - stats.liveTasks) * 1.0 / stats.totalTasks + } + + /** + * This method rewrites the event log files into one compact file: the compact file will only + * contain the events which pass the filters. Events will be dropped only when all filters + * decide to reject the event or don't mind about the event. Otherwise, the original line for + * the event is written to the compact file as it is. 
+ */ + private[history] def rewrite( + filters: Seq[EventFilter], + eventLogFiles: Seq[FileStatus]): String = { + require(eventLogFiles.nonEmpty) + + val lastIndexEventLogPath = eventLogFiles.last.getPath + val logWriter = new CompactedEventLogFileWriter(lastIndexEventLogPath, "dummy", None, + lastIndexEventLogPath.getParent.toUri, sparkConf, hadoopConf) + + logWriter.start() + eventLogFiles.foreach { file => + EventFilter.applyFilterToFile(fs, filters, file.getPath, + onAccepted = (line, _) => logWriter.writeEvent(line, flushLogger = true), + onRejected = (_, _) => {}, + onUnidentified = line => logWriter.writeEvent(line, flushLogger = true) + ) + } + logWriter.stop() + + logWriter.logPath + } + + private def cleanupCompactedFiles(files: Seq[FileStatus]): Unit = { + files.foreach { file => + var deleted = false + try { + deleted = fs.delete(file.getPath, true) + } catch { + case _: IOException => + } + if (!deleted) { + logWarning(s"Failed to remove ${file.getPath} / skip removing.") + } + } + } + + private def findFilesToCompact(eventLogFiles: Seq[FileStatus]): Seq[FileStatus] = { + val numNormalEventLogFiles = { + if (EventLogFileWriter.isCompacted(eventLogFiles.head.getPath)) { + eventLogFiles.length - 1 + } else { + eventLogFiles.length + } + } + + // This avoids compacting only compact file. + if (numNormalEventLogFiles > maxFilesToRetain) { + eventLogFiles.dropRight(maxFilesToRetain) + } else { + Seq.empty + } + } +} + +/** + * Describes the result of compaction. + * + * @param code The result of compaction. + * @param compactIndex The index of compact file if the compaction is successful. + * Otherwise it will be None. 
+ */ +case class CompactionResult(code: CompactionResultCode.Value, compactIndex: Option[Long]) + +object CompactionResultCode extends Enumeration { + val SUCCESS, NOT_ENOUGH_FILES, LOW_SCORE_FOR_COMPACTION = Value +} + +/** + * This class helps to write compact file; to avoid reimplementing everything, it extends + * [[SingleEventLogFileWriter]], but only `originalFilePath` is used to determine the + * path of compact file. + */ +private class CompactedEventLogFileWriter( + originalFilePath: Path, + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration) + extends SingleEventLogFileWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) { + + override val logPath: String = originalFilePath.toUri.toString + EventLogFileWriter.COMPACTED +} diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala new file mode 100644 index 0000000000000..9f63a6441a838 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.history + +import java.io.{BufferedInputStream, InputStream} +import java.util.concurrent.ConcurrentHashMap +import java.util.zip.{ZipEntry, ZipOutputStream} + +import com.google.common.io.ByteStreams +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.hdfs.DFSInputStream + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.history.EventLogFileWriter.codecName +import org.apache.spark.io.CompressionCodec +import org.apache.spark.util.Utils + +/** The base class of reader which will read the information of event log file(s). */ +abstract class EventLogFileReader( + protected val fileSystem: FileSystem, + val rootPath: Path) { + + protected def fileSizeForDFS(path: Path): Option[Long] = { + Utils.tryWithResource(fileSystem.open(path)) { in => + in.getWrappedStream match { + case dfsIn: DFSInputStream => Some(dfsIn.getFileLength) + case _ => None + } + } + } + + protected def addFileAsZipEntry( + zipStream: ZipOutputStream, + path: Path, + entryName: String): Unit = { + Utils.tryWithResource(fileSystem.open(path, 1 * 1024 * 1024)) { inputStream => + zipStream.putNextEntry(new ZipEntry(entryName)) + ByteStreams.copy(inputStream, zipStream) + zipStream.closeEntry() + } + } + + /** Returns the last index of event log files. None for single event log file. */ + def lastIndex: Option[Long] + + /** + * Returns the size of file for the last index of event log files. Returns its size for + * single event log file. + */ + def fileSizeForLastIndex: Long + + /** Returns whether the application is completed. */ + def completed: Boolean + + /** + * Returns the size of file for the last index (itself for single event log file) of event log + * files, only when underlying input stream is DFSInputStream. Otherwise returns None. 
+ */ + def fileSizeForLastIndexForDFS: Option[Long] + + /** + * Returns the modification time for the last index (itself for single event log file) + * of event log files. + */ + def modificationTime: Long + + /** + * This method compresses the files passed in, and writes the compressed data out into the + * ZipOutputStream passed in. Each file is written as a new ZipEntry with its name being + * the name of the file being compressed. + */ + def zipEventLogFiles(zipStream: ZipOutputStream): Unit + + /** Returns all available event log files. */ + def listEventLogFiles: Seq[FileStatus] + + /** Returns the short compression name if being used. None if it's uncompressed. */ + def compressionCodec: Option[String] + + /** Returns the size of all event log files. */ + def totalSize: Long +} + +object EventLogFileReader { + // A cache for compression codecs to avoid creating the same codec many times + private val codecMap = new ConcurrentHashMap[String, CompressionCodec]() + + def apply( + fs: FileSystem, + path: Path, + lastIndex: Option[Long]): EventLogFileReader = { + lastIndex match { + case Some(_) => new RollingEventLogFilesFileReader(fs, path) + case None => new SingleFileEventLogFileReader(fs, path) + } + } + + def apply(fs: FileSystem, path: Path): Option[EventLogFileReader] = { + apply(fs, fs.getFileStatus(path)) + } + + def apply(fs: FileSystem, status: FileStatus): Option[EventLogFileReader] = { + if (isSingleEventLog(status)) { + Some(new SingleFileEventLogFileReader(fs, status.getPath)) + } else if (isRollingEventLogs(status)) { + Some(new RollingEventLogFilesFileReader(fs, status.getPath)) + } else { + None + } + } + + /** + * Opens an event log file and returns an input stream that contains the event data. + * + * @return input stream that holds one JSON record per line. 
+ */ + def openEventLog(log: Path, fs: FileSystem): InputStream = { + val in = new BufferedInputStream(fs.open(log)) + try { + val codec = codecName(log).map { c => + codecMap.computeIfAbsent(c, CompressionCodec.createCodec(new SparkConf, _)) + } + codec.map(_.compressedContinuousInputStream(in)).getOrElse(in) + } catch { + case e: Throwable => + in.close() + throw e + } + } + + private def isSingleEventLog(status: FileStatus): Boolean = { + !status.isDirectory && + // FsHistoryProvider used to generate a hidden file which can't be read. Accidentally + // reading a garbage file is safe, but we would log an error which can be scary to + // the end-user. + !status.getPath.getName.startsWith(".") + } + + private def isRollingEventLogs(status: FileStatus): Boolean = { + RollingEventLogFilesWriter.isEventLogDir(status) + } +} + +/** + * The reader which will read the information of single event log file. + * + * This reader gets the status of event log file only once when required; + * It may not give "live" status of file that could be changing concurrently, and + * FileNotFoundException could occur if the log file is renamed before getting the + * status of log file. 
+ */ +class SingleFileEventLogFileReader( + fs: FileSystem, + path: Path) extends EventLogFileReader(fs, path) { + private lazy val status = fileSystem.getFileStatus(rootPath) + + override def lastIndex: Option[Long] = None + + override def fileSizeForLastIndex: Long = status.getLen + + override def completed: Boolean = !rootPath.getName.stripSuffix(EventLogFileWriter.COMPACTED) + .endsWith(EventLogFileWriter.IN_PROGRESS) + + override def fileSizeForLastIndexForDFS: Option[Long] = { + if (completed) { + Some(fileSizeForLastIndex) + } else { + fileSizeForDFS(rootPath) + } + } + + override def modificationTime: Long = status.getModificationTime + + override def zipEventLogFiles(zipStream: ZipOutputStream): Unit = { + addFileAsZipEntry(zipStream, rootPath, rootPath.getName) + } + + override def listEventLogFiles: Seq[FileStatus] = Seq(status) + + override def compressionCodec: Option[String] = EventLogFileWriter.codecName(rootPath) + + override def totalSize: Long = fileSizeForLastIndex +} + +/** + * The reader which will read the information of rolled multiple event log files. + * + * This reader lists the files only once; if caller would like to play with updated list, + * it needs to create another reader instance. 
+ */ +class RollingEventLogFilesFileReader( + fs: FileSystem, + path: Path) extends EventLogFileReader(fs, path) { + import RollingEventLogFilesWriter._ + + private lazy val files: Seq[FileStatus] = { + val ret = fs.listStatus(rootPath).toSeq + require(ret.exists(isEventLogFile), "Log directory must contain at least one event log file!") + require(ret.exists(isAppStatusFile), "Log directory must contain an appstatus file!") + ret + } + + private lazy val appStatusFile = files.find(isAppStatusFile).get + + private lazy val eventLogFiles: Seq[FileStatus] = { + val eventLogFiles = files.filter(isEventLogFile).sortBy { status => + val filePath = status.getPath + var idx = getEventLogFileIndex(filePath.getName).toDouble + // trick to place compacted file later than normal file if index is same. + if (EventLogFileWriter.isCompacted(filePath)) { + idx += 0.1 + } + idx + } + val filesToRead = dropBeforeLastCompactFile(eventLogFiles) + val indices = filesToRead.map { file => getEventLogFileIndex(file.getPath.getName) } + require((indices.head to indices.last) == indices, "Found missing event log file, expected" + + s" indices: ${indices.head to indices.last}, actual: ${indices}") + filesToRead + } + + override def lastIndex: Option[Long] = Some( + getEventLogFileIndex(lastEventLogFile.getPath.getName)) + + override def fileSizeForLastIndex: Long = lastEventLogFile.getLen + + override def completed: Boolean = { + !appStatusFile.getPath.getName.endsWith(EventLogFileWriter.IN_PROGRESS) + } + + override def fileSizeForLastIndexForDFS: Option[Long] = { + if (completed) { + Some(fileSizeForLastIndex) + } else { + fileSizeForDFS(lastEventLogFile.getPath) + } + } + + override def modificationTime: Long = lastEventLogFile.getModificationTime + + override def zipEventLogFiles(zipStream: ZipOutputStream): Unit = { + val dirEntryName = rootPath.getName + "/" + zipStream.putNextEntry(new ZipEntry(dirEntryName)) + files.foreach { file => + addFileAsZipEntry(zipStream, file.getPath, 
dirEntryName + file.getPath.getName) + } + } + + override def listEventLogFiles: Seq[FileStatus] = eventLogFiles + + override def compressionCodec: Option[String] = { + EventLogFileWriter.codecName(eventLogFiles.head.getPath) + } + + override def totalSize: Long = eventLogFiles.map(_.getLen).sum + + private def lastEventLogFile: FileStatus = eventLogFiles.last + + private def dropBeforeLastCompactFile(eventLogFiles: Seq[FileStatus]): Seq[FileStatus] = { + val lastCompactedFileIdx = eventLogFiles.lastIndexWhere { fs => + EventLogFileWriter.isCompacted(fs.getPath) + } + eventLogFiles.drop(lastCompactedFileIdx) + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala new file mode 100644 index 0000000000000..1d58d054b7825 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.history + +import java.io._ +import java.net.URI +import java.nio.charset.StandardCharsets + +import org.apache.commons.compress.utils.CountingOutputStream +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, FSDataOutputStream, Path} +import org.apache.hadoop.fs.permission.FsPermission + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.io.CompressionCodec +import org.apache.spark.util.Utils + +/** + * The base class of writer which will write event logs into file. + * + * The following configurable parameters are available to tune the behavior of writing: + * spark.eventLog.compress - Whether to compress logged events + * spark.eventLog.compression.codec - The codec to compress logged events + * spark.eventLog.overwrite - Whether to overwrite any existing files + * spark.eventLog.buffer.kb - Buffer size to use when writing to output streams + * + * Note that descendant classes can maintain its own parameters: refer the javadoc of each class + * for more details. + * + * NOTE: CountingOutputStream being returned by "initLogFile" counts "non-compressed" bytes. 
+ */ +abstract class EventLogFileWriter( + appId: String, + appAttemptId : Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration) extends Logging { + + protected val shouldCompress = sparkConf.get(EVENT_LOG_COMPRESS) + protected val shouldOverwrite = sparkConf.get(EVENT_LOG_OVERWRITE) + protected val outputBufferSize = sparkConf.get(EVENT_LOG_OUTPUT_BUFFER_SIZE).toInt + protected val fileSystem = Utils.getHadoopFileSystem(logBaseDir, hadoopConf) + protected val compressionCodec = + if (shouldCompress) { + Some(CompressionCodec.createCodec(sparkConf, sparkConf.get(EVENT_LOG_COMPRESSION_CODEC))) + } else { + None + } + + private[history] val compressionCodecName = compressionCodec.map { c => + CompressionCodec.getShortName(c.getClass.getName) + } + + // Only defined if the file system scheme is not local + protected var hadoopDataStream: Option[FSDataOutputStream] = None + protected var writer: Option[PrintWriter] = None + + protected def requireLogBaseDirAsDirectory(): Unit = { + if (!fileSystem.getFileStatus(new Path(logBaseDir)).isDirectory) { + throw new IllegalArgumentException(s"Log directory $logBaseDir is not a directory.") + } + } + + protected def initLogFile(path: Path)(fnSetupWriter: OutputStream => PrintWriter): Unit = { + if (shouldOverwrite && fileSystem.delete(path, true)) { + logWarning(s"Event log $path already exists. Overwriting...") + } + + val defaultFs = FileSystem.getDefaultUri(hadoopConf).getScheme + val isDefaultLocal = defaultFs == null || defaultFs == "file" + val uri = path.toUri + + // The Hadoop LocalFileSystem (r1.0.4) has known issues with syncing (HADOOP-7844). + // Therefore, for local files, use FileOutputStream instead. 
+ val dstream = + if ((isDefaultLocal && uri.getScheme == null) || uri.getScheme == "file") { + new FileOutputStream(uri.getPath) + } else { + hadoopDataStream = Some( + SparkHadoopUtil.createFile(fileSystem, path, sparkConf.get(EVENT_LOG_ALLOW_EC))) + hadoopDataStream.get + } + + try { + val cstream = compressionCodec.map(_.compressedContinuousOutputStream(dstream)) + .getOrElse(dstream) + val bstream = new BufferedOutputStream(cstream, outputBufferSize) + fileSystem.setPermission(path, EventLogFileWriter.LOG_FILE_PERMISSIONS) + logInfo(s"Logging events to $path") + writer = Some(fnSetupWriter(bstream)) + } catch { + case e: Exception => + dstream.close() + throw e + } + } + + protected def writeLine(line: String, flushLogger: Boolean = false): Unit = { + // scalastyle:off println + writer.foreach(_.println(line)) + // scalastyle:on println + if (flushLogger) { + writer.foreach(_.flush()) + hadoopDataStream.foreach(_.hflush()) + } + } + + protected def closeWriter(): Unit = { + writer.foreach(_.close()) + } + + protected def renameFile(src: Path, dest: Path, overwrite: Boolean): Unit = { + if (fileSystem.exists(dest)) { + if (overwrite) { + logWarning(s"Event log $dest already exists. 
Overwriting...") + if (!fileSystem.delete(dest, true)) { + logWarning(s"Error deleting $dest") + } + } else { + throw new IOException(s"Target log file already exists ($dest)") + } + } + fileSystem.rename(src, dest) + // touch file to ensure modtime is current across those filesystems where rename() + // does not set it but support setTimes() instead; it's a no-op on most object stores + try { + fileSystem.setTimes(dest, System.currentTimeMillis(), -1) + } catch { + case e: Exception => logDebug(s"failed to set time of $dest", e) + } + } + + /** initialize writer for event logging */ + def start(): Unit + + /** writes JSON format of event to file */ + def writeEvent(eventJson: String, flushLogger: Boolean = false): Unit + + /** stops writer - indicating the application has been completed */ + def stop(): Unit + + /** returns representative path of log. for tests only. */ + def logPath: String +} + +object EventLogFileWriter { + // Suffix applied to the names of files still being written by applications. + val IN_PROGRESS = ".inprogress" + val COMPACTED = ".compact" + + val LOG_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort) + + def apply( + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration): EventLogFileWriter = { + if (sparkConf.get(EVENT_LOG_ENABLE_ROLLING)) { + new RollingEventLogFilesWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) + } else { + new SingleEventLogFileWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) + } + } + + def nameForAppAndAttempt(appId: String, appAttemptId: Option[String]): String = { + val base = Utils.sanitizeDirName(appId) + if (appAttemptId.isDefined) { + base + "_" + Utils.sanitizeDirName(appAttemptId.get) + } else { + base + } + } + + def codecName(log: Path): Option[String] = { + // Compression codec is encoded as an extension, e.g. 
app_123.lzf + // Since we sanitize the app ID to not include periods, it is safe to split on it + val logName = log.getName.stripSuffix(COMPACTED).stripSuffix(IN_PROGRESS) + logName.split("\\.").tail.lastOption + } + + def isCompacted(log: Path): Boolean = log.getName.endsWith(COMPACTED) +} + +/** + * The writer to write event logs into single file. + */ +class SingleEventLogFileWriter( + appId: String, + appAttemptId : Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration) + extends EventLogFileWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) { + + override val logPath: String = SingleEventLogFileWriter.getLogPath(logBaseDir, appId, + appAttemptId, compressionCodecName) + + protected def inProgressPath = logPath + EventLogFileWriter.IN_PROGRESS + + override def start(): Unit = { + requireLogBaseDirAsDirectory() + + initLogFile(new Path(inProgressPath)) { os => + new PrintWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)) + } + } + + override def writeEvent(eventJson: String, flushLogger: Boolean = false): Unit = { + writeLine(eventJson, flushLogger) + } + + /** + * Stop logging events. The event log file will be renamed so that it loses the + * ".inprogress" suffix. + */ + override def stop(): Unit = { + closeWriter() + renameFile(new Path(inProgressPath), new Path(logPath), shouldOverwrite) + } +} + +object SingleEventLogFileWriter { + /** + * Return a file-system-safe path to the log file for the given application. + * + * Note that because we currently only create a single log file for each application, + * we must encode all the information needed to parse this event log in the file name + * instead of within the file itself. Otherwise, if the file is compressed, for instance, + * we won't know which codec to use to decompress the metadata needed to open the file in + * the first place. + * + * The log file name will identify the compression codec used for the contents, if any. 
+ * For example, app_123 for an uncompressed log, app_123.lzf for an LZF-compressed log. + * + * @param logBaseDir Directory where the log file will be written. + * @param appId A unique app ID. + * @param appAttemptId A unique attempt id of appId. May be the empty string. + * @param compressionCodecName Name to identify the codec used to compress the contents + * of the log, or None if compression is not enabled. + * @return A path which consists of file-system-safe characters. + */ + def getLogPath( + logBaseDir: URI, + appId: String, + appAttemptId: Option[String], + compressionCodecName: Option[String] = None): String = { + val codec = compressionCodecName.map("." + _).getOrElse("") + new Path(logBaseDir).toString.stripSuffix("/") + "/" + + EventLogFileWriter.nameForAppAndAttempt(appId, appAttemptId) + codec + } +} + +/** + * The writer to write event logs into multiple log files, rolled over via configured size. + * + * The class creates one directory per application, and stores event log files as well as + * metadata files. The name of directory and files in the directory would follow: + * + * - The name of directory: eventlog_v2_appId(_[appAttemptId]) + * - The prefix of name on event files: events_[index]_[appId](_[appAttemptId])(.[codec]) + * - "index" would be monotonically increasing value (say, sequence) + * - The name of metadata (app. status) file name: appstatus_[appId](_[appAttemptId])(.inprogress) + * + * The writer will roll over the event log file when configured size is reached. Note that the + * writer doesn't check the size on file being open for write: the writer tracks the count of bytes + * written before compression is applied. + * + * For metadata files, the class will leverage zero-byte file, as it provides minimized cost. 
+ */ +class RollingEventLogFilesWriter( + appId: String, + appAttemptId : Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration) + extends EventLogFileWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) { + + import RollingEventLogFilesWriter._ + + private val eventFileMaxLength = sparkConf.get(EVENT_LOG_ROLLING_MAX_FILE_SIZE) + + private val logDirForAppPath = getAppEventLogDirPath(logBaseDir, appId, appAttemptId) + + private var countingOutputStream: Option[CountingOutputStream] = None + + // index and event log path will be updated soon in rollEventLogFile, which `start` will call + private var index: Long = 0L + private var currentEventLogFilePath: Path = _ + + override def start(): Unit = { + requireLogBaseDirAsDirectory() + + if (fileSystem.exists(logDirForAppPath) && shouldOverwrite) { + fileSystem.delete(logDirForAppPath, true) + } + + if (fileSystem.exists(logDirForAppPath)) { + throw new IOException(s"Target log directory already exists ($logDirForAppPath)") + } + + fileSystem.mkdirs(logDirForAppPath, EventLogFileWriter.LOG_FILE_PERMISSIONS) + createAppStatusFile(inProgress = true) + rollEventLogFile() + } + + override def writeEvent(eventJson: String, flushLogger: Boolean = false): Unit = { + writer.foreach { w => + val currentLen = countingOutputStream.get.getBytesWritten + if (currentLen + eventJson.length > eventFileMaxLength) { + rollEventLogFile() + } + } + + writeLine(eventJson, flushLogger) + } + + /** exposed for testing only */ + private[history] def rollEventLogFile(): Unit = { + closeWriter() + + index += 1 + currentEventLogFilePath = getEventLogFilePath(logDirForAppPath, appId, appAttemptId, index, + compressionCodecName) + + initLogFile(currentEventLogFilePath) { os => + countingOutputStream = Some(new CountingOutputStream(os)) + new PrintWriter( + new OutputStreamWriter(countingOutputStream.get, StandardCharsets.UTF_8)) + } + } + + override def stop(): Unit = { + closeWriter() + val 
appStatusPathIncomplete = getAppStatusFilePath(logDirForAppPath, appId, appAttemptId, + inProgress = true) + val appStatusPathComplete = getAppStatusFilePath(logDirForAppPath, appId, appAttemptId, + inProgress = false) + renameFile(appStatusPathIncomplete, appStatusPathComplete, overwrite = true) + } + + override def logPath: String = logDirForAppPath.toString + + private def createAppStatusFile(inProgress: Boolean): Unit = { + val appStatusPath = getAppStatusFilePath(logDirForAppPath, appId, appAttemptId, inProgress) + val outputStream = fileSystem.create(appStatusPath) + // we intentionally create zero-byte file to minimize the cost + outputStream.close() + } +} + +object RollingEventLogFilesWriter { + private[history] val EVENT_LOG_DIR_NAME_PREFIX = "eventlog_v2_" + private[history] val EVENT_LOG_FILE_NAME_PREFIX = "events_" + private[history] val APPSTATUS_FILE_NAME_PREFIX = "appstatus_" + + def getAppEventLogDirPath(logBaseDir: URI, appId: String, appAttemptId: Option[String]): Path = + new Path(new Path(logBaseDir), EVENT_LOG_DIR_NAME_PREFIX + + EventLogFileWriter.nameForAppAndAttempt(appId, appAttemptId)) + + def getAppStatusFilePath( + appLogDir: Path, + appId: String, + appAttemptId: Option[String], + inProgress: Boolean): Path = { + val base = APPSTATUS_FILE_NAME_PREFIX + + EventLogFileWriter.nameForAppAndAttempt(appId, appAttemptId) + val name = if (inProgress) base + EventLogFileWriter.IN_PROGRESS else base + new Path(appLogDir, name) + } + + def getEventLogFilePath( + appLogDir: Path, + appId: String, + appAttemptId: Option[String], + index: Long, + codecName: Option[String]): Path = { + val base = s"${EVENT_LOG_FILE_NAME_PREFIX}${index}_" + + EventLogFileWriter.nameForAppAndAttempt(appId, appAttemptId) + val codec = codecName.map("." 
+ _).getOrElse("") + new Path(appLogDir, base + codec) + } + + def isEventLogDir(status: FileStatus): Boolean = { + status.isDirectory && status.getPath.getName.startsWith(EVENT_LOG_DIR_NAME_PREFIX) + } + + def isEventLogFile(fileName: String): Boolean = { + fileName.startsWith(EVENT_LOG_FILE_NAME_PREFIX) + } + + def isEventLogFile(status: FileStatus): Boolean = { + status.isFile && isEventLogFile(status.getPath.getName) + } + + def isAppStatusFile(status: FileStatus): Boolean = { + status.isFile && status.getPath.getName.startsWith(APPSTATUS_FILE_NAME_PREFIX) + } + + def getEventLogFileIndex(eventLogFileName: String): Long = { + require(isEventLogFile(eventLogFileName), "Not an event log file!") + val index = eventLogFileName.stripPrefix(EVENT_LOG_FILE_NAME_PREFIX).split("_")(0) + index.toLong + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 5f9b18ce01279..99d3eceb1121a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -18,23 +18,21 @@ package org.apache.spark.deploy.history import java.io.{File, FileNotFoundException, IOException} +import java.lang.{Long => JLong} import java.nio.file.Files -import java.util.{Date, ServiceLoader} +import java.util.{Date, NoSuchElementException, ServiceLoader} import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future, TimeUnit} -import java.util.zip.{ZipEntry, ZipOutputStream} +import java.util.zip.ZipOutputStream import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.concurrent.ExecutionException import scala.io.Source -import scala.util.Try import scala.xml.Node import com.fasterxml.jackson.annotation.JsonIgnore -import com.google.common.io.ByteStreams -import com.google.common.util.concurrent.MoreExecutors +import 
com.fasterxml.jackson.databind.annotation.JsonDeserialize import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} -import org.apache.hadoop.hdfs.{DFSInputStream, DistributedFileSystem} +import org.apache.hadoop.hdfs.DistributedFileSystem import org.apache.hadoop.hdfs.protocol.HdfsConstants import org.apache.hadoop.security.AccessControlException import org.fusesource.leveldbjni.internal.NativeDB @@ -47,7 +45,6 @@ import org.apache.spark.internal.config.History._ import org.apache.spark.internal.config.Status._ import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.internal.config.UI._ -import org.apache.spark.io.CompressionCodec import org.apache.spark.scheduler._ import org.apache.spark.scheduler.ReplayListenerBus._ import org.apache.spark.status._ @@ -161,6 +158,29 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) new HistoryServerDiskManager(conf, path, listing, clock) } + private val fileCompactor = new EventLogFileCompactor(conf, hadoopConf, fs, + conf.get(EVENT_LOG_ROLLING_MAX_FILES_TO_RETAIN), conf.get(EVENT_LOG_COMPACTION_SCORE_THRESHOLD)) + + // Used to store the paths, which are being processed. This enable the replay log tasks execute + // asynchronously and make sure that checkForLogs would not process a path repeatedly. 
+ private val processing = ConcurrentHashMap.newKeySet[String] + + private def isProcessing(path: Path): Boolean = { + processing.contains(path.getName) + } + + private def isProcessing(info: LogInfo): Boolean = { + processing.contains(info.logPath.split("/").last) + } + + private def processing(path: Path): Unit = { + processing.add(path.getName) + } + + private def endProcessing(path: Path): Unit = { + processing.remove(path.getName) + } + private val blacklist = new ConcurrentHashMap[String, Long] // Visible for testing @@ -196,11 +216,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) if (!Utils.isTesting) { ThreadUtils.newDaemonFixedThreadPool(NUM_PROCESSING_THREADS, "log-replay-executor") } else { - MoreExecutors.sameThreadExecutor() + ThreadUtils.sameThreadExecutorService } } - val initThread = initialize() + var initThread: Thread = null private[history] def initialize(): Thread = { if (!isFsInSafeMode()) { @@ -353,10 +373,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val ui = SparkUI.create(None, new HistoryAppStatusStore(conf, kvstore), conf, secManager, app.info.name, HistoryServer.getAttemptURI(appId, attempt.info.attemptId), attempt.info.startTime.getTime(), attempt.info.appSparkVersion) - loadPlugins().foreach(_.setupUI(ui)) - val loadedUI = LoadedAppUI(ui) + // place the tab in UI based on the display order + loadPlugins().toSeq.sortBy(_.displayOrder).foreach(_.setupUI(ui)) + val loadedUI = LoadedAppUI(ui) synchronized { activeUIs((appId, attemptId)) = loadedUI } @@ -384,6 +405,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) Map("Event log directory" -> logDir.toString) ++ safeMode } + override def start(): Unit = { + initThread = initialize() + } + override def stop(): Unit = { try { if (initThread != null && initThread.isAlive()) { @@ -435,27 +460,27 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) logDebug(s"Scanning $logDir with 
lastScanTime==$lastScanTime") val updated = Option(fs.listStatus(new Path(logDir))).map(_.toSeq).getOrElse(Nil) - .filter { entry => - !entry.isDirectory() && - // FsHistoryProvider used to generate a hidden file which can't be read. Accidentally - // reading a garbage file is safe, but we would log an error which can be scary to - // the end-user. - !entry.getPath().getName().startsWith(".") && - !isBlacklisted(entry.getPath) - } - .filter { entry => + .filter { entry => !isBlacklisted(entry.getPath) } + .filter { entry => !isProcessing(entry.getPath) } + .flatMap { entry => EventLogFileReader(fs, entry) } + .filter { reader => try { - val info = listing.read(classOf[LogInfo], entry.getPath().toString()) + val info = listing.read(classOf[LogInfo], reader.rootPath.toString()) if (info.appId.isDefined) { // If the SHS view has a valid application, update the time the file was last seen so // that the entry is not deleted from the SHS listing. Also update the file size, in // case the code below decides we don't need to parse the log. - listing.write(info.copy(lastProcessed = newLastScanTime, fileSize = entry.getLen())) + listing.write(info.copy(lastProcessed = newLastScanTime, + fileSize = reader.fileSizeForLastIndex, + lastIndex = reader.lastIndex, + isComplete = reader.completed)) } - if (shouldReloadLog(info, entry)) { - if (info.appId.isDefined && fastInProgressParsing) { + if (shouldReloadLog(info, reader)) { + // ignore fastInProgressParsing when rolling event log is enabled on the log path, + // to ensure proceeding compaction even fastInProgressParsing is turned on. + if (info.appId.isDefined && reader.lastIndex.isEmpty && fastInProgressParsing) { // When fast in-progress parsing is on, we don't need to re-parse when the // size changes, but we do need to invalidate any existing UIs. 
// Also, we need to update the `lastUpdated time` to display the updated time in @@ -468,6 +493,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) attempt.info.copy(lastUpdated = new Date(newLastScanTime)), attempt.logPath, attempt.fileSize, + attempt.lastIndex, attempt.adminAcls, attempt.viewAcls, attempt.adminAclsGroups, @@ -493,56 +519,23 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // If the file is currently not being tracked by the SHS, add an entry for it and try // to parse it. This will allow the cleaner code to detect the file as stale later on // if it was not possible to parse it. - listing.write(LogInfo(entry.getPath().toString(), newLastScanTime, LogType.EventLogs, - None, None, entry.getLen())) - entry.getLen() > 0 + listing.write(LogInfo(reader.rootPath.toString(), newLastScanTime, LogType.EventLogs, + None, None, reader.fileSizeForLastIndex, reader.lastIndex, None, + reader.completed)) + reader.fileSizeForLastIndex > 0 } } .sortWith { case (entry1, entry2) => - entry1.getModificationTime() > entry2.getModificationTime() + entry1.modificationTime > entry2.modificationTime } if (updated.nonEmpty) { - logDebug(s"New/updated attempts found: ${updated.size} ${updated.map(_.getPath)}") + logDebug(s"New/updated attempts found: ${updated.size} ${updated.map(_.rootPath)}") } - val tasks = updated.flatMap { entry => - try { - val task: Future[Unit] = replayExecutor.submit( - () => mergeApplicationListing(entry, newLastScanTime, true)) - Some(task -> entry.getPath) - } catch { - // let the iteration over the updated entries break, since an exception on - // replayExecutor.submit (..) indicates the ExecutorService is unable - // to take any more submissions at this time - case e: Exception => - logError(s"Exception while submitting event log for replay", e) - None - } - } - - pendingReplayTasksCount.addAndGet(tasks.size) - - // Wait for all tasks to finish. 
This makes sure that checkForLogs - // is not scheduled again while some tasks are already running in - // the replayExecutor. - tasks.foreach { case (task, path) => - try { - task.get() - } catch { - case e: InterruptedException => - throw e - case e: ExecutionException if e.getCause.isInstanceOf[AccessControlException] => - // We don't have read permissions on the log file - logWarning(s"Unable to read log $path", e.getCause) - blacklist(path) - // SPARK-28157 We should remove this blacklisted entry from the KVStore - // to handle permission-only changes with the same file sizes later. - listing.delete(classOf[LogInfo], path.toString) - case e: Exception => - logError("Exception while merging application listings", e) - } finally { - pendingReplayTasksCount.decrementAndGet() + updated.foreach { entry => + submitLogProcessTask(entry.rootPath) { () => + mergeApplicationListing(entry, newLastScanTime, true) } } @@ -557,7 +550,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .last(newLastScanTime - 1) .asScala .toList - stale.foreach { log => + stale.filterNot(isProcessing).foreach { log => log.appId.foreach { appId => cleanAppData(appId, log.attemptId, log.logPath) listing.delete(classOf[LogInfo], log.logPath) @@ -570,22 +563,26 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } - private[history] def shouldReloadLog(info: LogInfo, entry: FileStatus): Boolean = { - var result = info.fileSize < entry.getLen - if (!result && info.logPath.endsWith(EventLoggingListener.IN_PROGRESS)) { - try { - result = Utils.tryWithResource(fs.open(entry.getPath)) { in => - in.getWrappedStream match { - case dfsIn: DFSInputStream => info.fileSize < dfsIn.getFileLength - case _ => false - } + private[history] def shouldReloadLog(info: LogInfo, reader: EventLogFileReader): Boolean = { + if (info.isComplete != reader.completed) { + true + } else { + var result = if (info.lastIndex.isDefined) { + require(reader.lastIndex.isDefined) + 
info.lastIndex.get < reader.lastIndex.get || info.fileSize < reader.fileSizeForLastIndex + } else { + info.fileSize < reader.fileSizeForLastIndex + } + if (!result && !reader.completed) { + try { + result = reader.fileSizeForLastIndexForDFS.exists(info.fileSize < _) + } catch { + case e: Exception => + logDebug(s"Failed to check the length for the file : ${info.logPath}", e) } - } catch { - case e: Exception => - logDebug(s"Failed to check the length for the file : ${info.logPath}", e) } + result } - result } private def cleanAppData(appId: String, attemptId: Option[String], logPath: String): Unit = { @@ -632,23 +629,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) attemptId: Option[String], zipStream: ZipOutputStream): Unit = { - /** - * This method compresses the files passed in, and writes the compressed data out into the - * [[OutputStream]] passed in. Each file is written as a new [[ZipEntry]] with its name being - * the name of the file being compressed. 
- */ - def zipFileToStream(file: Path, entryName: String, outputStream: ZipOutputStream): Unit = { - val fs = file.getFileSystem(hadoopConf) - val inputStream = fs.open(file, 1 * 1024 * 1024) // 1MB Buffer - try { - outputStream.putNextEntry(new ZipEntry(entryName)) - ByteStreams.copy(inputStream, outputStream) - outputStream.closeEntry() - } finally { - inputStream.close() - } - } - val app = try { load(appId) } catch { @@ -661,22 +641,68 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) attemptId .map { id => app.attempts.filter(_.info.attemptId == Some(id)) } .getOrElse(app.attempts) - .map(_.logPath) - .foreach { log => - zipFileToStream(new Path(logDir, log), log, zipStream) + .foreach { attempt => + val reader = EventLogFileReader(fs, new Path(logDir, attempt.logPath), + attempt.lastIndex) + reader.zipEventLogFiles(zipStream) } } finally { zipStream.close() } } + private def mergeApplicationListing( + reader: EventLogFileReader, + scanTime: Long, + enableOptimizations: Boolean): Unit = { + val rootPath = reader.rootPath + var succeeded = false + try { + val lastEvaluatedForCompaction: Option[Long] = try { + listing.read(classOf[LogInfo], rootPath.toString).lastEvaluatedForCompaction + } catch { + case _: NoSuchElementException => None + } + + pendingReplayTasksCount.incrementAndGet() + doMergeApplicationListing(reader, scanTime, enableOptimizations, lastEvaluatedForCompaction) + if (conf.get(CLEANER_ENABLED)) { + checkAndCleanLog(rootPath.toString) + } + + succeeded = true + } catch { + case e: InterruptedException => + throw e + case e: AccessControlException => + // We don't have read permissions on the log file + logWarning(s"Unable to read log $rootPath", e) + blacklist(rootPath) + // SPARK-28157 We should remove this blacklisted entry from the KVStore + // to handle permission-only changes with the same file sizes later. 
+ listing.delete(classOf[LogInfo], rootPath.toString) + case e: Exception => + logError("Exception while merging application listings", e) + } finally { + endProcessing(rootPath) + pendingReplayTasksCount.decrementAndGet() + + // triggering another task for compaction only if it succeeds + if (succeeded) { + submitLogProcessTask(rootPath) { () => compact(reader) } + } + } + } + /** * Replay the given log file, saving the application in the listing db. + * Visible for testing */ - protected def mergeApplicationListing( - fileStatus: FileStatus, + private[history] def doMergeApplicationListing( + reader: EventLogFileReader, scanTime: Long, - enableOptimizations: Boolean): Unit = { + enableOptimizations: Boolean, + lastEvaluatedForCompaction: Option[Long]): Unit = { val eventsFilter: ReplayEventsFilter = { eventString => eventString.startsWith(APPL_START_EVENT_PREFIX) || eventString.startsWith(APPL_END_EVENT_PREFIX) || @@ -684,8 +710,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) eventString.startsWith(ENV_UPDATE_EVENT_PREFIX) } - val logPath = fileStatus.getPath() - val appCompleted = isCompleted(logPath.getName()) + val logPath = reader.rootPath + val appCompleted = reader.completed val reparseChunkSize = conf.get(END_EVENT_REPARSE_CHUNK_SIZE) // Enable halt support in listener if: @@ -695,13 +721,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) ((!appCompleted && fastInProgressParsing) || reparseChunkSize > 0) val bus = new ReplayListenerBus() - val listener = new AppListingListener(fileStatus, clock, shouldHalt) + val listener = new AppListingListener(reader, clock, shouldHalt) bus.addListener(listener) logInfo(s"Parsing $logPath for listing data...") - Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in => - bus.replay(in, logPath.toString, !appCompleted, eventsFilter) - } + val logFiles = reader.listEventLogFiles + parseAppEventLogs(logFiles, bus, !appCompleted, eventsFilter) 
// If enabled above, the listing listener will halt parsing when there's enough information to // create a listing entry. When the app is completed, or fast parsing is disabled, we still need @@ -723,8 +748,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // current position is, since the replay listener bus buffers data internally. val lookForEndEvent = shouldHalt && (appCompleted || !fastInProgressParsing) if (lookForEndEvent && listener.applicationInfo.isDefined) { - Utils.tryWithResource(EventLoggingListener.openEventLog(logPath, fs)) { in => - val target = fileStatus.getLen() - reparseChunkSize + val lastFile = logFiles.last + Utils.tryWithResource(EventLogFileReader.openEventLog(lastFile.getPath, fs)) { in => + val target = lastFile.getLen - reparseChunkSize if (target > 0) { logInfo(s"Looking for end event; skipping $target bytes from $logPath...") var skipped = 0L @@ -741,7 +767,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) source.next() } - bus.replay(source, logPath.toString, !appCompleted, eventsFilter) + bus.replay(source, lastFile.getPath.toString, !appCompleted, eventsFilter) } } @@ -754,12 +780,15 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) invalidateUI(app.info.id, app.attempts.head.info.attemptId) addListing(app) listing.write(LogInfo(logPath.toString(), scanTime, LogType.EventLogs, Some(app.info.id), - app.attempts.head.info.attemptId, fileStatus.getLen())) + app.attempts.head.info.attemptId, reader.fileSizeForLastIndex, reader.lastIndex, + lastEvaluatedForCompaction, reader.completed)) // For a finished log, remove the corresponding "in progress" entry from the listing DB if // the file is really gone. - if (appCompleted) { - val inProgressLog = logPath.toString() + EventLoggingListener.IN_PROGRESS + // The logic is only valid for single event log, as root path doesn't change for + // rolled event logs. 
+ if (appCompleted && reader.lastIndex.isEmpty) { + val inProgressLog = logPath.toString() + EventLogFileWriter.IN_PROGRESS try { // Fetch the entry first to avoid an RPC when it's already removed. listing.read(classOf[LogInfo], inProgressLog) @@ -776,14 +805,49 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // mean the end event is before the configured threshold, so call the method again to // re-parse the whole log. logInfo(s"Reparsing $logPath since end event was not found.") - mergeApplicationListing(fileStatus, scanTime, false) + doMergeApplicationListing(reader, scanTime, enableOptimizations = false, + lastEvaluatedForCompaction) case _ => // If the app hasn't written down its app ID to the logs, still record the entry in the // listing db, with an empty ID. This will make the log eligible for deletion if the app // does not make progress after the configured max log age. listing.write( - LogInfo(logPath.toString(), scanTime, LogType.EventLogs, None, None, fileStatus.getLen())) + LogInfo(logPath.toString(), scanTime, LogType.EventLogs, None, None, + reader.fileSizeForLastIndex, reader.lastIndex, lastEvaluatedForCompaction, + reader.completed)) + } + } + + private def compact(reader: EventLogFileReader): Unit = { + val rootPath = reader.rootPath + try { + reader.lastIndex match { + case Some(lastIndex) => + try { + val info = listing.read(classOf[LogInfo], reader.rootPath.toString) + if (info.lastEvaluatedForCompaction.isEmpty || + info.lastEvaluatedForCompaction.get < lastIndex) { + // haven't tried compaction for this index, do compaction + fileCompactor.compact(reader.listEventLogFiles) + listing.write(info.copy(lastEvaluatedForCompaction = Some(lastIndex))) + } + } catch { + case _: NoSuchElementException => + // this should exist, but ignoring doesn't hurt much + } + + case None => // This is not applied to single event log file. 
+ } + } catch { + case e: InterruptedException => + throw e + case e: AccessControlException => + logWarning(s"Insufficient permission while compacting log for $rootPath", e) + case e: Exception => + logError(s"Exception while compacting log for $rootPath", e) + } finally { + endProcessing(rootPath) } } @@ -800,6 +864,30 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } + /** + * Check and delete specified event log according to the max log age defined by the user. + */ + private[history] def checkAndCleanLog(logPath: String): Unit = Utils.tryLog { + val maxTime = clock.getTimeMillis() - conf.get(MAX_LOG_AGE_S) * 1000 + val log = listing.read(classOf[LogInfo], logPath) + + if (log.lastProcessed <= maxTime && log.appId.isEmpty) { + logInfo(s"Deleting invalid / corrupt event log ${log.logPath}") + deleteLog(fs, new Path(log.logPath)) + listing.delete(classOf[LogInfo], log.logPath) + } + + log.appId.foreach { appId => + val app = listing.read(classOf[ApplicationInfoWrapper], appId) + if (app.oldestAttempt() <= maxTime) { + val (remaining, toDelete) = app.attempts.partition { attempt => + attempt.info.lastUpdated.getTime() >= maxTime + } + deleteAttemptLogs(app, remaining, toDelete) + } + } + } + /** * Delete event logs from the log directory according to the clean policy defined by the user. 
*/ @@ -829,7 +917,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .asScala .filter { l => l.logType == null || l.logType == LogType.EventLogs } .toList - stale.foreach { log => + stale.filterNot(isProcessing).foreach { log => if (log.appId.isEmpty) { logInfo(s"Deleting invalid / corrupt event log ${log.logPath}") deleteLog(fs, new Path(log.logPath)) @@ -918,7 +1006,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case e: NoSuchElementException => // For every new driver log file discovered, create a new entry in listing listing.write(LogInfo(f.getPath().toString(), currentTime, LogType.DriverLogs, None, - None, f.getLen())) + None, f.getLen(), None, None, false)) false } if (deleteFile) { @@ -937,7 +1025,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .asScala .filter { l => l.logType != null && l.logType == LogType.DriverLogs } .toList - stale.foreach { log => + stale.filterNot(isProcessing).foreach { log => logInfo(s"Deleting invalid driver log ${log.logPath}") listing.delete(classOf[LogInfo], log.logPath) deleteLog(driverLogFs, new Path(log.logPath)) @@ -945,11 +1033,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } /** - * Rebuilds the application state store from its event log. + * Rebuilds the application state store from its event log. Exposed for testing. */ - private def rebuildAppStore( + private[spark] def rebuildAppStore( store: KVStore, - eventLog: FileStatus, + reader: EventLogFileReader, lastUpdated: Long): Unit = { // Disable async updates, since they cause higher memory usage, and it's ok to take longer // to parse the event logs in the SHS. 
@@ -966,13 +1054,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } replayBus.addListener(listener) try { - val path = eventLog.getPath() - logInfo(s"Parsing $path to re-build UI...") - Utils.tryWithResource(EventLoggingListener.openEventLog(path, fs)) { in => - replayBus.replay(in, path.toString(), maybeTruncated = !isCompleted(path.toString())) - } + val eventLogFiles = reader.listEventLogFiles + logInfo(s"Parsing ${reader.rootPath} to re-build UI...") + parseAppEventLogs(eventLogFiles, replayBus, !reader.completed) trackingStore.close(false) - logInfo(s"Finished parsing $path") + logInfo(s"Finished parsing ${reader.rootPath}") } catch { case e: Exception => Utils.tryLogNonFatalError { @@ -982,6 +1068,23 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } + private def parseAppEventLogs( + logFiles: Seq[FileStatus], + replayBus: ReplayListenerBus, + maybeTruncated: Boolean, + eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = { + // stop replaying next log files if ReplayListenerBus indicates some error or halt + var continueReplay = true + logFiles.foreach { file => + if (continueReplay) { + Utils.tryWithResource(EventLogFileReader.openEventLog(file.getPath, fs)) { in => + continueReplay = replayBus.replay(in, file.getPath.toString, + maybeTruncated = maybeTruncated, eventsFilter = eventsFilter) + } + } + } + } + /** * Checks whether HDFS is in safe mode. * @@ -1063,30 +1166,60 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // At this point the disk data either does not exist or was deleted because it failed to // load, so the event log needs to be replayed. 
- val status = fs.getFileStatus(new Path(logDir, attempt.logPath)) - val isCompressed = EventLoggingListener.codecName(status.getPath()).flatMap { name => - Try(CompressionCodec.getShortName(name)).toOption - }.isDefined - logInfo(s"Leasing disk manager space for app $appId / ${attempt.info.attemptId}...") - val lease = dm.lease(status.getLen(), isCompressed) - val newStorePath = try { - Utils.tryWithResource(KVUtils.open(lease.tmpPath, metadata)) { store => - rebuildAppStore(store, status, attempt.info.lastUpdated.getTime()) + + var retried = false + var newStorePath: File = null + while (newStorePath == null) { + val reader = EventLogFileReader(fs, new Path(logDir, attempt.logPath), + attempt.lastIndex) + val isCompressed = reader.compressionCodec.isDefined + logInfo(s"Leasing disk manager space for app $appId / ${attempt.info.attemptId}...") + val lease = dm.lease(reader.totalSize, isCompressed) + try { + Utils.tryWithResource(KVUtils.open(lease.tmpPath, metadata)) { store => + rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime()) + } + newStorePath = lease.commit(appId, attempt.info.attemptId) + } catch { + case _: IOException if !retried => + // compaction may touch the file(s) which app rebuild wants to read + // compaction wouldn't run in short interval, so try again... 
+ logWarning(s"Exception occurred while rebuilding app $appId - trying again...") + lease.rollback() + retried = true + + case e: Exception => + lease.rollback() + throw e } - lease.commit(appId, attempt.info.attemptId) - } catch { - case e: Exception => - lease.rollback() - throw e } KVUtils.open(newStorePath, metadata) } private def createInMemoryStore(attempt: AttemptInfoWrapper): KVStore = { - val store = new InMemoryStore() - val status = fs.getFileStatus(new Path(logDir, attempt.logPath)) - rebuildAppStore(store, status, attempt.info.lastUpdated.getTime()) + var retried = false + var store: KVStore = null + while (store == null) { + try { + val s = new InMemoryStore() + val reader = EventLogFileReader(fs, new Path(logDir, attempt.logPath), + attempt.lastIndex) + rebuildAppStore(s, reader, attempt.info.lastUpdated.getTime()) + store = s + } catch { + case _: IOException if !retried => + // compaction may touch the file(s) which app rebuild wants to read + // compaction wouldn't run in short interval, so try again... + logWarning(s"Exception occurred while rebuilding log path ${attempt.logPath} - " + + "trying again...") + retried = true + + case e: Exception => + throw e + } + } + store } @@ -1117,10 +1250,20 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) deleted } - private def isCompleted(name: String): Boolean = { - !name.endsWith(EventLoggingListener.IN_PROGRESS) + /** NOTE: 'task' should ensure it executes 'endProcessing' at the end */ + private def submitLogProcessTask(rootPath: Path)(task: Runnable): Unit = { + try { + processing(rootPath) + replayExecutor.submit(task) + } catch { + // let the iteration over the updated entries break, since an exception on + // replayExecutor.submit (..) 
indicates the ExecutorService is unable + // to take any more submissions at this time + case e: Exception => + logError(s"Exception while submitting task", e) + endProcessing(rootPath) + } } - } private[history] object FsHistoryProvider { @@ -1161,12 +1304,19 @@ private[history] case class LogInfo( logType: LogType.Value, appId: Option[String], attemptId: Option[String], - fileSize: Long) + fileSize: Long, + @JsonDeserialize(contentAs = classOf[JLong]) + lastIndex: Option[Long], + @JsonDeserialize(contentAs = classOf[JLong]) + lastEvaluatedForCompaction: Option[Long], + isComplete: Boolean) private[history] class AttemptInfoWrapper( val info: ApplicationAttemptInfo, val logPath: String, val fileSize: Long, + @JsonDeserialize(contentAs = classOf[JLong]) + val lastIndex: Option[Long], val adminAcls: Option[String], val viewAcls: Option[String], val adminAclsGroups: Option[String], @@ -1190,12 +1340,13 @@ private[history] class ApplicationInfoWrapper( } private[history] class AppListingListener( - log: FileStatus, + reader: EventLogFileReader, clock: Clock, haltEnabled: Boolean) extends SparkListener { private val app = new MutableApplicationInfo() - private val attempt = new MutableAttemptInfo(log.getPath().getName(), log.getLen()) + private val attempt = new MutableAttemptInfo(reader.rootPath.getName(), + reader.fileSizeForLastIndex, reader.lastIndex) private var gotEnvUpdate = false private var halted = false @@ -1214,7 +1365,7 @@ private[history] class AppListingListener( override def onApplicationEnd(event: SparkListenerApplicationEnd): Unit = { attempt.endTime = new Date(event.time) - attempt.lastUpdated = new Date(log.getModificationTime()) + attempt.lastUpdated = new Date(reader.modificationTime) attempt.duration = event.time - attempt.startTime.getTime() attempt.completed = true } @@ -1280,7 +1431,7 @@ private[history] class AppListingListener( } - private class MutableAttemptInfo(logPath: String, fileSize: Long) { + private class MutableAttemptInfo(logPath: 
String, fileSize: Long, lastIndex: Option[Long]) { var attemptId: Option[String] = None var startTime = new Date(-1) var endTime = new Date(-1) @@ -1309,6 +1460,7 @@ private[history] class AppListingListener( apiInfo, logPath, fileSize, + lastIndex, adminAcls, viewAcls, adminAclsGroups, diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 878f0cb632c5a..62cac261ae014 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -135,7 +135,7 @@ class HistoryServer( * This starts a background thread that periodically synchronizes information displayed on * this UI with the event logs in the provided base directory. */ - def initialize() { + def initialize(): Unit = { attachPage(new HistoryPage(this)) attachHandler(ApiRootResource.getServletHandler(this)) @@ -149,12 +149,12 @@ class HistoryServer( } /** Bind to the HTTP server behind this web interface. */ - override def bind() { + override def bind(): Unit = { super.bind() } /** Stop the server and close the file system. 
*/ - override def stop() { + override def stop(): Unit = { super.stop() provider.stop() } @@ -164,7 +164,7 @@ class HistoryServer( appId: String, attemptId: Option[String], ui: SparkUI, - completed: Boolean) { + completed: Boolean): Unit = { assert(serverInfo.isDefined, "HistoryServer must be bound before attaching SparkUIs") ui.getHandlers.foreach { handler => serverInfo.get.addHandler(handler, ui.securityManager) @@ -297,6 +297,7 @@ object HistoryServer extends Logging { val server = new HistoryServer(conf, provider, securityManager, port) server.bind() + provider.start() ShutdownHookManager.addShutdownHook { () => server.stop() } @@ -326,7 +327,7 @@ object HistoryServer extends Logging { new SecurityManager(config) } - def initSecurity() { + def initSecurity(): Unit = { // If we are accessing HDFS and it has security enabled (Kerberos), we have to login // from a keytab file so that we can access HDFS beyond the kerberos ticket expiration. // As long as it is using Hadoop rpc (hdfs://), a relogin will automatically diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala index dec89769c030b..01cc59e1d2e6e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala @@ -52,7 +52,7 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[Strin // This mutates the SparkConf, so all accesses to it must be made after this line Utils.loadDefaultSparkProperties(conf, propertiesFile) - private def printUsageAndExit(exitCode: Int) { + private def printUsageAndExit(exitCode: Int): Unit = { // scalastyle:off println System.err.println( """ diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index 
6c56807458b27..03965e6dbbf31 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -57,7 +57,7 @@ private[spark] class ApplicationInfo( init() } - private def init() { + private def init(): Unit = { state = ApplicationState.WAITING executors = new mutable.HashMap[Int, ExecutorDesc] coresGranted = 0 @@ -92,7 +92,7 @@ private[spark] class ApplicationInfo( exec } - private[master] def removeExecutor(exec: ExecutorDesc) { + private[master] def removeExecutor(exec: ExecutorDesc): Unit = { if (executors.contains(exec.id)) { removedExecutors += executors(exec.id) executors -= exec.id @@ -115,7 +115,7 @@ private[spark] class ApplicationInfo( private[master] def resetRetryCount() = _retryCount = 0 - private[master] def markFinished(endState: ApplicationState.Value) { + private[master] def markFinished(endState: ApplicationState.Value): Unit = { state = endState endTime = System.currentTimeMillis() } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala index a8f8492561115..a598d2a1ddd76 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorDesc.scala @@ -33,7 +33,7 @@ private[master] class ExecutorDesc( var state = ExecutorState.LAUNCHING /** Copy all state (non-val) variables from the given on-the-wire ExecutorDescription. 
*/ - def copyState(execDesc: ExecutorDescription) { + def copyState(execDesc: ExecutorDescription): Unit = { state = execDesc.state } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala index f2b5ea7e23ec1..ba949e2630e43 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala @@ -56,7 +56,7 @@ private[master] class FileSystemPersistenceEngine( files.map(deserializeFromFile[T]) } - private def serializeIntoFile(file: File, value: AnyRef) { + private def serializeIntoFile(file: File, value: AnyRef): Unit = { val created = file.createNewFile() if (!created) { throw new IllegalStateException("Could not create file: " + file) } val fileOut = new FileOutputStream(file) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala b/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala index 52e2854961eda..5bdfd18f37cd0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/LeaderElectionAgent.scala @@ -27,7 +27,7 @@ import org.apache.spark.annotation.DeveloperApi @DeveloperApi trait LeaderElectionAgent { val masterInstance: LeaderElectable - def stop() {} // to avoid noops in implementations. + def stop(): Unit = {} // to avoid noops in implementations. 
} @DeveloperApi diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 5588dc8cff47a..71df5dfa423a9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -192,7 +192,7 @@ private[deploy] class Master( leaderElectionAgent = leaderElectionAgent_ } - override def onStop() { + override def onStop(): Unit = { masterMetricsSystem.report() applicationMetricsSystem.report() // prevent the CompleteRecovery message sending to restarted master @@ -211,11 +211,11 @@ private[deploy] class Master( leaderElectionAgent.stop() } - override def electedLeader() { + override def electedLeader(): Unit = { self.send(ElectedLeader) } - override def revokedLeadership() { + override def revokedLeadership(): Unit = { self.send(RevokedLeadership) } @@ -243,6 +243,15 @@ private[deploy] class Master( logError("Leadership has been revoked -- master shutting down.") System.exit(0) + case WorkerDecommission(id, workerRef) => + logInfo("Recording worker %s decommissioning".format(id)) + if (state == RecoveryState.STANDBY) { + workerRef.send(MasterInStandby) + } else { + // We use foreach since get gives us an option and we can skip the failures. + idToWorker.get(id).foreach(decommissionWorker) + } + case RegisterWorker( id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress, resources) => @@ -313,7 +322,9 @@ private[deploy] class Master( // Only retry certain number of times so we don't go into an infinite loop. // Important note: this code path is not exercised by tests, so be very careful when // changing this `if` condition. + // We also don't count failures from decommissioned workers since they are "expected." 
if (!normalExit + && oldState != ExecutorState.DECOMMISSIONED && appInfo.incrementRetryCount() >= maxExecutorRetries && maxExecutorRetries >= 0) { // < 0 disables this application-killing path val execs = appInfo.executors.values @@ -529,7 +540,7 @@ private[deploy] class Master( apps.count(_.state == ApplicationState.UNKNOWN) == 0 private def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo], - storedWorkers: Seq[WorkerInfo]) { + storedWorkers: Seq[WorkerInfo]): Unit = { for (app <- storedApps) { logInfo("Trying to recover app: " + app.id) try { @@ -559,7 +570,7 @@ private[deploy] class Master( } } - private def completeRecovery() { + private def completeRecovery(): Unit = { // Ensure "only-once" recovery semantics using a short synchronization period. if (state != RecoveryState.RECOVERING) { return } state = RecoveryState.COMPLETING_RECOVERY @@ -850,7 +861,27 @@ private[deploy] class Master( true } - private def removeWorker(worker: WorkerInfo, msg: String) { + private def decommissionWorker(worker: WorkerInfo): Unit = { + if (worker.state != WorkerState.DECOMMISSIONED) { + logInfo("Decommissioning worker %s on %s:%d".format(worker.id, worker.host, worker.port)) + worker.setState(WorkerState.DECOMMISSIONED) + for (exec <- worker.executors.values) { + logInfo("Telling app of decommission executors") + exec.application.driver.send(ExecutorUpdated( + exec.id, ExecutorState.DECOMMISSIONED, + Some("worker decommissioned"), None, workerLost = false)) + exec.state = ExecutorState.DECOMMISSIONED + exec.application.removeExecutor(exec) + } + // On recovery do not add a decommissioned executor + persistenceEngine.removeWorker(worker) + } else { + logWarning("Skipping decommissioning worker %s on %s:%d as worker is already decommissioned". 
+ format(worker.id, worker.host, worker.port)) + } + } + + private def removeWorker(worker: WorkerInfo, msg: String): Unit = { logInfo("Removing worker " + worker.id + " on " + worker.host + ":" + worker.port) worker.setState(WorkerState.DEAD) idToWorker -= worker.id @@ -879,7 +910,7 @@ private[deploy] class Master( persistenceEngine.removeWorker(worker) } - private def relaunchDriver(driver: DriverInfo) { + private def relaunchDriver(driver: DriverInfo): Unit = { // We must setup a new driver with a new driver id here, because the original driver may // be still running. Consider this scenario: a worker is network partitioned with master, // the master then relaunches driver driverID1 with a driver id driverID2, then the worker @@ -919,11 +950,11 @@ private[deploy] class Master( waitingApps += app } - private def finishApplication(app: ApplicationInfo) { + private def finishApplication(app: ApplicationInfo): Unit = { removeApplication(app, ApplicationState.FINISHED) } - def removeApplication(app: ApplicationInfo, state: ApplicationState.Value) { + def removeApplication(app: ApplicationInfo, state: ApplicationState.Value): Unit = { if (apps.contains(app)) { logInfo("Removing app " + app.id) apps -= app @@ -1047,7 +1078,7 @@ private[deploy] class Master( } /** Check for, and remove, any timed-out workers */ - private def timeOutDeadWorkers() { + private def timeOutDeadWorkers(): Unit = { // Copy the workers into an array so we don't modify the hashset while iterating through it val currentTime = System.currentTimeMillis() val toRemove = workers.filter(_.lastHeartbeat < currentTime - workerTimeoutMs).toArray @@ -1077,7 +1108,7 @@ private[deploy] class Master( new DriverInfo(now, newDriverId(date), desc, date) } - private def launchDriver(worker: WorkerInfo, driver: DriverInfo) { + private def launchDriver(worker: WorkerInfo, driver: DriverInfo): Unit = { logInfo("Launching driver " + driver.id + " on worker " + worker.id) worker.addDriver(driver) driver.worker = 
Some(worker) @@ -1088,7 +1119,7 @@ private[deploy] class Master( private def removeDriver( driverId: String, finalState: DriverState, - exception: Option[Exception]) { + exception: Option[Exception]): Unit = { drivers.find(d => d.id == driverId) match { case Some(driver) => logInfo(s"Removing driver: $driverId") @@ -1113,7 +1144,7 @@ private[deploy] object Master extends Logging { val SYSTEM_NAME = "sparkMaster" val ENDPOINT_NAME = "Master" - def main(argStrings: Array[String]) { + def main(argStrings: Array[String]): Unit = { Thread.setDefaultUncaughtExceptionHandler(new SparkUncaughtExceptionHandler( exitOnUncaughtException = false)) Utils.initDaemon(log) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala index cd31bbdcfab59..045a3da74dcd0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala @@ -94,7 +94,7 @@ private[master] class MasterArguments(args: Array[String], conf: SparkConf) exte /** * Print usage and exit JVM with the given exit code. 
*/ - private def printUsageAndExit(exitCode: Int) { + private def printUsageAndExit(exitCode: Int): Unit = { // scalastyle:off println System.err.println( "Usage: Master [options]\n" + diff --git a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala index b30bc821b7324..9a695e15a9cea 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/PersistenceEngine.scala @@ -88,7 +88,7 @@ abstract class PersistenceEngine { } } - def close() {} + def close(): Unit = {} } private[master] class BlackHolePersistenceEngine extends PersistenceEngine { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala index a33b15354efea..0137e2be74720 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala @@ -18,9 +18,7 @@ package org.apache.spark.deploy.master import scala.collection.mutable -import scala.reflect.ClassTag -import org.apache.spark.deploy.StandaloneResourceUtils.MutableResourceInfo import org.apache.spark.resource.{ResourceAllocator, ResourceInformation, ResourceRequirement} import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.util.Utils @@ -30,6 +28,7 @@ private[spark] case class WorkerResourceInfo(name: String, addresses: Seq[String override protected def resourceName = this.name override protected def resourceAddresses = this.addresses + override protected def slotsPerAddress: Int = 1 def acquire(amount: Int): ResourceInformation = { val allocated = availableAddrs.take(amount) @@ -93,7 +92,7 @@ private[spark] class WorkerInfo( init() } - private def init() { + private def init(): Unit = { executors = new mutable.HashMap drivers = new mutable.HashMap state = WorkerState.ALIVE @@ 
-107,13 +106,13 @@ private[spark] class WorkerInfo( host + ":" + port } - def addExecutor(exec: ExecutorDesc) { + def addExecutor(exec: ExecutorDesc): Unit = { executors(exec.fullId) = exec coresUsed += exec.cores memoryUsed += exec.memory } - def removeExecutor(exec: ExecutorDesc) { + def removeExecutor(exec: ExecutorDesc): Unit = { if (executors.contains(exec.fullId)) { executors -= exec.fullId coresUsed -= exec.cores @@ -126,13 +125,13 @@ private[spark] class WorkerInfo( executors.values.exists(_.application == app) } - def addDriver(driver: DriverInfo) { + def addDriver(driver: DriverInfo): Unit = { drivers(driver.id) = driver memoryUsed += driver.desc.mem coresUsed += driver.desc.cores } - def removeDriver(driver: DriverInfo) { + def removeDriver(driver: DriverInfo): Unit = { drivers -= driver.id memoryUsed -= driver.desc.mem coresUsed -= driver.desc.cores diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala index 47f309144bdc0..d4ae977b19f4b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperLeaderElectionAgent.scala @@ -36,7 +36,7 @@ private[master] class ZooKeeperLeaderElectionAgent(val masterInstance: LeaderEle start() - private def start() { + private def start(): Unit = { logInfo("Starting ZooKeeper LeaderElection agent") zk = SparkCuratorUtil.newClient(conf) leaderLatch = new LeaderLatch(zk, workingDir) @@ -44,12 +44,12 @@ private[master] class ZooKeeperLeaderElectionAgent(val masterInstance: LeaderEle leaderLatch.start() } - override def stop() { + override def stop(): Unit = { leaderLatch.close() zk.close() } - override def isLeader() { + override def isLeader(): Unit = { synchronized { // could have lost leadership by now. 
if (!leaderLatch.hasLeadership) { @@ -61,7 +61,7 @@ private[master] class ZooKeeperLeaderElectionAgent(val masterInstance: LeaderEle } } - override def notLeader() { + override def notLeader(): Unit = { synchronized { // could have gained leadership by now. if (leaderLatch.hasLeadership) { @@ -73,7 +73,7 @@ private[master] class ZooKeeperLeaderElectionAgent(val masterInstance: LeaderEle } } - private def updateLeadershipStatus(isLeader: Boolean) { + private def updateLeadershipStatus(isLeader: Boolean): Unit = { if (isLeader && status == LeadershipStatus.NOT_LEADER) { status = LeadershipStatus.LEADER masterInstance.electedLeader() diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala index 73dd0de017960..8eae445b439d9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala @@ -55,11 +55,11 @@ private[master] class ZooKeeperPersistenceEngine(conf: SparkConf, val serializer .filter(_.startsWith(prefix)).flatMap(deserializeFromFile[T]) } - override def close() { + override def close(): Unit = { zk.close() } - private def serializeIntoFile(path: String, value: AnyRef) { + private def serializeIntoFile(path: String, value: AnyRef): Unit = { val serialized = serializer.newInstance().serialize(value) val bytes = new Array[Byte](serialized.remaining()) serialized.get(bytes) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index c7e73bcc13c5f..071b79135d641 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -73,7 +73,7 @@ private[ui] class ApplicationPage(parent: MasterWebUI) 
extends WebUIPage("app")
  • + data-placement="top"> Executor Limit: { if (app.executorLimit == Int.MaxValue) "Unlimited" else app.executorLimit diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index e8b614527f69c..f64b449851d86 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -27,7 +27,6 @@ import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, MasterStateRe import org.apache.spark.deploy.JsonProtocol import org.apache.spark.deploy.StandaloneResourceUtils._ import org.apache.spark.deploy.master._ -import org.apache.spark.resource.ResourceInformation import org.apache.spark.ui.{UIUtils, WebUIPage} import org.apache.spark.util.Utils @@ -109,12 +108,17 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { val completedApps = state.completedApps.sortBy(_.endTime).reverse val completedAppsTable = UIUtils.listingTable(appHeaders, appRow, completedApps) - val driverHeaders = Seq("Submission ID", "Submitted Time", "Worker", "State", "Cores", - "Memory", "Resources", "Main Class") + val activeDriverHeaders = Seq("Submission ID", "Submitted Time", "Worker", "State", "Cores", + "Memory", "Resources", "Main Class", "Duration") val activeDrivers = state.activeDrivers.sortBy(_.startTime).reverse - val activeDriversTable = UIUtils.listingTable(driverHeaders, driverRow, activeDrivers) + val activeDriversTable = + UIUtils.listingTable(activeDriverHeaders, activeDriverRow, activeDrivers) + + val completedDriverHeaders = Seq("Submission ID", "Submitted Time", "Worker", "State", "Cores", + "Memory", "Resources", "Main Class") val completedDrivers = state.completedDrivers.sortBy(_.startTime).reverse - val completedDriversTable = UIUtils.listingTable(driverHeaders, driverRow, completedDrivers) + val completedDriversTable = + 
UIUtils.listingTable(completedDriverHeaders, completedDriverRow, completedDrivers) // For now we only show driver information if the user has submitted drivers to the cluster. // This is until we integrate the notion of drivers and applications in the UI. @@ -311,7 +315,11 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { } - private def driverRow(driver: DriverInfo): Seq[Node] = { + private def activeDriverRow(driver: DriverInfo) = driverRow(driver, showDuration = true) + + private def completedDriverRow(driver: DriverInfo) = driverRow(driver, showDuration = false) + + private def driverRow(driver: DriverInfo, showDuration: Boolean): Seq[Node] = { val killLink = if (parent.killEnabled && (driver.state == DriverState.RUNNING || driver.state == DriverState.SUBMITTED || @@ -346,6 +354,9 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
  • + {if (showDuration) { + + }} } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index be402ae247511..86554ec4ec1c9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -40,7 +40,7 @@ class MasterWebUI( initialize() /** Initialize all components of the server. */ - def initialize() { + def initialize(): Unit = { val masterPage = new MasterPage(this) attachPage(new ApplicationPage(this)) attachPage(masterPage) diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala index e59bf3f0eaf44..f60d940b8c82a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala @@ -317,8 +317,7 @@ private class ErrorServlet extends RestServlet { versionMismatch = true s"Unknown protocol version '$unknownVersion'." case _ => - // never reached - s"Malformed path $path." + "Malformed path." } msg += s" Please submit requests through http://[host]:[port]/$serverVersion/submissions/..." 
val error = handleError(msg) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala index 759d857d56e0e..3168c763df4df 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala @@ -65,11 +65,6 @@ private[spark] class HadoopDelegationTokenManager( protected val hadoopConf: Configuration, protected val schedulerRef: RpcEndpointRef) extends Logging { - private val deprecatedProviderEnabledConfigs = List( - "spark.yarn.security.tokens.%s.enabled", - "spark.yarn.security.credentials.%s.enabled") - private val providerEnabledConfig = "spark.security.credentials.%s.enabled" - private val principal = sparkConf.get(PRINCIPAL).orNull // The keytab can be a local: URI for cluster mode, so translate it to a regular path. If it is @@ -140,13 +135,21 @@ private[spark] class HadoopDelegationTokenManager( * @param creds Credentials object where to store the delegation tokens. */ def obtainDelegationTokens(creds: Credentials): Unit = { - val freshUGI = doLogin() - freshUGI.doAs(new PrivilegedExceptionAction[Unit]() { - override def run(): Unit = { - val (newTokens, _) = obtainDelegationTokens() - creds.addAll(newTokens) - } - }) + val currentUser = UserGroupInformation.getCurrentUser() + val hasKerberosCreds = principal != null || + Option(currentUser.getRealUser()).getOrElse(currentUser).hasKerberosCredentials() + + // Delegation tokens can only be obtained if the real user has Kerberos credentials, so + // skip creation when those are not available. 
+ if (hasKerberosCreds) { + val freshUGI = doLogin() + freshUGI.doAs(new PrivilegedExceptionAction[Unit]() { + override def run(): Unit = { + val (newTokens, _) = obtainDelegationTokens() + creds.addAll(newTokens) + } + }) + } } /** @@ -173,29 +176,6 @@ private[spark] class HadoopDelegationTokenManager( delegationTokenProviders.contains(serviceName) } - protected def isServiceEnabled(serviceName: String): Boolean = { - val key = providerEnabledConfig.format(serviceName) - - deprecatedProviderEnabledConfigs.foreach { pattern => - val deprecatedKey = pattern.format(serviceName) - if (sparkConf.contains(deprecatedKey)) { - logWarning(s"${deprecatedKey} is deprecated. Please use ${key} instead.") - } - } - - val isEnabledDeprecated = deprecatedProviderEnabledConfigs.forall { pattern => - sparkConf - .getOption(pattern.format(serviceName)) - .map(_.toBoolean) - .getOrElse(true) - } - - sparkConf - .getOption(key) - .map(_.toBoolean) - .getOrElse(isEnabledDeprecated) - } - private def scheduleRenewal(delay: Long): Unit = { val _delay = math.max(0, delay) logInfo(s"Scheduling renewal in ${UIUtils.formatDuration(delay)}.") @@ -291,8 +271,39 @@ private[spark] class HadoopDelegationTokenManager( // Filter out providers for which spark.security.credentials.{service}.enabled is false. 
providers - .filter { p => isServiceEnabled(p.serviceName) } + .filter { p => HadoopDelegationTokenManager.isServiceEnabled(sparkConf, p.serviceName) } .map { p => (p.serviceName, p) } .toMap } } + +private[spark] object HadoopDelegationTokenManager extends Logging { + private val providerEnabledConfig = "spark.security.credentials.%s.enabled" + + private val deprecatedProviderEnabledConfigs = List( + "spark.yarn.security.tokens.%s.enabled", + "spark.yarn.security.credentials.%s.enabled") + + def isServiceEnabled(sparkConf: SparkConf, serviceName: String): Boolean = { + val key = providerEnabledConfig.format(serviceName) + + deprecatedProviderEnabledConfigs.foreach { pattern => + val deprecatedKey = pattern.format(serviceName) + if (sparkConf.contains(deprecatedKey)) { + logWarning(s"${deprecatedKey} is deprecated. Please use ${key} instead.") + } + } + + val isEnabledDeprecated = deprecatedProviderEnabledConfigs.forall { pattern => + sparkConf + .getOption(pattern.format(serviceName)) + .map(_.toBoolean) + .getOrElse(true) + } + + sparkConf + .getOption(key) + .map(_.toBoolean) + .getOrElse(isEnabledDeprecated) + } +} diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala index 12e0dae3f5e5a..f7423f1fc3f1c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala @@ -102,12 +102,12 @@ object CommandUtils extends Logging { } /** Spawn a thread that will redirect a given stream to a file */ - def redirectStream(in: InputStream, file: File) { + def redirectStream(in: InputStream, file: File): Unit = { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. 
new Thread("redirect output to " + file) { - override def run() { + override def run(): Unit = { try { Utils.copyStream(in, out, true) } catch { diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala index 4934722c0d83e..53ec7b3a88f35 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala @@ -84,7 +84,7 @@ private[deploy] class DriverRunner( /** Starts a thread to run and manage the driver. */ private[worker] def start() = { new Thread("DriverRunner for " + driverId) { - override def run() { + override def run(): Unit = { var shutdownHook: AnyRef = null try { shutdownHook = ShutdownHookManager.addShutdownHook { () => diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala index 56356f5f27e27..45ffdde58d6c3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala @@ -32,7 +32,7 @@ import org.apache.spark.util._ * This is used in standalone cluster mode only. 
*/ object DriverWrapper extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { args.toList match { /* * IMPORTANT: Spark 1.3 provides a stable application submission gateway that is both diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 97939107f3057..2a5528bbe89cb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -31,7 +31,7 @@ import org.apache.spark.deploy.StandaloneResourceUtils.prepareResourcesFile import org.apache.spark.internal.Logging import org.apache.spark.internal.config.SPARK_EXECUTOR_PREFIX import org.apache.spark.internal.config.UI._ -import org.apache.spark.resource.{ResourceInformation, ResourceUtils} +import org.apache.spark.resource.ResourceInformation import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.util.{ShutdownHookManager, Utils} import org.apache.spark.util.logging.FileAppender @@ -74,9 +74,9 @@ private[deploy] class ExecutorRunner( // make sense to remove this in the future. private var shutdownHook: AnyRef = null - private[worker] def start() { + private[worker] def start(): Unit = { workerThread = new Thread("ExecutorRunner for " + fullId) { - override def run() { fetchAndRunExecutor() } + override def run(): Unit = { fetchAndRunExecutor() } } workerThread.start() // Shutdown hook that kills actors on shutdown. 
@@ -94,7 +94,7 @@ private[deploy] class ExecutorRunner( * * @param message the exception message which caused the executor's death */ - private def killProcess(message: Option[String]) { + private def killProcess(message: Option[String]): Unit = { var exitCode: Option[Int] = None if (process != null) { logInfo("Killing process!") @@ -118,7 +118,7 @@ private[deploy] class ExecutorRunner( } /** Stop this executor runner, including killing the process it launched */ - private[worker] def kill() { + private[worker] def kill(): Unit = { if (workerThread != null) { // the workerThread will kill the child process when interrupted workerThread.interrupt() @@ -145,7 +145,7 @@ private[deploy] class ExecutorRunner( /** * Download and run the executor described in our ApplicationDescription */ - private def fetchAndRunExecutor() { + private def fetchAndRunExecutor(): Unit = { try { val resourceFileOpt = prepareResourcesFile(SPARK_EXECUTOR_PREFIX, resources, executorDir) // Launch the process diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 3731b6aec6522..d988bcedb47f0 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -67,6 +67,14 @@ private[deploy] class Worker( Utils.checkHost(host) assert (port > 0) + // If worker decommissioning is enabled register a handler on PWR to shutdown. + if (conf.get(WORKER_DECOMMISSION_ENABLED)) { + logInfo("Registering SIGPWR handler to trigger decommissioning.") + SignalUtils.register("PWR")(decommissionSelf) + } else { + logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") + } + // A scheduled executor used to send messages at the specified time. 
private val forwardMessageScheduler = ThreadUtils.newDaemonSingleThreadScheduledExecutor("worker-forward-message-scheduler") @@ -128,6 +136,7 @@ private[deploy] class Worker( private val workerUri = RpcEndpointAddress(rpcEnv.address, endpointName).toString private var registered = false private var connected = false + private var decommissioned = false private val workerId = generateWorkerId() private val sparkHome = if (sys.props.contains(IS_TESTING.key)) { @@ -190,14 +199,14 @@ private[deploy] class Worker( def coresFree: Int = cores - coresUsed def memoryFree: Int = memory - memoryUsed - private def createWorkDir() { + private def createWorkDir(): Unit = { workDir = Option(workDirPath).map(new File(_)).getOrElse(new File(sparkHome, "work")) if (!Utils.createDirectory(workDir)) { System.exit(1) } } - override def onStart() { + override def onStart(): Unit = { assert(!registered) logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format( host, port, cores, Utils.megabytesToString(memory))) @@ -268,7 +277,8 @@ private[deploy] class Worker( * @param masterAddress the new master address which the worker should use to connect in case of * failure */ - private def changeMaster(masterRef: RpcEndpointRef, uiUrl: String, masterAddress: RpcAddress) { + private def changeMaster(masterRef: RpcEndpointRef, uiUrl: String, + masterAddress: RpcAddress): Unit = { // activeMasterUrl it's a valid Spark url since we receive it from master. activeMasterUrl = masterRef.address.toSparkURL activeMasterWebUiUrl = uiUrl @@ -391,7 +401,7 @@ private[deploy] class Worker( registrationRetryTimer = None } - private def registerWithMaster() { + private def registerWithMaster(): Unit = { // onDisconnected may be triggered multiple times, so don't attempt registration // if there are outstanding registration attempts scheduled. 
registrationRetryTimer match { @@ -410,7 +420,7 @@ private[deploy] class Worker( } } - private def startExternalShuffleService() { + private def startExternalShuffleService(): Unit = { try { shuffleService.startIfEnabled() } catch { @@ -548,6 +558,8 @@ private[deploy] class Worker( case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_, resources_) => if (masterUrl != activeMasterUrl) { logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.") + } else if (decommissioned) { + logWarning("Asked to launch an executor while decommissioned. Not launching executor.") } else { try { logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name)) @@ -671,6 +683,9 @@ private[deploy] class Worker( case ApplicationFinished(id) => finishedApps += id maybeCleanupApplication(id) + + case DecommissionSelf => + decommissionSelf() } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -690,7 +705,7 @@ private[deploy] class Worker( } } - private def masterDisconnected() { + private def masterDisconnected(): Unit = { logError("Connection to master failed! 
Waiting for master to reconnect...") connected = false registerWithMaster() @@ -736,7 +751,7 @@ private[deploy] class Worker( "worker-%s-%s-%d".format(createDateFormat.format(new Date), host, port) } - override def onStop() { + override def onStop(): Unit = { releaseResources(conf, SPARK_WORKER_PREFIX, resources, pid) cleanupThreadExecutor.shutdownNow() metricsSystem.report() @@ -770,6 +785,18 @@ private[deploy] class Worker( } } + private[deploy] def decommissionSelf(): Boolean = { + if (conf.get(WORKER_DECOMMISSION_ENABLED)) { + logDebug("Decommissioning self") + decommissioned = true + sendToMaster(WorkerDecommission(workerId, self)) + } else { + logWarning("Asked to decommission self, but decommissioning not enabled") + } + // Return true since can be called as a signal handler + true + } + private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = { val driverId = driverStateChanged.driverId val exception = driverStateChanged.exception @@ -834,7 +861,7 @@ private[deploy] object Worker extends Logging { val ENDPOINT_NAME = "Worker" private val SSL_NODE_LOCAL_CONFIG_PATTERN = """\-Dspark\.ssl\.useNodeLocalConf\=(.+)""".r - def main(argStrings: Array[String]) { + def main(argStrings: Array[String]): Unit = { Thread.setDefaultUncaughtExceptionHandler(new SparkUncaughtExceptionHandler( exitOnUncaughtException = false)) Utils.initDaemon(log) diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala index 8c87708e960e6..42f684c0a1973 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala @@ -122,7 +122,7 @@ private[worker] class WorkerArguments(args: Array[String], conf: SparkConf) { /** * Print usage and exit JVM with the given exit code. 
*/ - def printUsageAndExit(exitCode: Int) { + def printUsageAndExit(exitCode: Int): Unit = { // scalastyle:off println System.err.println( "Usage: Worker [options] \n" + diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index 96980c3ff0331..0f5e96c558490 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -43,7 +43,7 @@ class WorkerWebUI( initialize() /** Initialize all components of the server. */ - def initialize() { + def initialize(): Unit = { val logPage = new LogPage(this) attachPage(logPage) attachPage(new WorkerPage(this)) diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index e96c41a61b066..faf03a64ae8b2 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -17,6 +17,7 @@ package org.apache.spark.executor +import java.io.File import java.net.URL import java.nio.ByteBuffer import java.util.Locale @@ -35,34 +36,43 @@ import org.apache.spark.deploy.worker.WorkerWatcher import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.resource.ResourceInformation +import org.apache.spark.resource.ResourceProfile +import org.apache.spark.resource.ResourceProfile._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance -import org.apache.spark.util.{ThreadUtils, Utils} +import org.apache.spark.util.{ChildFirstURLClassLoader, 
MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} private[spark] class CoarseGrainedExecutorBackend( override val rpcEnv: RpcEnv, driverUrl: String, executorId: String, + bindAddress: String, hostname: String, cores: Int, userClassPath: Seq[URL], env: SparkEnv, - resourcesFileOpt: Option[String]) - extends ThreadSafeRpcEndpoint with ExecutorBackend with Logging { + resourcesFileOpt: Option[String], + resourceProfile: ResourceProfile) + extends IsolatedRpcEndpoint with ExecutorBackend with Logging { + + import CoarseGrainedExecutorBackend._ private implicit val formats = DefaultFormats private[this] val stopping = new AtomicBoolean(false) var executor: Executor = null + @volatile private var decommissioned = false @volatile var driver: Option[RpcEndpointRef] = None // If this CoarseGrainedExecutorBackend is changed to support multiple threads, then this may need // to be changed so that we don't share the serializer instance across threads private[this] val ser: SerializerInstance = env.closureSerializer.newInstance() + private var _resources = Map.empty[String, ResourceInformation] + /** * Map each taskId to the information about the resource allocated to it, Please refer to * [[ResourceInformation]] for specifics. 
@@ -70,43 +80,60 @@ private[spark] class CoarseGrainedExecutorBackend( */ private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] - override def onStart() { + override def onStart(): Unit = { + logInfo("Registering PWR handler.") + SignalUtils.register("PWR")(decommissionSelf) + logInfo("Connecting to driver: " + driverUrl) - val resources = parseOrFindResources(resourcesFileOpt) + try { + _resources = parseOrFindResources(resourcesFileOpt) + } catch { + case NonFatal(e) => + exitExecutor(1, "Unable to create executor due to " + e.getMessage, e) + } rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref => // This is a very fast action so we can use "ThreadUtils.sameThread" driver = Some(ref) ref.ask[Boolean](RegisterExecutor(executorId, self, hostname, cores, extractLogUrls, - extractAttributes, resources)) + extractAttributes, _resources, resourceProfile.id)) }(ThreadUtils.sameThread).onComplete { - // This is a very fast action so we can use "ThreadUtils.sameThread" - case Success(msg) => - // Always receive `true`. Just ignore it + case Success(_) => + self.send(RegisteredExecutor) case Failure(e) => exitExecutor(1, s"Cannot register with driver: $driverUrl", e, notifyDriver = false) }(ThreadUtils.sameThread) } + /** + * Create a classLoader for use for resource discovery. The user could provide a class + * as a substitute for the default one so we have to be able to load it from a user specified + * jar. 
+ */ + private def createClassLoader(): MutableURLClassLoader = { + val currentLoader = Utils.getContextOrSparkClassLoader + val urls = userClassPath.toArray + if (env.conf.get(EXECUTOR_USER_CLASS_PATH_FIRST)) { + new ChildFirstURLClassLoader(urls, currentLoader) + } else { + new MutableURLClassLoader(urls, currentLoader) + } + } + // visible for testing def parseOrFindResources(resourcesFileOpt: Option[String]): Map[String, ResourceInformation] = { - // only parse the resources if a task requires them - val resourceInfo = if (parseResourceRequirements(env.conf, SPARK_TASK_PREFIX).nonEmpty) { - val resources = getOrDiscoverAllResources(env.conf, SPARK_EXECUTOR_PREFIX, resourcesFileOpt) - if (resources.isEmpty) { - throw new SparkException("User specified resources per task via: " + - s"$SPARK_TASK_PREFIX, but can't find any resources available on the executor.") - } else { - logResourceInfo(SPARK_EXECUTOR_PREFIX, resources) - } + // use a classloader that includes the user classpath in case they specified a class for + // resource discovery + val urlClassLoader = createClassLoader() + logDebug(s"Resource profile id is: ${resourceProfile.id}") + Utils.withContextClassLoader(urlClassLoader) { + val resources = getOrDiscoverAllResourcesForResourceProfile( + resourcesFileOpt, + SPARK_EXECUTOR_PREFIX, + resourceProfile, + env.conf) + logResourceInfo(SPARK_EXECUTOR_PREFIX, resources) resources - } else { - if (resourcesFileOpt.nonEmpty) { - logWarning("A resources file was specified but the application is not configured " + - s"to use any resources, see the configs with prefix: ${SPARK_TASK_PREFIX}") - } - Map.empty[String, ResourceInformation] } - resourceInfo } def extractLogUrls: Map[String, String] = { @@ -125,19 +152,28 @@ private[spark] class CoarseGrainedExecutorBackend( case RegisteredExecutor => logInfo("Successfully registered with driver") try { - executor = new Executor(executorId, hostname, env, userClassPath, isLocal = false) + executor = new 
Executor(executorId, hostname, env, userClassPath, isLocal = false, + resources = _resources) + driver.get.send(LaunchedExecutor(executorId)) + } catch { case NonFatal(e) => exitExecutor(1, "Unable to create executor due to " + e.getMessage, e) } - case RegisterExecutorFailed(message) => - exitExecutor(1, "Slave registration failed: " + message) - case LaunchTask(data) => if (executor == null) { exitExecutor(1, "Received LaunchTask command but executor was null") } else { + if (decommissioned) { + logError("Asked to launch a task while decommissioned.") + driver match { + case Some(endpoint) => + logInfo("Sending DecommissionExecutor to driver.") + endpoint.send(DecommissionExecutor(executorId)) + case _ => + logError("No registered driver to send Decommission to.") + } + } val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) taskResources(taskDesc.taskId) = taskDesc.resources @@ -186,7 +222,7 @@ private[spark] class CoarseGrainedExecutorBackend( } } - override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer) { + override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer): Unit = { val resources = taskResources.getOrElse(taskId, Map.empty[String, ResourceInformation]) val msg = StatusUpdate(executorId, taskId, state, data, resources) if (TaskState.isFinished(state)) { @@ -220,26 +256,55 @@ private[spark] class CoarseGrainedExecutorBackend( System.exit(code) } + + private def decommissionSelf(): Boolean = { + logInfo("Decommissioning self w/sync") + try { + decommissioned = true + // Tell master we are decommissioned so it stops trying to schedule us + if (driver.nonEmpty) { + driver.get.askSync[Boolean](DecommissionExecutor(executorId)) + } else { + logError("No driver to message decommissioning.") + } + if (executor != null) { + executor.decommission() + } + logInfo("Done decommissioning self.") + // Return true since we are handling a signal + true + } catch { + case e: Exception =>
+ logError(s"Error ${e} during attempt to decommission self") + false + } + } } private[spark] object CoarseGrainedExecutorBackend extends Logging { + // Message used internally to start the executor when the driver successfully accepted the + // registration request. + case object RegisteredExecutor + case class Arguments( driverUrl: String, executorId: String, + bindAddress: String, hostname: String, cores: Int, appId: String, workerUrl: Option[String], userClassPath: mutable.ListBuffer[URL], - resourcesFileOpt: Option[String]) + resourcesFileOpt: Option[String], + resourceProfileId: Int) def main(args: Array[String]): Unit = { - val createFn: (RpcEnv, Arguments, SparkEnv) => - CoarseGrainedExecutorBackend = { case (rpcEnv, arguments, env) => + val createFn: (RpcEnv, Arguments, SparkEnv, ResourceProfile) => + CoarseGrainedExecutorBackend = { case (rpcEnv, arguments, env, resourceProfile) => new CoarseGrainedExecutorBackend(rpcEnv, arguments.driverUrl, arguments.executorId, - arguments.hostname, arguments.cores, arguments.userClassPath, env, - arguments.resourcesFileOpt) + arguments.bindAddress, arguments.hostname, arguments.cores, arguments.userClassPath, env, + arguments.resourcesFileOpt, resourceProfile) } run(parseArguments(args, this.getClass.getCanonicalName.stripSuffix("$")), createFn) System.exit(0) @@ -247,7 +312,8 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { def run( arguments: Arguments, - backendCreateFn: (RpcEnv, Arguments, SparkEnv) => CoarseGrainedExecutorBackend): Unit = { + backendCreateFn: (RpcEnv, Arguments, SparkEnv, ResourceProfile) => + CoarseGrainedExecutorBackend): Unit = { Utils.initDaemon(log) @@ -259,10 +325,12 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { val executorConf = new SparkConf val fetcher = RpcEnv.create( "driverPropsFetcher", + arguments.bindAddress, arguments.hostname, -1, executorConf, new SecurityManager(executorConf), + numUsableCores = 0, clientMode = true) var 
driver: RpcEndpointRef = null @@ -277,7 +345,7 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { } } - val cfg = driver.askSync[SparkAppConfig](RetrieveSparkAppConfig) + val cfg = driver.askSync[SparkAppConfig](RetrieveSparkAppConfig(arguments.resourceProfileId)) val props = cfg.sparkProperties ++ Seq[(String, String)](("spark.app.id", arguments.appId)) fetcher.shutdown() @@ -297,10 +365,11 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { } driverConf.set(EXECUTOR_ID, arguments.executorId) - val env = SparkEnv.createExecutorEnv(driverConf, arguments.executorId, arguments.hostname, - arguments.cores, cfg.ioEncryptionKey, isLocal = false) + val env = SparkEnv.createExecutorEnv(driverConf, arguments.executorId, arguments.bindAddress, + arguments.hostname, arguments.cores, cfg.ioEncryptionKey, isLocal = false) - env.rpcEnv.setupEndpoint("Executor", backendCreateFn(env.rpcEnv, arguments, env)) + env.rpcEnv.setupEndpoint("Executor", + backendCreateFn(env.rpcEnv, arguments, env, cfg.resourceProfile)) arguments.workerUrl.foreach { url => env.rpcEnv.setupEndpoint("WorkerWatcher", new WorkerWatcher(env.rpcEnv, url)) } @@ -311,12 +380,14 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { def parseArguments(args: Array[String], classNameForEntry: String): Arguments = { var driverUrl: String = null var executorId: String = null + var bindAddress: String = null var hostname: String = null var cores: Int = 0 var resourcesFileOpt: Option[String] = None var appId: String = null var workerUrl: Option[String] = None val userClassPath = new mutable.ListBuffer[URL]() + var resourceProfileId: Int = DEFAULT_RESOURCE_PROFILE_ID var argv = args.toList while (!argv.isEmpty) { @@ -327,6 +398,9 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { case ("--executor-id") :: value :: tail => executorId = value argv = tail + case ("--bind-address") :: value :: tail => + bindAddress = value + argv = tail case 
("--hostname") :: value :: tail => hostname = value argv = tail @@ -346,6 +420,9 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { case ("--user-class-path") :: value :: tail => userClassPath += new URL(value) argv = tail + case ("--resourceProfileId") :: value :: tail => + resourceProfileId = value.toInt + argv = tail case Nil => case tail => // scalastyle:off println @@ -364,8 +441,12 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { printUsageAndExit(classNameForEntry) } - Arguments(driverUrl, executorId, hostname, cores, appId, workerUrl, - userClassPath, resourcesFileOpt) + if (bindAddress == null) { + bindAddress = hostname + } + + Arguments(driverUrl, executorId, bindAddress, hostname, cores, appId, workerUrl, + userClassPath, resourcesFileOpt, resourceProfileId) } private def printUsageAndExit(classNameForEntry: String): Unit = { @@ -377,12 +458,14 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { | Options are: | --driver-url | --executor-id + | --bind-address | --hostname | --cores | --resourcesFile | --app-id | --worker-url | --user-class-path + | --resourceProfileId |""".stripMargin) // scalastyle:on println System.exit(1) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index c337d24381286..2bfa1cea4b26f 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -24,9 +24,11 @@ import java.net.{URI, URL} import java.nio.ByteBuffer import java.util.Properties import java.util.concurrent._ +import java.util.concurrent.atomic.AtomicBoolean import javax.annotation.concurrent.GuardedBy import scala.collection.JavaConverters._ +import scala.collection.immutable import scala.collection.mutable.{ArrayBuffer, HashMap, Map, WrappedArray} import scala.concurrent.duration._ import scala.util.control.NonFatal @@ -37,8 +39,10 @@ 
import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.internal.plugin.PluginContainer import org.apache.spark.memory.{SparkOutOfMemoryError, TaskMemoryManager} import org.apache.spark.metrics.source.JVMCPUSource +import org.apache.spark.resource.ResourceInformation import org.apache.spark.rpc.RpcTimeout import org.apache.spark.scheduler._ import org.apache.spark.shuffle.FetchFailedException @@ -59,11 +63,16 @@ private[spark] class Executor( env: SparkEnv, userClassPath: Seq[URL] = Nil, isLocal: Boolean = false, - uncaughtExceptionHandler: UncaughtExceptionHandler = new SparkUncaughtExceptionHandler) + uncaughtExceptionHandler: UncaughtExceptionHandler = new SparkUncaughtExceptionHandler, + resources: immutable.Map[String, ResourceInformation]) extends Logging { logInfo(s"Starting executor ID $executorId on host $executorHostname") + private val executorShutdown = new AtomicBoolean(false) + ShutdownHookManager.addShutdownHook( + () => stop() + ) // Application dependencies (added through SparkContext) that we've fetched so far on this node. // Each map holds the master's timestamp for the version of that file or JAR we got. private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() @@ -112,10 +121,18 @@ private[spark] class Executor( // create. The map key is a task id. 
private val taskReaperForTask: HashMap[Long, TaskReaper] = HashMap[Long, TaskReaper]() + val executorMetricsSource = + if (conf.get(METRICS_EXECUTORMETRICS_SOURCE_ENABLED)) { + Some(new ExecutorMetricsSource) + } else { + None + } + if (!isLocal) { env.blockManager.initialize(conf.getAppId) env.metricsSystem.registerSource(executorSource) env.metricsSystem.registerSource(new JVMCPUSource()) + executorMetricsSource.foreach(_.register(env.metricsSystem)) env.metricsSystem.registerSource(env.blockManager.shuffleMetricsSource) } @@ -136,27 +153,9 @@ private[spark] class Executor( // for fetching remote cached RDD blocks, so need to make sure it uses the right classloader too. env.serializerManager.setDefaultClassLoader(replClassLoader) - private val executorPlugins: Seq[ExecutorPlugin] = { - val pluginNames = conf.get(EXECUTOR_PLUGINS) - if (pluginNames.nonEmpty) { - logDebug(s"Initializing the following plugins: ${pluginNames.mkString(", ")}") - - // Plugins need to load using a class loader that includes the executor's user classpath - val pluginList: Seq[ExecutorPlugin] = - Utils.withContextClassLoader(replClassLoader) { - val plugins = Utils.loadExtensions(classOf[ExecutorPlugin], pluginNames, conf) - plugins.foreach { plugin => - plugin.init() - logDebug(s"Successfully loaded plugin " + plugin.getClass().getCanonicalName()) - } - plugins - } - - logDebug("Finished initializing plugins") - pluginList - } else { - Nil - } + // Plugins need to load using a class loader that includes the executor's user classpath + private val plugins: Option[PluginContainer] = Utils.withContextClassLoader(replClassLoader) { + PluginContainer(env, resources.asJava) } // Max size of direct result. If task result is bigger than this, we use the block manager @@ -198,7 +197,8 @@ private[spark] class Executor( // Poller for the memory metrics. Visible for testing. 
private[executor] val metricsPoller = new ExecutorMetricsPoller( env.memoryManager, - METRICS_POLLING_INTERVAL_MS) + METRICS_POLLING_INTERVAL_MS, + executorMetricsSource) // Executor for the heartbeat task. private val heartbeater = new Heartbeater( @@ -216,16 +216,32 @@ private[spark] class Executor( */ private var heartbeatFailures = 0 + /** + * Flag to prevent launching new tasks while decommissioned. There could be a race condition + * accessing this, but decommissioning is only intended to help not be a hard stop. + */ + private var decommissioned = false + heartbeater.start() metricsPoller.start() private[executor] def numRunningTasks: Int = runningTasks.size() + /** + * Mark an executor for decommissioning and avoid launching new tasks. + */ + private[spark] def decommission(): Unit = { + decommissioned = true + } + def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = { val tr = new TaskRunner(context, taskDescription) runningTasks.put(taskDescription.taskId, tr) threadPool.execute(tr) + if (decommissioned) { + log.error(s"Launching a task while in decommissioned state.") + } } def killTask(taskId: Long, interruptThread: Boolean, reason: String): Unit = { @@ -266,34 +282,29 @@ private[spark] class Executor( } def stop(): Unit = { - env.metricsSystem.report() - try { - metricsPoller.stop() - } catch { - case NonFatal(e) => - logWarning("Unable to stop executor metrics poller", e) - } - try { - heartbeater.stop() - } catch { - case NonFatal(e) => - logWarning("Unable to stop heartbeater", e) - } - threadPool.shutdown() - - // Notify plugins that executor is shutting down so they can terminate cleanly - Utils.withContextClassLoader(replClassLoader) { - executorPlugins.foreach { plugin => - try { - plugin.shutdown() - } catch { - case e: Exception => - logWarning("Plugin " + plugin.getClass().getCanonicalName() + " shutdown failed", e) - } + if (!executorShutdown.getAndSet(true)) { + env.metricsSystem.report() + try { + 
metricsPoller.stop() + } catch { + case NonFatal(e) => + logWarning("Unable to stop executor metrics poller", e) + } + try { + heartbeater.stop() + } catch { + case NonFatal(e) => + logWarning("Unable to stop heartbeater", e) + } + threadPool.shutdown() + + // Notify plugins that executor is shutting down so they can terminate cleanly + Utils.withContextClassLoader(replClassLoader) { + plugins.foreach(_.shutdown()) + } + if (!isLocal) { + env.stop() } - } - if (!isLocal) { - env.stop() } } @@ -623,6 +634,11 @@ private[spark] class Executor( setTaskFinishedAndClearInterruptStatus() execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) + case t: Throwable if env.isStopped => + // Log the expected exception after executor.stop without stack traces + // see: SPARK-19147 + logError(s"Exception in $taskName (TID $taskId): ${t.getMessage}") + case t: Throwable => // Attempt to exit cleanly by informing the driver of our failure. // If anything goes wrong (or this was a fatal exception), we will delegate to @@ -846,7 +862,7 @@ private[spark] class Executor( * Download any missing dependencies if we receive a new set of files and JARs from the * SparkContext. Also adds any new JARs we fetched to the class loader. 
*/ - private def updateDependencies(newFiles: Map[String, Long], newJars: Map[String, Long]) { + private def updateDependencies(newFiles: Map[String, Long], newJars: Map[String, Long]): Unit = { lazy val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) synchronized { // Fetch missing dependencies diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala index 805b0f729b122..1c1a1ca8035d0 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala @@ -48,7 +48,8 @@ import org.apache.spark.util.{ThreadUtils, Utils} */ private[spark] class ExecutorMetricsPoller( memoryManager: MemoryManager, - pollingInterval: Long) extends Logging { + pollingInterval: Long, + executorMetricsSource: Option[ExecutorMetricsSource]) extends Logging { type StageKey = (Int, Int) // Task Count and Metric Peaks @@ -79,6 +80,7 @@ private[spark] class ExecutorMetricsPoller( // get the latest values for the metrics val latestMetrics = ExecutorMetrics.getCurrentMetrics(memoryManager) + executorMetricsSource.foreach(_.updateMetricsSnapshot(latestMetrics)) def updatePeaks(metrics: AtomicLongArray): Unit = { (0 until metrics.length).foreach { i => diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala new file mode 100644 index 0000000000000..14645f73ef278 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsSource.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.executor + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.metrics.{ExecutorMetricType, MetricsSystem} +import org.apache.spark.metrics.source.Source + +/** + * Expose executor metrics from [[ExecutorMetricType]] using the Dropwizard metrics system. + * + * Metrics related to the memory system can be expensive to gather, therefore + * we implement some optimizations: + * (1) Metrics values are cached, updated at each heartbeat (default period is 10 seconds). + * An alternative faster polling mechanism is used, only if activated, by setting + * spark.executor.metrics.pollingInterval=<polling interval>. + * (2) Procfs metrics are gathered all in one-go and only conditionally: + * if the /proc filesystem exists + * and spark.executor.processTreeMetrics.enabled=true.
+ */ +private[spark] class ExecutorMetricsSource extends Source { + + override val metricRegistry = new MetricRegistry() + override val sourceName = "ExecutorMetrics" + @volatile var metricsSnapshot: Array[Long] = Array.fill(ExecutorMetricType.numMetrics)(0L) + + // called by ExecutorMetricsPoller + def updateMetricsSnapshot(metricsUpdates: Array[Long]): Unit = { + metricsSnapshot = metricsUpdates + } + + private class ExecutorMetricGauge(idx: Int) extends Gauge[Long] { + def getValue: Long = metricsSnapshot(idx) + } + + def register(metricsSystem: MetricsSystem): Unit = { + val gauges: IndexedSeq[ExecutorMetricGauge] = (0 until ExecutorMetricType.numMetrics).map { + idx => new ExecutorMetricGauge(idx) + }.toIndexedSeq + + ExecutorMetricType.metricToOffset.foreach { + case (name, idx) => + metricRegistry.register(MetricRegistry.name(name), gauges(idx)) + } + + metricsSystem.registerSource(this) + } +} diff --git a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala index 2111273d8b35a..80ef757332e43 100644 --- a/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala +++ b/core/src/main/scala/org/apache/spark/executor/ProcfsMetricsGetter.scala @@ -18,7 +18,6 @@ package org.apache.spark.executor import java.io._ -import java.nio.charset.Charset import java.nio.charset.StandardCharsets.UTF_8 import java.nio.file.{Files, Paths} import java.util.Locale @@ -59,11 +58,9 @@ private[spark] class ProcfsMetricsGetter(procfsDir: String = "/proc/") extends L logWarning("Exception checking for procfs dir", ioe) false } - val shouldLogStageExecutorMetrics = - SparkEnv.get.conf.get(config.EVENT_LOG_STAGE_EXECUTOR_METRICS) - val shouldLogStageExecutorProcessTreeMetrics = - SparkEnv.get.conf.get(config.EVENT_LOG_PROCESS_TREE_METRICS) - procDirExists.get && shouldLogStageExecutorProcessTreeMetrics && shouldLogStageExecutorMetrics + val shouldPollProcessTreeMetrics = + 
SparkEnv.get.conf.get(config.EXECUTOR_PROCESS_TREE_METRICS_ENABLED) + procDirExists.get && shouldPollProcessTreeMetrics } } diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index ea79c7310349d..1470a23884bb0 100644 --- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -137,6 +137,7 @@ class TaskMetrics private[spark] () extends Serializable { private[spark] def setJvmGCTime(v: Long): Unit = _jvmGCTime.setValue(v) private[spark] def setResultSerializationTime(v: Long): Unit = _resultSerializationTime.setValue(v) + private[spark] def setPeakExecutionMemory(v: Long): Unit = _peakExecutionMemory.setValue(v) private[spark] def incMemoryBytesSpilled(v: Long): Unit = _memoryBytesSpilled.add(v) private[spark] def incDiskBytesSpilled(v: Long): Unit = _diskBytesSpilled.add(v) private[spark] def incPeakExecutionMemory(v: Long): Unit = _peakExecutionMemory.add(v) diff --git a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala index 549395314ba61..f6902d1bf83a1 100644 --- a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala +++ b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala @@ -46,7 +46,7 @@ private[spark] class FixedLengthBinaryRecordReader private var recordKey: LongWritable = null private var recordValue: BytesWritable = null - override def close() { + override def close(): Unit = { if (fileInputStream != null) { fileInputStream.close() } @@ -69,7 +69,7 @@ private[spark] class FixedLengthBinaryRecordReader } } - override def initialize(inputSplit: InputSplit, context: TaskAttemptContext) { + override def initialize(inputSplit: InputSplit, context: TaskAttemptContext): Unit = { // the file input val fileSplit = 
inputSplit.asInstanceOf[FileSplit] diff --git a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala index 6a4af01475646..57210da6a48eb 100644 --- a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala +++ b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala @@ -44,7 +44,7 @@ private[spark] abstract class StreamFileInputFormat[T] * Allow minPartitions set by end-user in order to keep compatibility with old Hadoop API * which is set through setMaxSplitSize */ - def setMinPartitions(sc: SparkContext, context: JobContext, minPartitions: Int) { + def setMinPartitions(sc: SparkContext, context: JobContext, minPartitions: Int): Unit = { val defaultMaxSplitBytes = sc.getConf.get(config.FILES_MAX_PARTITION_BYTES) val openCostInBytes = sc.getConf.get(config.FILES_OPEN_COST_IN_BYTES) val defaultParallelism = Math.max(sc.defaultParallelism, minPartitions) diff --git a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala index 04c5c4b90e8a1..692deb7a3282f 100644 --- a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala +++ b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala @@ -48,7 +48,7 @@ private[spark] class WholeTextFileInputFormat * Allow minPartitions set by end-user in order to keep compatibility with old Hadoop API, * which is set through setMaxSplitSize */ - def setMinPartitions(context: JobContext, minPartitions: Int) { + def setMinPartitions(context: JobContext, minPartitions: Int): Unit = { val files = listStatus(context).asScala val totalLen = files.map(file => if (file.isDirectory) 0L else file.getLen).sum val maxSplitSize = Math.ceil(totalLen * 1.0 / diff --git a/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala 
b/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala index 28fd1ff1b77ca..0bd2d551cc912 100644 --- a/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala +++ b/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.mapreduce.lib.input.{CombineFileRecordReader, CombineFi */ private[spark] trait Configurable extends HConfigurable { private var conf: Configuration = _ - def setConf(c: Configuration) { + def setConf(c: Configuration): Unit = { conf = c } def getConf: Configuration = conf diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala index 0987917bac0e7..0c1d9635b6535 100644 --- a/core/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala @@ -53,44 +53,44 @@ trait Logging { } // Log methods that take only a String - protected def logInfo(msg: => String) { + protected def logInfo(msg: => String): Unit = { if (log.isInfoEnabled) log.info(msg) } - protected def logDebug(msg: => String) { + protected def logDebug(msg: => String): Unit = { if (log.isDebugEnabled) log.debug(msg) } - protected def logTrace(msg: => String) { + protected def logTrace(msg: => String): Unit = { if (log.isTraceEnabled) log.trace(msg) } - protected def logWarning(msg: => String) { + protected def logWarning(msg: => String): Unit = { if (log.isWarnEnabled) log.warn(msg) } - protected def logError(msg: => String) { + protected def logError(msg: => String): Unit = { if (log.isErrorEnabled) log.error(msg) } // Log methods that take Throwables (Exceptions/Errors) too - protected def logInfo(msg: => String, throwable: Throwable) { + protected def logInfo(msg: => String, throwable: Throwable): Unit = { if (log.isInfoEnabled) log.info(msg, throwable) } - protected def logDebug(msg: => String, throwable: Throwable) { + protected def logDebug(msg: 
=> String, throwable: Throwable): Unit = { if (log.isDebugEnabled) log.debug(msg, throwable) } - protected def logTrace(msg: => String, throwable: Throwable) { + protected def logTrace(msg: => String, throwable: Throwable): Unit = { if (log.isTraceEnabled) log.trace(msg, throwable) } - protected def logWarning(msg: => String, throwable: Throwable) { + protected def logWarning(msg: => String, throwable: Throwable): Unit = { if (log.isWarnEnabled) log.warn(msg, throwable) } - protected def logError(msg: => String, throwable: Throwable) { + protected def logError(msg: => String, throwable: Throwable): Unit = { if (log.isErrorEnabled) log.error(msg, throwable) } @@ -116,6 +116,11 @@ trait Logging { false } + // For testing + private[spark] def initializeForcefully(isInterpreter: Boolean, silent: Boolean): Unit = { + initializeLogging(isInterpreter, silent) + } + private def initializeLogging(isInterpreter: Boolean, silent: Boolean): Unit = { // Don't use a logger in here, as this is itself occurring during initialization of a logger // If Log4j 1.2 is being used, but is not initialized, load a default properties file @@ -230,19 +235,18 @@ private class SparkShellLoggingFilter extends Filter { */ def decide(loggingEvent: LoggingEvent): Int = { if (Logging.sparkShellThresholdLevel == null) { - return Filter.NEUTRAL - } - val rootLevel = LogManager.getRootLogger().getLevel() - if (!loggingEvent.getLevel().eq(rootLevel)) { - return Filter.NEUTRAL - } - var logger = loggingEvent.getLogger() - while (logger.getParent() != null) { - if (logger.getLevel() != null) { - return Filter.NEUTRAL + Filter.NEUTRAL + } else if (loggingEvent.getLevel.isGreaterOrEqual(Logging.sparkShellThresholdLevel)) { + Filter.NEUTRAL + } else { + var logger = loggingEvent.getLogger() + while (logger.getParent() != null) { + if (logger.getLevel != null || logger.getAllAppenders.hasMoreElements) { + return Filter.NEUTRAL + } + logger = logger.getParent() } - logger = logger.getParent() + Filter.DENY } 
- return Filter.DENY } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/History.scala b/core/src/main/scala/org/apache/spark/internal/config/History.scala index ca9af316dffd0..14fb5ff075472 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/History.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/History.scala @@ -84,6 +84,25 @@ private[spark] object History { .bytesConf(ByteUnit.BYTE) .createWithDefaultString("1m") + private[spark] val EVENT_LOG_ROLLING_MAX_FILES_TO_RETAIN = + ConfigBuilder("spark.history.fs.eventLog.rolling.maxFilesToRetain") + .doc("The maximum number of event log files which will be retained as non-compacted. " + + "By default, all event log files will be retained. Please set the configuration " + + s"and ${EVENT_LOG_ROLLING_MAX_FILE_SIZE.key} accordingly if you want to control " + + "the overall size of event log files.") + .intConf + .checkValue(_ > 0, "Max event log files to retain should be higher than 0.") + .createWithDefault(Integer.MAX_VALUE) + + private[spark] val EVENT_LOG_COMPACTION_SCORE_THRESHOLD = + ConfigBuilder("spark.history.fs.eventLog.rolling.compaction.score.threshold") + .doc("The threshold score to determine whether it's good to do the compaction or not. " + + "The compaction score is calculated in analyzing, and being compared to this value. 
" + + "Compaction will proceed only when the score is higher than the threshold value.") + .internal() + .doubleConf + .createWithDefault(0.7d) + val DRIVER_LOG_CLEANER_ENABLED = ConfigBuilder("spark.history.fs.driverlog.cleaner.enabled") .fallbackConf(CLEANER_ENABLED) diff --git a/core/src/main/scala/org/apache/spark/internal/config/Status.scala b/core/src/main/scala/org/apache/spark/internal/config/Status.scala index 3e6a4e9810664..3cc00a6f094cf 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Status.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Status.scala @@ -55,8 +55,8 @@ private[spark] object Status { .intConf .createWithDefault(Int.MaxValue) - val APP_STATUS_METRICS_ENABLED = - ConfigBuilder("spark.app.status.metrics.enabled") + val METRICS_APP_STATUS_SOURCE_ENABLED = + ConfigBuilder("spark.metrics.appStatusSource.enabled") .doc("Whether Dropwizard/Codahale metrics " + "will be reported for the status of the running spark app.") .booleanConf diff --git a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala index 21660ab3a9512..51df73ebde07d 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala @@ -53,4 +53,13 @@ private[spark] object Tests { val TEST_N_CORES_EXECUTOR = ConfigBuilder("spark.testing.nCoresPerExecutor") .intConf .createWithDefault(2) + + val RESOURCES_WARNING_TESTING = + ConfigBuilder("spark.resources.warnings.testing").booleanConf.createWithDefault(false) + + val RESOURCE_PROFILE_MANAGER_TESTING = + ConfigBuilder("spark.testing.resourceProfileManager") + .booleanConf + .createWithDefault(false) + } diff --git a/core/src/main/scala/org/apache/spark/internal/config/UI.scala b/core/src/main/scala/org/apache/spark/internal/config/UI.scala index a11970ec73d88..60d985713d30e 100644 --- 
a/core/src/main/scala/org/apache/spark/internal/config/UI.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/UI.scala @@ -81,6 +81,13 @@ private[spark] object UI { .booleanConf .createWithDefault(true) + val UI_PROMETHEUS_ENABLED = ConfigBuilder("spark.ui.prometheus.enabled") + .internal() + .doc("Expose executor metrics at /metrics/executors/prometheus. " + + "For master/worker/driver metrics, you need to configure `conf/metrics.properties`.") + .booleanConf + .createWithDefault(false) + val UI_X_XSS_PROTECTION = ConfigBuilder("spark.ui.xXssProtection") .doc("Value for HTTP X-XSS-Protection response header") .stringConf @@ -143,6 +150,11 @@ private[spark] object UI { .stringConf .createWithDefault("org.apache.spark.security.ShellBasedGroupsMappingProvider") + val PROXY_REDIRECT_URI = ConfigBuilder("spark.ui.proxyRedirectUri") + .doc("Proxy address to use when responding with HTTP redirects.") + .stringConf + .createOptional + val CUSTOM_EXECUTOR_LOG_URL = ConfigBuilder("spark.ui.custom.executor.log.url") .doc("Specifies custom spark executor log url for supporting external log service instead of " + "using cluster managers' application log urls in the Spark UI. 
Spark will support " + diff --git a/core/src/main/scala/org/apache/spark/internal/config/Worker.scala b/core/src/main/scala/org/apache/spark/internal/config/Worker.scala index f1eaae29f18df..2b175c1e14ee5 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Worker.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Worker.scala @@ -71,4 +71,9 @@ private[spark] object Worker { ConfigBuilder("spark.worker.ui.compressedLogFileLengthCacheSize") .intConf .createWithDefault(100) + + private[spark] val WORKER_DECOMMISSION_ENABLED = + ConfigBuilder("spark.worker.decommission.enabled") + .booleanConf + .createWithDefault(false) } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index b898413ac8d76..02acb6b530737 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -35,6 +35,7 @@ package object config { private[spark] val SPARK_DRIVER_PREFIX = "spark.driver" private[spark] val SPARK_EXECUTOR_PREFIX = "spark.executor" private[spark] val SPARK_TASK_PREFIX = "spark.task" + private[spark] val LISTENER_BUS_EVENT_QUEUE_PREFIX = "spark.scheduler.listenerbus.eventqueue" private[spark] val SPARK_RESOURCES_COORDINATE = ConfigBuilder("spark.resources.coordinate.enable") @@ -53,6 +54,18 @@ package object config { .stringConf .createOptional + private[spark] val RESOURCES_DISCOVERY_PLUGIN = + ConfigBuilder("spark.resources.discovery.plugin") + .doc("Comma-separated list of class names implementing" + + "org.apache.spark.api.resource.ResourceDiscoveryPlugin to load into the application." + + "This is for advanced users to replace the resource discovery class with a " + + "custom implementation. Spark will try each class specified until one of them " + + "returns the resource information for that resource. 
It tries the discovery " + + "script last if none of the plugins return information for that resource.") + .stringConf + .toSequence + .createWithDefault(Nil) + private[spark] val DRIVER_RESOURCES_FILE = ConfigBuilder("spark.driver.resourcesFile") .internal() @@ -106,6 +119,11 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val DRIVER_LOG_ALLOW_EC = + ConfigBuilder("spark.driver.log.allowErasureCoding") + .booleanConf + .createWithDefault(false) + private[spark] val EVENT_LOG_ENABLED = ConfigBuilder("spark.eventLog.enabled") .booleanConf .createWithDefault(false) @@ -125,7 +143,7 @@ package object config { .createWithDefault(false) private[spark] val EVENT_LOG_ALLOW_EC = - ConfigBuilder("spark.eventLog.allowErasureCoding") + ConfigBuilder("spark.eventLog.erasureCoding.enabled") .booleanConf .createWithDefault(false) @@ -142,11 +160,8 @@ package object config { private[spark] val EVENT_LOG_STAGE_EXECUTOR_METRICS = ConfigBuilder("spark.eventLog.logStageExecutorMetrics.enabled") - .booleanConf - .createWithDefault(false) - - private[spark] val EVENT_LOG_PROCESS_TREE_METRICS = - ConfigBuilder("spark.eventLog.logStageExecutorProcessTreeMetrics.enabled") + .doc("Whether to write per-stage peaks of executor metrics (for each executor) " + + "to the event log.") .booleanConf .createWithDefault(false) @@ -174,6 +189,21 @@ package object config { private[spark] val EVENT_LOG_CALLSITE_LONG_FORM = ConfigBuilder("spark.eventLog.longForm.enabled").booleanConf.createWithDefault(false) + private[spark] val EVENT_LOG_ENABLE_ROLLING = + ConfigBuilder("spark.eventLog.rolling.enabled") + .doc("Whether rolling over event log files is enabled. 
If set to true, it cuts down " + + "each event log file to the configured size.") + .booleanConf + .createWithDefault(false) + + private[spark] val EVENT_LOG_ROLLING_MAX_FILE_SIZE = + ConfigBuilder("spark.eventLog.rolling.maxFileSize") + .doc("The max size of event log file to be rolled over.") + .bytesConf(ByteUnit.BYTE) + .checkValue(_ >= ByteUnit.MiB.toBytes(10), "Max file size of event log should be " + + "configured to be at least 10 MiB.") + .createWithDefaultString("128m") + private[spark] val EXECUTOR_ID = ConfigBuilder("spark.executor.id").stringConf.createOptional @@ -194,8 +224,18 @@ package object config { private[spark] val EXECUTOR_HEARTBEAT_MAX_FAILURES = ConfigBuilder("spark.executor.heartbeat.maxFailures").internal().intConf.createWithDefault(60) + private[spark] val EXECUTOR_PROCESS_TREE_METRICS_ENABLED = + ConfigBuilder("spark.executor.processTreeMetrics.enabled") + .doc("Whether to collect process tree metrics (from the /proc filesystem) when collecting " + + "executor metrics.") + .booleanConf + .createWithDefault(false) + private[spark] val EXECUTOR_METRICS_POLLING_INTERVAL = ConfigBuilder("spark.executor.metrics.pollingInterval") + .doc("How often to collect executor metrics (in milliseconds). " + + "If 0, the polling is done on executor heartbeats. " + + "If positive, the polling is done at this interval.") .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("0") @@ -243,7 +283,8 @@ package object config { .createWithDefault(false) private[spark] val MEMORY_OFFHEAP_SIZE = ConfigBuilder("spark.memory.offHeap.size") - .doc("The absolute amount of memory in bytes which can be used for off-heap allocation. " + + .doc("The absolute amount of memory which can be used for off-heap allocation, " + + " in bytes unless otherwise specified. " + "This setting has no impact on heap memory usage, so if your executors' total memory " + "consumption must fit within some hard limit then be sure to shrink your JVM heap size " + "accordingly. 
This must be set to a positive value when spark.memory.offHeap.enabled=true.") @@ -575,6 +616,10 @@ package object config { private[spark] val LISTENER_BUS_EVENT_QUEUE_CAPACITY = ConfigBuilder("spark.scheduler.listenerbus.eventqueue.capacity") + .doc("The default capacity for event queues. Spark will try to initialize " + + "an event queue using capacity specified by `spark.scheduler.listenerbus" + + ".eventqueue.queueName.capacity` first. If it's not configured, Spark will " + + "use the default capacity specified by this config.") .intConf .checkValue(_ > 0, "The capacity of listener bus event queue must be positive") .createWithDefault(10000) @@ -585,6 +630,23 @@ package object config { .intConf .createWithDefault(128) + private[spark] val LISTENER_BUS_LOG_SLOW_EVENT_ENABLED = + ConfigBuilder("spark.scheduler.listenerbus.logSlowEvent.enabled") + .internal() + .doc("When enabled, log the event that takes too much time to process. This helps us " + + "discover the event types that cause performance bottlenecks. The time threshold is " + + "controlled by spark.scheduler.listenerbus.logSlowEvent.threshold.") + .booleanConf + .createWithDefault(true) + + private[spark] val LISTENER_BUS_LOG_SLOW_EVENT_TIME_THRESHOLD = + ConfigBuilder("spark.scheduler.listenerbus.logSlowEvent.threshold") + .internal() + .doc("The time threshold of whether a event is considered to be taking too much time to " + + "process. 
Log the event if spark.scheduler.listenerbus.logSlowEvent.enabled is true.") + .timeConf(TimeUnit.NANOSECONDS) + .createWithDefaultString("1s") + // This property sets the root namespace for metrics reporting private[spark] val METRICS_NAMESPACE = ConfigBuilder("spark.metrics.namespace") .stringConf @@ -594,6 +656,18 @@ package object config { .stringConf .createOptional + private[spark] val METRICS_EXECUTORMETRICS_SOURCE_ENABLED = + ConfigBuilder("spark.metrics.executorMetricsSource.enabled") + .doc("Whether to register the ExecutorMetrics source with the metrics system.") + .booleanConf + .createWithDefault(true) + + private[spark] val METRICS_STATIC_SOURCES_ENABLED = + ConfigBuilder("spark.metrics.staticSources.enabled") + .doc("Whether to register static sources with the metrics system.") + .booleanConf + .createWithDefault(true) + private[spark] val PYSPARK_DRIVER_PYTHON = ConfigBuilder("spark.pyspark.driver.python") .stringConf .createOptional @@ -777,6 +851,17 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val CACHE_CHECKPOINT_PREFERRED_LOCS_EXPIRE_TIME = + ConfigBuilder("spark.rdd.checkpoint.cachePreferredLocsExpireTime") + .internal() + .doc("Expire time in minutes for caching preferred locations of checkpointed RDD." + + "Caching preferred locations can relieve query loading to DFS and save the query " + + "time. The drawback is that the cached locations can be possibly outdated and " + + "lose data locality. 
If this config is not specified, it will not cache.") + .timeConf(TimeUnit.MINUTES) + .checkValue(_ > 0, "The expire time for caching preferred locations cannot be non-positive.") + .createOptional + private[spark] val SHUFFLE_ACCURATE_BLOCK_THRESHOLD = ConfigBuilder("spark.shuffle.accurateBlockThreshold") .doc("Threshold in bytes above which the size of shuffle blocks in " + @@ -810,7 +895,7 @@ package object config { .createWithDefault(Int.MaxValue) private[spark] val MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM = - ConfigBuilder("spark.maxRemoteBlockSizeFetchToMem") + ConfigBuilder("spark.network.maxRemoteBlockSizeFetchToMem") .doc("Remote block will be fetched to disk when size of the block is above this threshold " + "in bytes. This is to avoid a giant request takes too much memory. Note this " + "configuration will affect both shuffle fetch and block manager remote block fetch. " + @@ -961,6 +1046,15 @@ package object config { .booleanConf .createWithDefault(true) + private[spark] val MAP_STATUS_COMPRESSION_CODEC = + ConfigBuilder("spark.shuffle.mapStatus.compression.codec") + .internal() + .doc("The codec used to compress MapStatus, which is generated by ShuffleMapTask. " + + "By default, Spark provides four codecs: lz4, lzf, snappy, and zstd. You can also " + + "use fully qualified class names to specify the codec.") + .stringConf + .createWithDefault("zstd") + private[spark] val SHUFFLE_SPILL_INITIAL_MEM_THRESHOLD = ConfigBuilder("spark.shuffle.spill.initialMemoryThreshold") .internal() @@ -1020,13 +1114,23 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val STORAGE_LOCAL_DISK_BY_EXECUTORS_CACHE_SIZE = + ConfigBuilder("spark.storage.localDiskByExecutors.cacheSize") + .doc("The max number of executors for which the local dirs are stored. This size is " + + "both applied for the driver and both for the executors side to avoid having an " + + "unbounded store. 
This cache will be used to avoid the network in case of fetching disk " + + "persisted RDD blocks or shuffle blocks (when `spark.shuffle.readHostLocalDisk.enabled` " + + "is set) from the same host.") + .intConf + .createWithDefault(1000) + private[spark] val SHUFFLE_SYNC = ConfigBuilder("spark.shuffle.sync") .doc("Whether to force outstanding writes to disk.") .booleanConf .createWithDefault(false) - private[spark] val SHUFFLE_UNDAFE_FAST_MERGE_ENABLE = + private[spark] val SHUFFLE_UNSAFE_FAST_MERGE_ENABLE = ConfigBuilder("spark.shuffle.unsafe.fastMergeEnabled") .doc("Whether to perform a fast spill merge.") .booleanConf @@ -1047,6 +1151,22 @@ package object config { .checkValue(v => v > 0, "The value should be a positive integer.") .createWithDefault(2000) + private[spark] val SHUFFLE_USE_OLD_FETCH_PROTOCOL = + ConfigBuilder("spark.shuffle.useOldFetchProtocol") + .doc("Whether to use the old protocol while doing the shuffle block fetching. " + + "It is only enabled while we need the compatibility in the scenario of new Spark " + + "version job fetching shuffle blocks from old version external shuffle service.") + .booleanConf + .createWithDefault(false) + + private[spark] val SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED = + ConfigBuilder("spark.shuffle.readHostLocalDisk.enabled") + .doc(s"If enabled (and `${SHUFFLE_USE_OLD_FETCH_PROTOCOL.key}` is disabled), shuffle " + + "blocks requested from those block managers which are running on the same host are read " + + "from the disk directly instead of being fetched as remote blocks over the network.") + .booleanConf + .createWithDefault(true) + private[spark] val MEMORY_MAP_LIMIT_FOR_TESTS = ConfigBuilder("spark.storage.memoryMapLimitForTests") .internal() @@ -1119,12 +1239,13 @@ package object config { s"The value must be in allowed range [1,048,576, ${MAX_BUFFER_SIZE_BYTES}].") .createWithDefault(1024 * 1024) - private[spark] val EXECUTOR_PLUGINS = - ConfigBuilder("spark.executor.plugins") - .doc("Comma-separated list 
of class names for \"plugins\" implementing " + - "org.apache.spark.ExecutorPlugin. Plugins have the same privileges as any task " + - "in a Spark executor. They can also interfere with task execution and fail in " + - "unexpected ways. So be sure to only use this for trusted plugins.") + private[spark] val DEFAULT_PLUGINS_LIST = "spark.plugins.defaultList" + + private[spark] val PLUGINS = + ConfigBuilder("spark.plugins") + .withPrepended(DEFAULT_PLUGINS_LIST, separator = ",") + .doc("Comma-separated list of class names implementing " + + "org.apache.spark.api.plugin.SparkPlugin to load into the application.") .stringConf .toSequence .createWithDefault(Nil) @@ -1229,9 +1350,9 @@ package object config { private[spark] val IO_WARNING_LARGEFILETHRESHOLD = ConfigBuilder("spark.io.warning.largeFileThreshold") .internal() - .doc("When spark loading one single large file, if file size exceed this " + - "threshold, then log warning with possible reasons.") - .longConf + .doc("If the size in bytes of a file loaded by Spark exceeds this threshold, " + + "a warning is logged with the possible reasons.") + .bytesConf(ByteUnit.BYTE) .createWithDefault(1024 * 1024 * 1024) private[spark] val EVENT_LOG_COMPRESSION_CODEC = @@ -1397,6 +1518,19 @@ package object config { .doubleConf .createWithDefault(0.75) + private[spark] val SPECULATION_TASK_DURATION_THRESHOLD = + ConfigBuilder("spark.speculation.task.duration.threshold") + .doc("Task duration after which scheduler would try to speculative run the task. If " + + "provided, tasks would be speculatively run if current stage contains less tasks " + + "than or equal to the number of slots on a single executor and the task is taking " + + "longer time than the threshold. This config helps speculate stage with very few " + + "tasks. Regular speculation configs may also apply if the executor slots are " + + "large enough. E.g. 
tasks might be re-launched if there are enough successful runs " + + "even though the threshold hasn't been reached. The number of slots is computed based " + + "on the conf values of spark.executor.cores and spark.task.cpus minimum 1.") + .timeConf(TimeUnit.MILLISECONDS) + .createOptional + private[spark] val STAGING_DIR = ConfigBuilder("spark.yarn.stagingDir") .doc("Staging directory used while submitting applications.") .stringConf diff --git a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala new file mode 100644 index 0000000000000..4eda4767094ad --- /dev/null +++ b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.internal.plugin + +import scala.collection.JavaConverters._ +import scala.util.{Either, Left, Right} + +import org.apache.spark.{SparkContext, SparkEnv} +import org.apache.spark.api.plugin._ +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.resource.ResourceInformation +import org.apache.spark.util.Utils + +sealed abstract class PluginContainer { + + def shutdown(): Unit + def registerMetrics(appId: String): Unit + +} + +private class DriverPluginContainer( + sc: SparkContext, + resources: java.util.Map[String, ResourceInformation], + plugins: Seq[SparkPlugin]) + extends PluginContainer with Logging { + + private val driverPlugins: Seq[(String, DriverPlugin, PluginContextImpl)] = plugins.flatMap { p => + val driverPlugin = p.driverPlugin() + if (driverPlugin != null) { + val name = p.getClass().getName() + val ctx = new PluginContextImpl(name, sc.env.rpcEnv, sc.env.metricsSystem, sc.conf, + sc.env.executorId, resources) + + val extraConf = driverPlugin.init(sc, ctx) + if (extraConf != null) { + extraConf.asScala.foreach { case (k, v) => + sc.conf.set(s"${PluginContainer.EXTRA_CONF_PREFIX}$name.$k", v) + } + } + logInfo(s"Initialized driver component for plugin $name.") + Some((p.getClass().getName(), driverPlugin, ctx)) + } else { + None + } + } + + if (driverPlugins.nonEmpty) { + val pluginsByName = driverPlugins.map { case (name, plugin, _) => (name, plugin) }.toMap + sc.env.rpcEnv.setupEndpoint(classOf[PluginEndpoint].getName(), + new PluginEndpoint(pluginsByName, sc.env.rpcEnv)) + } + + override def registerMetrics(appId: String): Unit = { + driverPlugins.foreach { case (_, plugin, ctx) => + plugin.registerMetrics(appId, ctx) + ctx.registerMetrics() + } + } + + override def shutdown(): Unit = { + driverPlugins.foreach { case (name, plugin, _) => + try { + logDebug(s"Stopping plugin $name.") + plugin.shutdown() + } catch { + case t: Throwable => + logInfo(s"Exception 
while shutting down plugin $name.", t) + } + } + } + +} + +private class ExecutorPluginContainer( + env: SparkEnv, + resources: java.util.Map[String, ResourceInformation], + plugins: Seq[SparkPlugin]) + extends PluginContainer with Logging { + + private val executorPlugins: Seq[(String, ExecutorPlugin)] = { + val allExtraConf = env.conf.getAllWithPrefix(PluginContainer.EXTRA_CONF_PREFIX) + + plugins.flatMap { p => + val executorPlugin = p.executorPlugin() + if (executorPlugin != null) { + val name = p.getClass().getName() + val prefix = name + "." + val extraConf = allExtraConf + .filter { case (k, v) => k.startsWith(prefix) } + .map { case (k, v) => k.substring(prefix.length()) -> v } + .toMap + .asJava + val ctx = new PluginContextImpl(name, env.rpcEnv, env.metricsSystem, env.conf, + env.executorId, resources) + executorPlugin.init(ctx, extraConf) + ctx.registerMetrics() + + logInfo(s"Initialized executor component for plugin $name.") + Some(p.getClass().getName() -> executorPlugin) + } else { + None + } + } + } + + override def registerMetrics(appId: String): Unit = { + throw new IllegalStateException("Should not be called for the executor container.") + } + + override def shutdown(): Unit = { + executorPlugins.foreach { case (name, plugin) => + try { + logDebug(s"Stopping plugin $name.") + plugin.shutdown() + } catch { + case t: Throwable => + logInfo(s"Exception while shutting down plugin $name.", t) + } + } + } +} + +object PluginContainer { + + val EXTRA_CONF_PREFIX = "spark.plugins.internal.conf." 
+ + def apply( + sc: SparkContext, + resources: java.util.Map[String, ResourceInformation]): Option[PluginContainer] = { + PluginContainer(Left(sc), resources) + } + + def apply( + env: SparkEnv, + resources: java.util.Map[String, ResourceInformation]): Option[PluginContainer] = { + PluginContainer(Right(env), resources) + } + + + private def apply( + ctx: Either[SparkContext, SparkEnv], + resources: java.util.Map[String, ResourceInformation]): Option[PluginContainer] = { + val conf = ctx.fold(_.conf, _.conf) + val plugins = Utils.loadExtensions(classOf[SparkPlugin], conf.get(PLUGINS).distinct, conf) + if (plugins.nonEmpty) { + ctx match { + case Left(sc) => Some(new DriverPluginContainer(sc, resources, plugins)) + case Right(env) => Some(new ExecutorPluginContainer(env, resources, plugins)) + } + } else { + None + } + } +} diff --git a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContextImpl.scala b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContextImpl.scala new file mode 100644 index 0000000000000..ca9119409d4b9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContextImpl.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.internal.plugin + +import java.util + +import com.codahale.metrics.MetricRegistry + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.api.plugin.PluginContext +import org.apache.spark.internal.Logging +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.metrics.source.Source +import org.apache.spark.resource.ResourceInformation +import org.apache.spark.rpc.RpcEnv +import org.apache.spark.util.RpcUtils + +private class PluginContextImpl( + pluginName: String, + rpcEnv: RpcEnv, + metricsSystem: MetricsSystem, + override val conf: SparkConf, + override val executorID: String, + override val resources: util.Map[String, ResourceInformation]) + extends PluginContext with Logging { + + override def hostname(): String = rpcEnv.address.hostPort.split(":")(0) + + private val registry = new MetricRegistry() + + private lazy val driverEndpoint = try { + RpcUtils.makeDriverRef(classOf[PluginEndpoint].getName(), conf, rpcEnv) + } catch { + case e: Exception => + logWarning(s"Failed to create driver plugin endpoint ref.", e) + null + } + + override def metricRegistry(): MetricRegistry = registry + + override def send(message: AnyRef): Unit = { + if (driverEndpoint == null) { + throw new IllegalStateException("Driver endpoint is not known.") + } + driverEndpoint.send(PluginMessage(pluginName, message)) + } + + override def ask(message: AnyRef): AnyRef = { + try { + if (driverEndpoint != null) { + driverEndpoint.askSync[AnyRef](PluginMessage(pluginName, message)) + } else { + throw new IllegalStateException("Driver endpoint is not known.") + } + } catch { + case e: SparkException if e.getCause() != null => + throw e.getCause() + } + } + + def registerMetrics(): Unit = { + if (!registry.getMetrics().isEmpty()) { + val src = new PluginMetricsSource(s"plugin.$pluginName", registry) + metricsSystem.registerSource(src) + } + } + + class PluginMetricsSource( + override val sourceName: String, + override val 
metricRegistry: MetricRegistry) + extends Source + +} diff --git a/core/src/main/scala/org/apache/spark/internal/plugin/PluginEndpoint.scala b/core/src/main/scala/org/apache/spark/internal/plugin/PluginEndpoint.scala new file mode 100644 index 0000000000000..9a59b6bf678f9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/internal/plugin/PluginEndpoint.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.internal.plugin + +import org.apache.spark.api.plugin.DriverPlugin +import org.apache.spark.internal.Logging +import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEnv} + +case class PluginMessage(pluginName: String, message: AnyRef) + +private class PluginEndpoint( + plugins: Map[String, DriverPlugin], + override val rpcEnv: RpcEnv) + extends IsolatedRpcEndpoint with Logging { + + override def receive: PartialFunction[Any, Unit] = { + case PluginMessage(pluginName, message) => + plugins.get(pluginName) match { + case Some(plugin) => + try { + val reply = plugin.receive(message) + if (reply != null) { + logInfo( + s"Plugin $pluginName returned reply for one-way message of type " + + s"${message.getClass().getName()}.") + } + } catch { + case e: Exception => + logWarning(s"Error in plugin $pluginName when handling message of type " + + s"${message.getClass().getName()}.", e) + } + + case None => + throw new IllegalArgumentException(s"Received message for unknown plugin $pluginName.") + } + } + + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case PluginMessage(pluginName, message) => + plugins.get(pluginName) match { + case Some(plugin) => + context.reply(plugin.receive(message)) + + case None => + throw new IllegalArgumentException(s"Received message for unknown plugin $pluginName.") + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index adbd59c9f03b4..5205a2d568ac3 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -44,6 +44,10 @@ trait CompressionCodec { def compressedOutputStream(s: OutputStream): OutputStream + private[spark] def compressedContinuousOutputStream(s: OutputStream): OutputStream = { + compressedOutputStream(s) + } + def compressedInputStream(s: InputStream): 
InputStream private[spark] def compressedContinuousInputStream(s: InputStream): InputStream = { @@ -220,6 +224,12 @@ class ZStdCompressionCodec(conf: SparkConf) extends CompressionCodec { new BufferedOutputStream(new ZstdOutputStream(s, level), bufferSize) } + override private[spark] def compressedContinuousOutputStream(s: OutputStream) = { + // SPARK-29322: Set "closeFrameOnFlush" to 'true' to let continuous input stream not being + // stuck on reading open frame. + new BufferedOutputStream(new ZstdOutputStream(s, level).setCloseFrameOnFlush(true), bufferSize) + } + override def compressedInputStream(s: InputStream): InputStream = { // Wrap the zstd input stream in a buffered input stream so that we can // avoid overhead excessive of JNI call while trying to uncompress small amount of data. diff --git a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala index 50055dcd2954a..4cffbb2a5701c 100644 --- a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala +++ b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala @@ -91,7 +91,7 @@ private[memory] class ExecutionMemoryPool( private[memory] def acquireMemory( numBytes: Long, taskAttemptId: Long, - maybeGrowPool: Long => Unit = (additionalSpaceNeeded: Long) => Unit, + maybeGrowPool: Long => Unit = (additionalSpaceNeeded: Long) => (), computeMaxPoolSize: () => Long = () => poolSize): Long = lock.synchronized { assert(numBytes > 0, s"invalid number of bytes requested: $numBytes") diff --git a/core/src/main/scala/org/apache/spark/memory/package.scala b/core/src/main/scala/org/apache/spark/memory/package.scala index 7f782193f246f..5909cb20b8e9c 100644 --- a/core/src/main/scala/org/apache/spark/memory/package.scala +++ b/core/src/main/scala/org/apache/spark/memory/package.scala @@ -41,23 +41,26 @@ package org.apache.spark * Diagrammatically: * * {{{ - * +-------------+ - * | MemConsumer |----+ 
+------------------------+ - * +-------------+ | +-------------------+ | MemoryManager | - * +--->| TaskMemoryManager |----+ | | - * +-------------+ | +-------------------+ | | +------------------+ | - * | MemConsumer |----+ | | | StorageMemPool | | - * +-------------+ +-------------------+ | | +------------------+ | - * | TaskMemoryManager |----+ | | - * +-------------------+ | | +------------------+ | - * +---->| |OnHeapExecMemPool | | - * * | | +------------------+ | - * * | | | - * +-------------+ * | | +------------------+ | - * | MemConsumer |----+ | | |OffHeapExecMemPool| | - * +-------------+ | +-------------------+ | | +------------------+ | - * +--->| TaskMemoryManager |----+ | | - * +-------------------+ +------------------------+ + * +---------------------------+ + * +-------------+ | MemoryManager | + * | MemConsumer |----+ | | + * +-------------+ | +-------------------+ | +---------------------+ | + * +--->| TaskMemoryManager |----+ | |OnHeapStorageMemPool | | + * +-------------+ | +-------------------+ | | +---------------------+ | + * | MemConsumer |----+ | | | + * +-------------+ +-------------------+ | | +---------------------+ | + * | TaskMemoryManager |----+ | |OffHeapStorageMemPool| | + * +-------------------+ | | +---------------------+ | + * +---->| | + * * | | +---------------------+ | + * * | | |OnHeapExecMemPool | | + * +-------------+ * | | +---------------------+ | + * | MemConsumer |----+ | | | + * +-------------+ | +-------------------+ | | +---------------------+ | + * +--->| TaskMemoryManager |----+ | |OffHeapExecMemPool | | + * +-------------------+ | +---------------------+ | + * | | + * +---------------------------+ * }}} * * diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala index b6be8aaefd351..d98d5e3b81aa0 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala +++ 
b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -38,7 +38,7 @@ private[spark] class MetricsConfig(conf: SparkConf) extends Logging { private[metrics] val properties = new Properties() private[metrics] var perInstanceSubProperties: mutable.HashMap[String, Properties] = null - private def setDefaultProperties(prop: Properties) { + private def setDefaultProperties(prop: Properties): Unit = { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") @@ -49,7 +49,7 @@ private[spark] class MetricsConfig(conf: SparkConf) extends Logging { * Load properties from various places, based on precedence * If the same property is set again latter on in the method, it overwrites the previous value */ - def initialize() { + def initialize(): Unit = { // Add default properties in case there's no properties file setDefaultProperties(properties) diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala index c96640a6fab3f..57dcbe501c6dd 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -28,7 +28,7 @@ import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ -import org.apache.spark.metrics.sink.{MetricsServlet, Sink} +import org.apache.spark.metrics.sink.{MetricsServlet, PrometheusServlet, Sink} import org.apache.spark.metrics.source.{Source, StaticSources} import org.apache.spark.util.Utils @@ -83,18 +83,20 @@ private[spark] class MetricsSystem private ( // Treat MetricsServlet as a special sink as it should be exposed to add handlers to web ui private var metricsServlet: 
Option[MetricsServlet] = None + private var prometheusServlet: Option[PrometheusServlet] = None /** * Get any UI handlers used by this metrics system; can only be called after start(). */ def getServletHandlers: Array[ServletContextHandler] = { require(running, "Can only call getServletHandlers on a running MetricsSystem") - metricsServlet.map(_.getHandlers(conf)).getOrElse(Array()) + metricsServlet.map(_.getHandlers(conf)).getOrElse(Array()) ++ + prometheusServlet.map(_.getHandlers(conf)).getOrElse(Array()) } metricsConfig.initialize() - def start(registerStaticSources: Boolean = true) { + def start(registerStaticSources: Boolean = true): Unit = { require(!running, "Attempting to start a MetricsSystem that is already running") running = true if (registerStaticSources) { @@ -105,16 +107,17 @@ private[spark] class MetricsSystem private ( sinks.foreach(_.start) } - def stop() { + def stop(): Unit = { if (running) { sinks.foreach(_.stop) + registry.removeMatching((_: String, _: Metric) => true) } else { logWarning("Stopping a MetricsSystem that is not running") } running = false } - def report() { + def report(): Unit = { sinks.foreach(_.report()) } @@ -124,7 +127,7 @@ private[spark] class MetricsSystem private ( * If either ID is not available, this defaults to just using . * * @param source Metric source to be named by this method. - * @return An unique metric name for each combination of + * @return A unique metric name for each combination of * application, executor/driver and metric source. 
*/ private[spark] def buildRegistryName(source: Source): String = { @@ -155,7 +158,7 @@ private[spark] class MetricsSystem private ( def getSourcesByName(sourceName: String): Seq[Source] = sources.filter(_.sourceName == sourceName) - def registerSource(source: Source) { + def registerSource(source: Source): Unit = { sources += source try { val regName = buildRegistryName(source) @@ -165,13 +168,13 @@ private[spark] class MetricsSystem private ( } } - def removeSource(source: Source) { + def removeSource(source: Source): Unit = { sources -= source val regName = buildRegistryName(source) registry.removeMatching((name: String, _: Metric) => name.startsWith(regName)) } - private def registerSources() { + private def registerSources(): Unit = { val instConfig = metricsConfig.getInstance(instance) val sourceConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SOURCE_REGEX) @@ -187,7 +190,7 @@ private[spark] class MetricsSystem private ( } } - private def registerSinks() { + private def registerSinks(): Unit = { val instConfig = metricsConfig.getInstance(instance) val sinkConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SINK_REGEX) @@ -201,6 +204,12 @@ private[spark] class MetricsSystem private ( classOf[Properties], classOf[MetricRegistry], classOf[SecurityManager]) .newInstance(kv._2, registry, securityMgr) metricsServlet = Some(servlet) + } else if (kv._1 == "prometheusServlet") { + val servlet = Utils.classForName[PrometheusServlet](classPath) + .getConstructor( + classOf[Properties], classOf[MetricRegistry], classOf[SecurityManager]) + .newInstance(kv._2, registry, securityMgr) + prometheusServlet = Some(servlet) } else { val sink = Utils.classForName[Sink](classPath) .getConstructor( @@ -225,7 +234,7 @@ private[spark] object MetricsSystem { private[this] val MINIMAL_POLL_UNIT = TimeUnit.SECONDS private[this] val MINIMAL_POLL_PERIOD = 1 - def checkMinimalPollingPeriod(pollUnit: TimeUnit, pollPeriod: Int) { + def 
checkMinimalPollingPeriod(pollUnit: TimeUnit, pollPeriod: Int): Unit = { val period = MINIMAL_POLL_UNIT.convert(pollPeriod, pollUnit) if (period < MINIMAL_POLL_PERIOD) { throw new IllegalArgumentException("Polling period " + pollPeriod + " " + pollUnit + diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala index fce556fd0382c..bfd23168e4003 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala @@ -50,15 +50,15 @@ private[spark] class ConsoleSink(val property: Properties, val registry: MetricR .convertRatesTo(TimeUnit.SECONDS) .build() - override def start() { + override def start(): Unit = { reporter.start(pollPeriod, pollUnit) } - override def stop() { + override def stop(): Unit = { reporter.stop() } - override def report() { + override def report(): Unit = { reporter.report() } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala index 88bba2fdbd1c6..579b8e0c0e984 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala @@ -59,15 +59,15 @@ private[spark] class CsvSink(val property: Properties, val registry: MetricRegis .convertRatesTo(TimeUnit.SECONDS) .build(new File(pollDir)) - override def start() { + override def start(): Unit = { reporter.start(pollPeriod, pollUnit) } - override def stop() { + override def stop(): Unit = { reporter.stop() } - override def report() { + override def report(): Unit = { reporter.report() } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala index 05d553ed30ff0..6ce64cd3543fe 100644 --- 
a/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala @@ -89,15 +89,15 @@ private[spark] class GraphiteSink(val property: Properties, val registry: Metric .filter(filter) .build(graphite) - override def start() { + override def start(): Unit = { reporter.start(pollPeriod, pollUnit) } - override def stop() { + override def stop(): Unit = { reporter.stop() } - override def report() { + override def report(): Unit = { reporter.report() } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala index 1992b42ac7f6b..a7b7b5573cfe8 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala @@ -19,7 +19,8 @@ package org.apache.spark.metrics.sink import java.util.Properties -import com.codahale.metrics.{JmxReporter, MetricRegistry} +import com.codahale.metrics.MetricRegistry +import com.codahale.metrics.jmx.JmxReporter import org.apache.spark.SecurityManager @@ -28,14 +29,14 @@ private[spark] class JmxSink(val property: Properties, val registry: MetricRegis val reporter: JmxReporter = JmxReporter.forRegistry(registry).build() - override def start() { + override def start(): Unit = { reporter.start() } - override def stop() { + override def stop(): Unit = { reporter.stop() } - override def report() { } + override def report(): Unit = { } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala index bea24ca7807e4..7dd27d4fb9bf3 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala @@ -59,9 +59,9 @@ private[spark] class MetricsServlet( mapper.writeValueAsString(registry) } - override def start() { } + 
override def start(): Unit = { } - override def stop() { } + override def stop(): Unit = { } - override def report() { } + override def report(): Unit = { } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala new file mode 100644 index 0000000000000..7c33bce78378d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.sink + +import java.util.Properties +import javax.servlet.http.HttpServletRequest + +import com.codahale.metrics.MetricRegistry +import org.eclipse.jetty.servlet.ServletContextHandler + +import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.ui.JettyUtils._ + +/** + * This exposes the metrics of the given registry with Prometheus format. + * + * The output is consistent with /metrics/json result in terms of item ordering + * and with the previous result of Spark JMX Sink + Prometheus JMX Converter combination + * in terms of key string format. 
+ */ +private[spark] class PrometheusServlet( + val property: Properties, + val registry: MetricRegistry, + securityMgr: SecurityManager) + extends Sink { + + val SERVLET_KEY_PATH = "path" + + val servletPath = property.getProperty(SERVLET_KEY_PATH) + + def getHandlers(conf: SparkConf): Array[ServletContextHandler] = { + Array[ServletContextHandler]( + createServletHandler(servletPath, + new ServletParams(request => getMetricsSnapshot(request), "text/plain"), conf) + ) + } + + def getMetricsSnapshot(request: HttpServletRequest): String = { + import scala.collection.JavaConverters._ + + val sb = new StringBuilder() + registry.getGauges.asScala.foreach { case (k, v) => + if (!v.getValue.isInstanceOf[String]) { + sb.append(s"${normalizeKey(k)}Value ${v.getValue}\n") + } + } + registry.getCounters.asScala.foreach { case (k, v) => + sb.append(s"${normalizeKey(k)}Count ${v.getCount}\n") + } + registry.getHistograms.asScala.foreach { case (k, h) => + val snapshot = h.getSnapshot + val prefix = normalizeKey(k) + sb.append(s"${prefix}Count ${h.getCount}\n") + sb.append(s"${prefix}Max ${snapshot.getMax}\n") + sb.append(s"${prefix}Mean ${snapshot.getMean}\n") + sb.append(s"${prefix}Min ${snapshot.getMin}\n") + sb.append(s"${prefix}50thPercentile ${snapshot.getMedian}\n") + sb.append(s"${prefix}75thPercentile ${snapshot.get75thPercentile}\n") + sb.append(s"${prefix}95thPercentile ${snapshot.get95thPercentile}\n") + sb.append(s"${prefix}98thPercentile ${snapshot.get98thPercentile}\n") + sb.append(s"${prefix}99thPercentile ${snapshot.get99thPercentile}\n") + sb.append(s"${prefix}999thPercentile ${snapshot.get999thPercentile}\n") + sb.append(s"${prefix}StdDev ${snapshot.getStdDev}\n") + } + registry.getMeters.entrySet.iterator.asScala.foreach { kv => + val prefix = normalizeKey(kv.getKey) + val meter = kv.getValue + sb.append(s"${prefix}Count ${meter.getCount}\n") + sb.append(s"${prefix}MeanRate ${meter.getMeanRate}\n") + sb.append(s"${prefix}OneMinuteRate 
${meter.getOneMinuteRate}\n") + sb.append(s"${prefix}FiveMinuteRate ${meter.getFiveMinuteRate}\n") + sb.append(s"${prefix}FifteenMinuteRate ${meter.getFifteenMinuteRate}\n") + } + registry.getTimers.entrySet.iterator.asScala.foreach { kv => + val prefix = normalizeKey(kv.getKey) + val timer = kv.getValue + val snapshot = timer.getSnapshot + sb.append(s"${prefix}Count ${timer.getCount}\n") + sb.append(s"${prefix}Max ${snapshot.getMax}\n") + sb.append(s"${prefix}Mean ${snapshot.getMean}\n") + sb.append(s"${prefix}Min ${snapshot.getMin}\n") + sb.append(s"${prefix}50thPercentile ${snapshot.getMedian}\n") + sb.append(s"${prefix}75thPercentile ${snapshot.get75thPercentile}\n") + sb.append(s"${prefix}95thPercentile ${snapshot.get95thPercentile}\n") + sb.append(s"${prefix}98thPercentile ${snapshot.get98thPercentile}\n") + sb.append(s"${prefix}99thPercentile ${snapshot.get99thPercentile}\n") + sb.append(s"${prefix}999thPercentile ${snapshot.get999thPercentile}\n") + sb.append(s"${prefix}StdDev ${snapshot.getStdDev}\n") + sb.append(s"${prefix}FifteenMinuteRate ${timer.getFifteenMinuteRate}\n") + sb.append(s"${prefix}FiveMinuteRate ${timer.getFiveMinuteRate}\n") + sb.append(s"${prefix}OneMinuteRate ${timer.getOneMinuteRate}\n") + sb.append(s"${prefix}MeanRate ${timer.getMeanRate}\n") + } + sb.toString() + } + + private def normalizeKey(key: String): String = { + s"metrics_${key.replaceAll("[^a-zA-Z0-9]", "_")}_" + } + + override def start(): Unit = { } + + override def stop(): Unit = { } + + override def report(): Unit = { } +} diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala index 7fa4ba7622980..968d5ca809e72 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala @@ -53,15 +53,15 @@ private[spark] class Slf4jSink( .convertRatesTo(TimeUnit.SECONDS) .build() - override def start() { + 
override def start(): Unit = { reporter.start(pollPeriod, pollUnit) } - override def stop() { + override def stop(): Unit = { reporter.stop() } - override def report() { + override def report(): Unit = { reporter.report() } } diff --git a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala index 4993519aa3843..0bd5774b632bf 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala @@ -22,16 +22,22 @@ import scala.reflect.ClassTag import org.apache.spark.TaskContext import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.network.client.StreamCallbackWithID -import org.apache.spark.storage.{BlockId, StorageLevel} +import org.apache.spark.storage.{BlockId, ShuffleBlockId, StorageLevel} private[spark] trait BlockDataManager { + /** + * Interface to get host-local shuffle block data. Throws an exception if the block cannot be + * found or cannot be read successfully. + */ + def getHostLocalShuffleData(blockId: BlockId, dirs: Array[String]): ManagedBuffer + /** * Interface to get local block data. Throws an exception if the block cannot be found or * cannot be read successfully. */ - def getBlockData(blockId: BlockId): ManagedBuffer + def getLocalBlockData(blockId: BlockId): ManagedBuffer /** * Put the block locally, using the given storage level. @@ -57,7 +63,7 @@ trait BlockDataManager { classTag: ClassTag[_]): StreamCallbackWithID /** - * Release locks acquired by [[putBlockData()]] and [[getBlockData()]]. + * Release locks acquired by [[putBlockData()]] and [[getLocalBlockData()]]. 
*/ def releaseLock(blockId: BlockId, taskContext: Option[TaskContext]): Unit } diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index b2ab31488e4c1..b3904f3362e8e 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -29,7 +29,7 @@ import org.apache.spark.network.client.{RpcResponseCallback, StreamCallbackWithI import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol._ import org.apache.spark.serializer.Serializer -import org.apache.spark.storage.{BlockId, ShuffleBlockId, StorageLevel} +import org.apache.spark.storage.{BlockId, ShuffleBlockBatchId, ShuffleBlockId, StorageLevel} /** * Serves requests to open blocks by simply registering one chunk per block requested. @@ -56,8 +56,12 @@ class NettyBlockRpcServer( message match { case openBlocks: OpenBlocks => val blocksNum = openBlocks.blockIds.length - val blocks = for (i <- (0 until blocksNum).view) - yield blockManager.getBlockData(BlockId.apply(openBlocks.blockIds(i))) + val blocks = (0 until blocksNum).map { i => + val blockId = BlockId.apply(openBlocks.blockIds(i)) + assert(!blockId.isInstanceOf[ShuffleBlockBatchId], + "Continuous shuffle block fetching only works for new fetch protocol.") + blockManager.getLocalBlockData(blockId) + } val streamId = streamManager.registerStream(appId, blocks.iterator.asJava, client.getChannel) logTrace(s"Registered streamId $streamId with $blocksNum buffers") @@ -65,12 +69,29 @@ class NettyBlockRpcServer( case fetchShuffleBlocks: FetchShuffleBlocks => val blocks = fetchShuffleBlocks.mapIds.zipWithIndex.flatMap { case (mapId, index) => - fetchShuffleBlocks.reduceIds.apply(index).map { reduceId => - blockManager.getBlockData( - 
ShuffleBlockId(fetchShuffleBlocks.shuffleId, mapId, reduceId)) + if (!fetchShuffleBlocks.batchFetchEnabled) { + fetchShuffleBlocks.reduceIds(index).map { reduceId => + blockManager.getLocalBlockData( + ShuffleBlockId(fetchShuffleBlocks.shuffleId, mapId, reduceId)) + } + } else { + val startAndEndId = fetchShuffleBlocks.reduceIds(index) + if (startAndEndId.length != 2) { + throw new IllegalStateException(s"Invalid shuffle fetch request when batch mode " + + s"is enabled: $fetchShuffleBlocks") + } + Array(blockManager.getLocalBlockData( + ShuffleBlockBatchId( + fetchShuffleBlocks.shuffleId, mapId, startAndEndId(0), startAndEndId(1)))) } } - val numBlockIds = fetchShuffleBlocks.reduceIds.map(_.length).sum + + val numBlockIds = if (fetchShuffleBlocks.batchFetchEnabled) { + fetchShuffleBlocks.mapIds.length + } else { + fetchShuffleBlocks.reduceIds.map(_.length).sum + } + val streamId = streamManager.registerStream(appId, blocks.iterator.asJava, client.getChannel) logTrace(s"Registered streamId $streamId with $numBlockIds buffers") diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 1d27fe7db193f..ffb696029a033 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -116,7 +116,8 @@ private[spark] class NettyBlockTransferService( logTrace(s"Fetch blocks from $host:$port (executor id $execId)") try { val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter { - override def createAndStart(blockIds: Array[String], listener: BlockFetchingListener) { + override def createAndStart(blockIds: Array[String], + listener: BlockFetchingListener): Unit = { try { val client = clientFactory.createClient(host, port) new OneForOneBlockFetcher(client, appId, execId, blockIds, listener, diff --git 
a/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala index b089bbd7e972e..34c04f4025a96 100644 --- a/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala +++ b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala @@ -43,7 +43,7 @@ private[spark] class ApproximateActionListener[T, U, R]( var failure: Option[Exception] = None // Set if the job has failed (permanently) var resultObject: Option[PartialResult[R]] = None // Set if we've already returned a PartialResult - override def taskSucceeded(index: Int, result: Any) { + override def taskSucceeded(index: Int, result: Any): Unit = { synchronized { evaluator.merge(index, result.asInstanceOf[U]) finishedTasks += 1 @@ -56,7 +56,7 @@ private[spark] class ApproximateActionListener[T, U, R]( } } - override def jobFailed(exception: Exception) { + override def jobFailed(exception: Exception): Unit = { synchronized { failure = Some(exception) this.notifyAll() diff --git a/core/src/main/scala/org/apache/spark/partial/PartialResult.scala b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala index 25cb7490aa9c9..012d4769617f6 100644 --- a/core/src/main/scala/org/apache/spark/partial/PartialResult.scala +++ b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala @@ -61,7 +61,7 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { * Set a handler to be called if this PartialResult's job fails. Only one failure handler * is supported per PartialResult. 
*/ - def onFail(handler: Exception => Unit) { + def onFail(handler: Exception => Unit): Unit = { synchronized { if (failureHandler.isDefined) { throw new UnsupportedOperationException("onFail cannot be called twice") @@ -85,7 +85,7 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { override def onComplete(handler: T => Unit): PartialResult[T] = synchronized { PartialResult.this.onComplete(handler.compose(f)).map(f) } - override def onFail(handler: Exception => Unit) { + override def onFail(handler: Exception => Unit): Unit = { synchronized { PartialResult.this.onFail(handler) } @@ -100,7 +100,7 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { } } - private[spark] def setFinalValue(value: R) { + private[spark] def setFinalValue(value: R): Unit = { synchronized { if (finalValue.isDefined) { throw new UnsupportedOperationException("setFinalValue called twice on a PartialResult") @@ -115,7 +115,7 @@ class PartialResult[R](initialVal: R, isFinal: Boolean) { private def getFinalValueInternal() = finalValue - private[spark] def setFailure(exception: Exception) { + private[spark] def setFailure(exception: Exception): Unit = { synchronized { if (failure.isDefined) { throw new UnsupportedOperationException("setFailure called twice on a PartialResult") diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala index ba9dae4ad48ec..d6379156ccf72 100644 --- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala @@ -109,7 +109,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi (it: Iterator[T]) => it.take(left).toArray, p, (index: Int, data: Array[T]) => buf(index) = data, - Unit) + ()) job.flatMap { _ => buf.foreach(results ++= _.take(num - results.size)) continue(partsScanned + p.size) @@ -125,7 +125,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends 
Serializable with Loggi def foreachAsync(f: T => Unit): FutureAction[Unit] = self.withScope { val cleanF = self.context.clean(f) self.context.submitJob[T, Unit, Unit](self, _.foreach(cleanF), Range(0, self.partitions.length), - (index, data) => Unit, Unit) + (index, data) => (), ()) } /** @@ -133,7 +133,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi */ def foreachPartitionAsync(f: Iterator[T] => Unit): FutureAction[Unit] = self.withScope { self.context.submitJob[T, Unit, Unit](self, f, Range(0, self.partitions.length), - (index, data) => Unit, Unit) + (index, data) => (), ()) } } diff --git a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala index 23cf19d55b4ae..a5c3e2a2dfe2a 100644 --- a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala @@ -61,7 +61,7 @@ class BlockRDD[T: ClassTag](sc: SparkContext, @transient val blockIds: Array[Blo * irreversible operation, as the data in the blocks cannot be recovered back * once removed. Use it with caution. */ - private[spark] def removeBlocks() { + private[spark] def removeBlocks(): Unit = { blockIds.foreach { blockId => sparkContext.env.blockManager.master.removeBlock(blockId) } @@ -77,7 +77,7 @@ class BlockRDD[T: ClassTag](sc: SparkContext, @transient val blockIds: Array[Blo } /** Check if this BlockRDD is valid. If not valid, exception is thrown. 
*/ - private[spark] def assertValid() { + private[spark] def assertValid(): Unit = { if (!isValid) { throw new SparkException( "Attempted to use %s after its blocks have been removed!".format(toString)) diff --git a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala index 57108dcedcf0c..fddd35b657479 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala @@ -85,7 +85,7 @@ class CartesianRDD[T: ClassTag, U: ClassTag]( } ) - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdd1 = null rdd2 = null diff --git a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index 909f58512153b..500d306f336ac 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -187,7 +187,7 @@ class CoGroupedRDD[K: ClassTag]( createCombiner, mergeValue, mergeCombiners) } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdds = null } diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala index 55c141c2b8a0a..58a0c0c400e09 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala @@ -107,7 +107,7 @@ private[spark] class CoalescedRDD[T: ClassTag]( }) } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() prev = null } @@ -239,7 +239,7 @@ private class DefaultPartitionCoalescer(val balanceSlack: Double = 0.10) * locations (2 * n log(n)) * @param targetLen The number of desired partition groups */ - def setupGroups(targetLen: Int, partitionLocs: PartitionLocations) { + 
def setupGroups(targetLen: Int, partitionLocs: PartitionLocations): Unit = { // deal with empty case, just create targetLen partition groups with no preferred location if (partitionLocs.partsWithLocs.isEmpty) { (1 to targetLen).foreach(_ => groupArr += new PartitionGroup()) @@ -328,7 +328,7 @@ private class DefaultPartitionCoalescer(val balanceSlack: Double = 0.10) def throwBalls( maxPartitions: Int, prev: RDD[_], - balanceSlack: Double, partitionLocs: PartitionLocations) { + balanceSlack: Double, partitionLocs: PartitionLocations): Unit = { if (noLocality) { // no preferredLocations in parent RDD, no randomization needed if (maxPartitions > groupArr.size) { // just return prev.partitions for ((p, i) <- prev.partitions.zipWithIndex) { diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index f3f9be3562922..9742d12cfe01e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -375,7 +375,7 @@ class HadoopRDD[K, V]( locs.getOrElse(hsplit.getLocations.filter(_ != "localhost")) } - override def checkpoint() { + override def checkpoint(): Unit = { // Do nothing. Hadoop RDD should not be checkpointed. } @@ -405,14 +405,14 @@ private[spark] object HadoopRDD extends Logging { * The three methods below are helpers for accessing the local map, a property of the SparkEnv of * the local process. */ - def getCachedMetadata(key: String): Any = SparkEnv.get.hadoopJobMetadata.get(key) + def getCachedMetadata(key: String): AnyRef = SparkEnv.get.hadoopJobMetadata.get(key) - private def putCachedMetadata(key: String, value: Any): Unit = + private def putCachedMetadata(key: String, value: AnyRef): Unit = SparkEnv.get.hadoopJobMetadata.put(key, value) /** Add Hadoop configuration specific to a single partition and attempt. 
*/ def addLocalConfiguration(jobTrackerId: String, jobId: Int, splitId: Int, attemptId: Int, - conf: JobConf) { + conf: JobConf): Unit = { val jobID = new JobID(jobTrackerId, jobId) val taId = new TaskAttemptID(new TaskID(jobID, TaskType.MAP, splitId), attemptId) diff --git a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala index bfe8152d4dee2..1beb085db27d9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala +++ b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala @@ -76,7 +76,7 @@ private[spark] object InputFileBlockHolder { def set(filePath: String, startOffset: Long, length: Long): Unit = { require(filePath != null, "filePath cannot be null") require(startOffset >= 0, s"startOffset ($startOffset) cannot be negative") - require(length >= 0, s"length ($length) cannot be negative") + require(length >= -1, s"length ($length) cannot be smaller than -1") inputBlock.get().set(new FileBlock(UTF8String.fromString(filePath), startOffset, length)) } diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index 56ef3e107a980..fccabcdd169c6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -109,7 +109,7 @@ class JdbcRDD[T: ClassTag]( } } - override def close() { + override def close(): Unit = { try { if (null != rs) { rs.close() diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala index aa61997122cf4..39520a9734b06 100644 --- a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala @@ -51,7 +51,7 @@ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag]( override def compute(split: Partition, context: TaskContext): 
Iterator[U] = f(context, split.index, firstParent[T].iterator(split, context)) - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() prev = null } diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index e23133682360f..1e39e10856877 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -261,7 +261,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) } else { StratifiedSamplingUtils.getBernoulliSamplingFunction(self, fractions, false, seed) } - self.mapPartitionsWithIndex(samplingFunc, preservesPartitioning = true) + self.mapPartitionsWithIndex(samplingFunc, preservesPartitioning = true, isOrderSensitive = true) } /** @@ -291,7 +291,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) } else { StratifiedSamplingUtils.getBernoulliSamplingFunction(self, fractions, true, seed) } - self.mapPartitionsWithIndex(samplingFunc, preservesPartitioning = true) + self.mapPartitionsWithIndex(samplingFunc, preservesPartitioning = true, isOrderSensitive = true) } /** diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala index d744d67592545..965618ee827d1 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala @@ -101,7 +101,7 @@ class PartitionerAwareUnionRDD[T: ClassTag]( } } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdds = null } diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala index 15691a8fc8eaa..c8cdaa60e4335 100644 --- 
a/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionwiseSampledRDD.scala @@ -67,4 +67,12 @@ private[spark] class PartitionwiseSampledRDD[T: ClassTag, U: ClassTag]( thisSampler.setSeed(split.seed) thisSampler.sample(firstParent[T].iterator(split.prev, context)) } + + override protected def getOutputDeterministicLevel = { + if (prev.outputDeterministicLevel == DeterministicLevel.UNORDERED) { + DeterministicLevel.INDETERMINATE + } else { + super.getOutputDeterministicLevel + } + } } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index eafe3b17c2136..a26b5791fa08b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -225,10 +225,24 @@ abstract class RDD[T: ClassTag]( /** Get the RDD's current storage level, or StorageLevel.NONE if none is set. */ def getStorageLevel: StorageLevel = storageLevel + /** + * Lock for all mutable state of this RDD (persistence, partitions, dependencies, etc.). We do + * not use `this` because RDDs are user-visible, so users might have added their own locking on + * RDDs; sharing that could lead to a deadlock. + * + * One thread might hold the lock on many of these, for a chain of RDD dependencies; but + * because DAGs are acyclic, and we only ever hold locks for one path in that DAG, there is no + * chance of deadlock. + * + * Executors may reference the shared fields (though they should never mutate them, + * that only happens on the driver). 
+ */ + private val stateLock = new Serializable {} + // Our dependencies and partitions will be gotten by calling subclass's methods below, and will // be overwritten when we're checkpointed - private var dependencies_ : Seq[Dependency[_]] = _ - @transient private var partitions_ : Array[Partition] = _ + @volatile private var dependencies_ : Seq[Dependency[_]] = _ + @volatile @transient private var partitions_ : Array[Partition] = _ /** An Option holding our checkpoint RDD, if we are checkpointed */ private def checkpointRDD: Option[CheckpointRDD[T]] = checkpointData.flatMap(_.checkpointRDD) @@ -240,7 +254,11 @@ abstract class RDD[T: ClassTag]( final def dependencies: Seq[Dependency[_]] = { checkpointRDD.map(r => List(new OneToOneDependency(r))).getOrElse { if (dependencies_ == null) { - dependencies_ = getDependencies + stateLock.synchronized { + if (dependencies_ == null) { + dependencies_ = getDependencies + } + } } dependencies_ } @@ -253,10 +271,14 @@ abstract class RDD[T: ClassTag]( final def partitions: Array[Partition] = { checkpointRDD.map(_.partitions).getOrElse { if (partitions_ == null) { - partitions_ = getPartitions - partitions_.zipWithIndex.foreach { case (partition, index) => - require(partition.index == index, - s"partitions($index).partition == ${partition.index}, but it should equal $index") + stateLock.synchronized { + if (partitions_ == null) { + partitions_ = getPartitions + partitions_.zipWithIndex.foreach { case (partition, index) => + require(partition.index == index, + s"partitions($index).partition == ${partition.index}, but it should equal $index") + } + } } } partitions_ @@ -339,6 +361,7 @@ abstract class RDD[T: ClassTag]( readCachedBlock = false computeOrReadCheckpoint(partition, context) }) match { + // Block hit. 
case Left(blockResult) => if (readCachedBlock) { val existingMetrics = context.taskMetrics().inputMetrics @@ -352,6 +375,7 @@ abstract class RDD[T: ClassTag]( } else { new InterruptibleIterator(context, blockResult.data.asInstanceOf[Iterator[T]]) } + // Need to compute the block. case Right(iter) => new InterruptibleIterator(context, iter.asInstanceOf[Iterator[T]]) } @@ -430,8 +454,6 @@ abstract class RDD[T: ClassTag]( * * If you are decreasing the number of partitions in this RDD, consider using `coalesce`, * which can avoid performing a shuffle. - * - * TODO Fix the Shuffle+Repartition data loss issue described in SPARK-23207. */ def repartition(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = withScope { coalesce(numPartitions, shuffle = true) @@ -557,7 +579,7 @@ abstract class RDD[T: ClassTag]( val sampler = new BernoulliCellSampler[T](lb, ub) sampler.setSeed(seed + index) sampler.sample(partition) - }, preservesPartitioning = true) + }, isOrderSensitive = true, preservesPartitioning = true) } /** @@ -870,6 +892,29 @@ abstract class RDD[T: ClassTag]( preservesPartitioning) } + /** + * Return a new RDD by applying a function to each partition of this RDD, while tracking the index + * of the original partition. + * + * `preservesPartitioning` indicates whether the input function preserves the partitioner, which + * should be `false` unless this is a pair RDD and the input function doesn't modify the keys. + * + * `isOrderSensitive` indicates whether the function is order-sensitive. If it is order + * sensitive, it may return totally different result when the input order + * is changed. Mostly stateful functions are order-sensitive. 
+ */ + private[spark] def mapPartitionsWithIndex[U: ClassTag]( + f: (Int, Iterator[T]) => Iterator[U], + preservesPartitioning: Boolean, + isOrderSensitive: Boolean): RDD[U] = withScope { + val cleanedF = sc.clean(f) + new MapPartitionsRDD( + this, + (_: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(index, iter), + preservesPartitioning, + isOrderSensitive = isOrderSensitive) + } + /** * Zips this RDD with another one, returning key-value pairs with the first element in each RDD, * second element in each RDD, etc. Assumes that the two RDDs have the *same number of @@ -1767,7 +1812,7 @@ abstract class RDD[T: ClassTag]( * Changes the dependencies of this RDD from its original parents to a new RDD (`newRDD`) * created from the checkpoint file, and forget its old dependencies and partitions. */ - private[spark] def markCheckpointed(): Unit = { + private[spark] def markCheckpointed(): Unit = stateLock.synchronized { clearDependencies() partitions_ = null deps = null // Forget the constructor argument for dependencies too @@ -1779,7 +1824,7 @@ abstract class RDD[T: ClassTag]( * collected. Subclasses of RDD may override this method for implementing their own cleaning * logic. See [[org.apache.spark.rdd.UnionRDD]] for an example. 
*/ - protected def clearDependencies(): Unit = { + protected def clearDependencies(): Unit = stateLock.synchronized { dependencies_ = null } @@ -1938,6 +1983,7 @@ abstract class RDD[T: ClassTag]( deterministicLevelCandidates.maxBy(_.id) } } + } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDBarrier.scala b/core/src/main/scala/org/apache/spark/rdd/RDDBarrier.scala index 42802f7113a19..b70ea0073c9a0 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDDBarrier.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDBarrier.scala @@ -54,5 +54,27 @@ class RDDBarrier[T: ClassTag] private[spark] (rdd: RDD[T]) { ) } + /** + * :: Experimental :: + * Returns a new RDD by applying a function to each partition of the wrapped RDD, while tracking + * the index of the original partition. And all tasks are launched together in a barrier stage. + * The interface is the same as [[org.apache.spark.rdd.RDD#mapPartitionsWithIndex]]. + * Please see the API doc there. + * @see [[org.apache.spark.BarrierTaskContext]] + */ + @Experimental + @Since("3.0.0") + def mapPartitionsWithIndex[S: ClassTag]( + f: (Int, Iterator[T]) => Iterator[S], + preservesPartitioning: Boolean = false): RDD[S] = rdd.withScope { + val cleanedF = rdd.sparkContext.clean(f) + new MapPartitionsRDD( + rdd, + (_: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(index, iter), + preservesPartitioning, + isFromBarrier = true + ) + } + // TODO: [SPARK-25247] add extra conf to RDDBarrier, e.g., timeout. 
} diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala index d165610291f1d..a5c07c07e8f2b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala @@ -20,15 +20,17 @@ package org.apache.spark.rdd import java.io.{FileNotFoundException, IOException} import java.util.concurrent.TimeUnit +import scala.collection.mutable import scala.reflect.ClassTag import scala.util.control.NonFatal +import com.google.common.cache.{CacheBuilder, CacheLoader} import org.apache.hadoop.fs.Path import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.{BUFFER_SIZE, CHECKPOINT_COMPRESS} +import org.apache.spark.internal.config.{BUFFER_SIZE, CACHE_CHECKPOINT_PREFERRED_LOCS_EXPIRE_TIME, CHECKPOINT_COMPRESS} import org.apache.spark.io.CompressionCodec import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -82,16 +84,40 @@ private[spark] class ReliableCheckpointRDD[T: ClassTag]( Array.tabulate(inputFiles.length)(i => new CheckpointRDDPartition(i)) } - /** - * Return the locations of the checkpoint file associated with the given partition. - */ - protected override def getPreferredLocations(split: Partition): Seq[String] = { + // Cache of preferred locations of checkpointed files. + @transient private[spark] lazy val cachedPreferredLocations = CacheBuilder.newBuilder() + .expireAfterWrite( + SparkEnv.get.conf.get(CACHE_CHECKPOINT_PREFERRED_LOCS_EXPIRE_TIME).get, + TimeUnit.MINUTES) + .build( + new CacheLoader[Partition, Seq[String]]() { + override def load(split: Partition): Seq[String] = { + getPartitionBlockLocations(split) + } + }) + + // Returns the block locations of given partition on file system. 
+ private def getPartitionBlockLocations(split: Partition): Seq[String] = { val status = fs.getFileStatus( new Path(checkpointPath, ReliableCheckpointRDD.checkpointFileName(split.index))) val locations = fs.getFileBlockLocations(status, 0, status.getLen) locations.headOption.toList.flatMap(_.getHosts).filter(_ != "localhost") } + private lazy val cachedExpireTime = + SparkEnv.get.conf.get(CACHE_CHECKPOINT_PREFERRED_LOCS_EXPIRE_TIME) + + /** + * Return the locations of the checkpoint file associated with the given partition. + */ + protected override def getPreferredLocations(split: Partition): Seq[String] = { + if (cachedExpireTime.isDefined && cachedExpireTime.get > 0) { + cachedPreferredLocations.get(split) + } else { + getPartitionBlockLocations(split) + } + } + /** * Read the content of the checkpoint file associated with the given partition. */ @@ -166,7 +192,7 @@ private[spark] object ReliableCheckpointRDD extends Logging { def writePartitionToCheckpointFile[T: ClassTag]( path: String, broadcastedConf: Broadcast[SerializableConfiguration], - blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) { + blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]): Unit = { val env = SparkEnv.get val outputDir = new Path(path) val fs = outputDir.getFileSystem(broadcastedConf.value.value) diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala index 5ec99b7f4f3ab..0930a5c9cfb96 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala @@ -108,7 +108,7 @@ class ShuffledRDD[K: ClassTag, V: ClassTag, C: ClassTag]( .asInstanceOf[Iterator[(K, C)]] } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() prev = null } diff --git a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala index 
42d190377f104..d5a811d4dc3fd 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala @@ -127,7 +127,7 @@ private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( map.asScala.iterator.map(t => t._2.iterator.map((t._1, _))).flatten } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdd1 = null rdd2 = null diff --git a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala index 36589e93a1c5e..63fa3c2487c33 100644 --- a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala @@ -21,6 +21,7 @@ import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.ForkJoinTaskSupport +import scala.collection.parallel.immutable.ParVector import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} @@ -75,13 +76,13 @@ class UnionRDD[T: ClassTag]( override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { - val parArray = rdds.par + val parArray = new ParVector(rdds.toVector) parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } - val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) + val array = new Array[Partition](parRDDs.map(_.partitions.length).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) @@ -108,7 +109,7 @@ class UnionRDD[T: ClassTag]( override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdds = null } diff --git 
a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index 3cb1231bd3477..678a48948a3c1 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -70,7 +70,7 @@ private[spark] abstract class ZippedPartitionsBaseRDD[V: ClassTag]( s.asInstanceOf[ZippedPartitionsPartition].preferredLocations } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdds = null } @@ -89,7 +89,7 @@ private[spark] class ZippedPartitionsRDD2[A: ClassTag, B: ClassTag, V: ClassTag] f(rdd1.iterator(partitions(0), context), rdd2.iterator(partitions(1), context)) } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdd1 = null rdd2 = null @@ -114,7 +114,7 @@ private[spark] class ZippedPartitionsRDD3 rdd3.iterator(partitions(2), context)) } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdd1 = null rdd2 = null @@ -142,7 +142,7 @@ private[spark] class ZippedPartitionsRDD4 rdd4.iterator(partitions(3), context)) } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() rdd1 = null rdd2 = null diff --git a/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala b/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala index 4a6106984a495..e460542f0319e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala +++ b/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala @@ -76,8 +76,13 @@ import org.apache.spark.util.PeriodicCheckpointer */ private[spark] class PeriodicRDDCheckpointer[T]( checkpointInterval: Int, - sc: SparkContext) + sc: SparkContext, + storageLevel: StorageLevel) extends 
PeriodicCheckpointer[RDD[T]](checkpointInterval, sc) { + require(storageLevel != StorageLevel.NONE) + + def this(checkpointInterval: Int, sc: SparkContext) = + this(checkpointInterval, sc, StorageLevel.MEMORY_ONLY) override protected def checkpoint(data: RDD[T]): Unit = data.checkpoint() @@ -85,7 +90,7 @@ private[spark] class PeriodicRDDCheckpointer[T]( override protected def persist(data: RDD[T]): Unit = { if (data.getStorageLevel == StorageLevel.NONE) { - data.persist() + data.persist(storageLevel) } } diff --git a/core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequest.scala b/core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequest.scala new file mode 100644 index 0000000000000..9a920914ed674 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequest.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +/** + * An Executor resource request. This is used in conjunction with the ResourceProfile to + * programmatically specify the resources needed for an RDD that will be applied at the + * stage level. 
+ * + * This is used to specify what the resource requirements are for an Executor and how + * Spark can find out specific details about those resources. Not all the parameters are + * required for every resource type. Resources like GPUs are supported and have the same limitations + * as using the global spark configs spark.executor.resource.gpu.*. The amount, discoveryScript, + * and vendor parameters for resources are all the same parameters a user would specify through the + * configs: spark.executor.resource.{resourceName}.{amount, discoveryScript, vendor}. + * + * For instance, a user wants to allocate an Executor with GPU resources on YARN. The user has + * to specify the resource name (gpu), the amount or number of GPUs per Executor, + * the discovery script would be specified so that when the Executor starts up it can + * discover what GPU addresses are available for it to use because YARN doesn't tell + * Spark that, then vendor would not be used because it's specific to Kubernetes. + * + * See the configuration and cluster specific docs for more details. + * + * Use ExecutorResourceRequests class as a convenience API. + * + * @param resourceName Name of the resource + * @param amount Amount requesting + * @param discoveryScript Optional script used to discover the resources. This is required on some + * cluster managers that don't tell Spark the addresses of the resources + * allocated. The script runs on Executors startup to discover the addresses + * of the resources available. + * @param vendor Optional vendor, required for some cluster managers + * + * This api is currently private until the rest of the pieces are in place and then it + * will become public. 
+ */ +private[spark] class ExecutorResourceRequest( + val resourceName: String, + val amount: Long, + val discoveryScript: String = "", + val vendor: String = "") extends Serializable { + + override def equals(obj: Any): Boolean = { + obj match { + case that: ExecutorResourceRequest => + that.getClass == this.getClass && + that.resourceName == resourceName && that.amount == amount && + that.discoveryScript == discoveryScript && that.vendor == vendor + case _ => + false + } + } + + override def hashCode(): Int = + Seq(resourceName, amount, discoveryScript, vendor).hashCode() + + override def toString(): String = { + s"name: $resourceName, amount: $amount, script: $discoveryScript, vendor: $vendor" + } +} diff --git a/core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequests.scala b/core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequests.scala new file mode 100644 index 0000000000000..d4c29f9a70c44 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequests.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.resource + +import java.util.concurrent.ConcurrentHashMap + +import scala.collection.JavaConverters._ + +import org.apache.spark.network.util.JavaUtils +import org.apache.spark.resource.ResourceProfile._ + +/** + * A set of Executor resource requests. This is used in conjunction with the ResourceProfile to + * programmatically specify the resources needed for an RDD that will be applied at the + * stage level. + * + * This api is currently private until the rest of the pieces are in place and then it + * will become public. + */ +private[spark] class ExecutorResourceRequests() extends Serializable { + + private val _executorResources = new ConcurrentHashMap[String, ExecutorResourceRequest]() + + def requests: Map[String, ExecutorResourceRequest] = _executorResources.asScala.toMap + + /** + * Specify heap memory. The value specified will be converted to MiB. + * + * @param amount Amount of memory. In the same format as JVM memory strings (e.g. 512m, 2g). + * Default unit is MiB if not specified. + */ + def memory(amount: String): this.type = { + val amountMiB = JavaUtils.byteStringAsMb(amount) + val req = new ExecutorResourceRequest(MEMORY, amountMiB) + _executorResources.put(MEMORY, req) + this + } + + /** + * Specify overhead memory. The value specified will be converted to MiB. + * + * @param amount Amount of memory. In the same format as JVM memory strings (e.g. 512m, 2g). + * Default unit is MiB if not specified. + */ + def memoryOverhead(amount: String): this.type = { + val amountMiB = JavaUtils.byteStringAsMb(amount) + val req = new ExecutorResourceRequest(OVERHEAD_MEM, amountMiB) + _executorResources.put(OVERHEAD_MEM, req) + this + } + + /** + * Specify pyspark memory. The value specified will be converted to MiB. + * + * @param amount Amount of memory. In the same format as JVM memory strings (e.g. 512m, 2g). + * Default unit is MiB if not specified. 
+ */ + def pysparkMemory(amount: String): this.type = { + val amountMiB = JavaUtils.byteStringAsMb(amount) + val req = new ExecutorResourceRequest(PYSPARK_MEM, amountMiB) + _executorResources.put(PYSPARK_MEM, req) + this + } + + /** + * Specify number of cores per Executor. + * + * @param amount Number of cores to allocate per Executor. + */ + def cores(amount: Int): this.type = { + val req = new ExecutorResourceRequest(CORES, amount) + _executorResources.put(CORES, req) + this + } + + /** + * Amount of a particular custom resource(GPU, FPGA, etc) to use. The resource names supported + * correspond to the regular Spark configs with the prefix removed. For instance, resources + * like GPUs are gpu (spark configs spark.executor.resource.gpu.*). If you pass in a resource + * that the cluster manager doesn't support the result is undefined, it may error or may just + * be ignored. + * + * @param resourceName Name of the resource. + * @param amount amount of that resource per executor to use. + * @param discoveryScript Optional script used to discover the resources. This is required on + * some cluster managers that don't tell Spark the addresses of + * the resources allocated. The script runs on Executors startup to + * discover the addresses of the resources available. 
+ * @param vendor Optional vendor, required for some cluster managers + */ + def resource( + resourceName: String, + amount: Long, + discoveryScript: String = "", + vendor: String = ""): this.type = { + // a bit weird but for Java api use empty string as meaning None because empty + // string is otherwise invalid for those parameters anyway + val req = new ExecutorResourceRequest(resourceName, amount, discoveryScript, vendor) + _executorResources.put(resourceName, req) + this + } + + override def toString: String = { + s"Executor resource requests: ${_executorResources}" + } +} diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala index e64fadc113149..22272a0f98a6c 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala @@ -30,27 +30,44 @@ trait ResourceAllocator { protected def resourceName: String protected def resourceAddresses: Seq[String] + protected def slotsPerAddress: Int /** - * Map from an address to its availability, the value `true` means the address is available, - * while value `false` means the address is assigned. + * Map from an address to its availability, a value > 0 means the address is available, + * while value of 0 means the address is fully assigned. + * + * For task resources ([[org.apache.spark.scheduler.ExecutorResourceInfo]]), this value + * can be a multiple, such that each address can be allocated up to [[slotsPerAddress]] + * times. + * * TODO Use [[OpenHashMap]] instead to gain better performance. */ - private lazy val addressAvailabilityMap = mutable.HashMap(resourceAddresses.map(_ -> true): _*) + private lazy val addressAvailabilityMap = { + mutable.HashMap(resourceAddresses.map(_ -> slotsPerAddress): _*) + } /** * Sequence of currently available resource addresses. 
+ * + * With [[slotsPerAddress]] greater than 1, [[availableAddrs]] can contain duplicate addresses + * e.g. with [[slotsPerAddress]] == 2, availableAddrs for addresses 0 and 1 can look like + * Seq("0", "0", "1"), where address 0 has two assignments available, and 1 has one. */ - def availableAddrs: Seq[String] = addressAvailabilityMap.flatMap { case (addr, available) => - if (available) Some(addr) else None - }.toSeq + def availableAddrs: Seq[String] = addressAvailabilityMap + .flatMap { case (addr, available) => + (0 until available).map(_ => addr) + }.toSeq /** * Sequence of currently assigned resource addresses. + * + * With [[slotsPerAddress]] greater than 1, [[assignedAddrs]] can contain duplicate addresses + * e.g. with [[slotsPerAddress]] == 2, assignedAddrs for addresses 0 and 1 can look like + * Seq("0", "1", "1"), where address 0 was assigned once, and 1 was assigned twice. */ private[spark] def assignedAddrs: Seq[String] = addressAvailabilityMap .flatMap { case (addr, available) => - if (!available) Some(addr) else None + (0 until slotsPerAddress - available).map(_ => addr) }.toSeq /** @@ -65,8 +82,8 @@ trait ResourceAllocator { s"address $address doesn't exist.") } val isAvailable = addressAvailabilityMap(address) - if (isAvailable) { - addressAvailabilityMap(address) = false + if (isAvailable > 0) { + addressAvailabilityMap(address) = addressAvailabilityMap(address) - 1 } else { throw new SparkException("Try to acquire an address that is not available. " + s"$resourceName address $address is not available.") @@ -86,8 +103,8 @@ trait ResourceAllocator { s"address $address doesn't exist.") } val isAvailable = addressAvailabilityMap(address) - if (!isAvailable) { - addressAvailabilityMap(address) = true + if (isAvailable < slotsPerAddress) { + addressAvailabilityMap(address) = addressAvailabilityMap(address) + 1 } else { throw new SparkException(s"Try to release an address that is not assigned. 
$resourceName " + s"address $address is not assigned.") diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala b/core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala new file mode 100644 index 0000000000000..2ac6d3c500f9d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +import java.io.File +import java.util.Optional + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.api.resource.ResourceDiscoveryPlugin +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils.executeAndGetOutput + +/** + * The default plugin that is loaded into a Spark application to control how custom + * resources are discovered. This executes the discovery script specified by the user + * and gets the json output back and constructs ResourceInformation objects from that. + * If the user specifies custom plugins, this is the last one to be executed and + * throws if the resource isn't discovered. 
+ */ +class ResourceDiscoveryScriptPlugin extends ResourceDiscoveryPlugin with Logging { + override def discoverResource( + request: ResourceRequest, + sparkConf: SparkConf): Optional[ResourceInformation] = { + val script = request.discoveryScript + val resourceName = request.id.resourceName + val result = if (script.isPresent) { + val scriptFile = new File(script.get) + logInfo(s"Discovering resources for $resourceName with script: $scriptFile") + // check that script exists and try to execute + if (scriptFile.exists()) { + val output = executeAndGetOutput(Seq(script.get), new File(".")) + ResourceInformation.parseJson(output) + } else { + throw new SparkException(s"Resource script: $scriptFile to discover $resourceName " + + "doesn't exist!") + } + } else { + throw new SparkException(s"User is expecting to use resource: $resourceName, but " + + "didn't specify a discovery script!") + } + if (!result.name.equals(resourceName)) { + throw new SparkException(s"Error running the resource discovery script ${script.get}: " + + s"script returned resource name ${result.name} and we were expecting $resourceName.") + } + Optional.of(result) + } +} diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala new file mode 100644 index 0000000000000..03dcf5e317798 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +import java.util.{Map => JMap} +import java.util.concurrent.atomic.AtomicInteger +import javax.annotation.concurrent.GuardedBy + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.annotation.Evolving +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Python.PYSPARK_EXECUTOR_MEMORY +import org.apache.spark.util.Utils + +/** + * Resource profile to associate with an RDD. A ResourceProfile allows the user to + * specify executor and task requirements for an RDD that will get applied during a + * stage. This allows the user to change the resource requirements between stages. + * This is meant to be immutable so user can't change it after building. + */ +@Evolving +class ResourceProfile( + val executorResources: Map[String, ExecutorResourceRequest], + val taskResources: Map[String, TaskResourceRequest]) extends Serializable with Logging { + + // _id is only a var for testing purposes + private var _id = ResourceProfile.getNextProfileId + // This is used for any resources that use fractional amounts, the key is the resource name + // and the value is the number of tasks that can share a resource address. For example, + // if the user says task gpu amount is 0.5, that results in 2 tasks per resource address. 
+ private var _executorResourceSlotsPerAddr: Option[Map[String, Int]] = None + private var _limitingResource: Option[String] = None + private var _maxTasksPerExecutor: Option[Int] = None + private var _coresLimitKnown: Boolean = false + + def id: Int = _id + + /** + * (Java-specific) gets a Java Map of resources to TaskResourceRequest + */ + def taskResourcesJMap: JMap[String, TaskResourceRequest] = taskResources.asJava + + /** + * (Java-specific) gets a Java Map of resources to ExecutorResourceRequest + */ + def executorResourcesJMap: JMap[String, ExecutorResourceRequest] = { + executorResources.asJava + } + + // Note that some cluster managers don't set the executor cores explicitly so + // be sure to check the Option as required + private[spark] def getExecutorCores: Option[Int] = { + executorResources.get(ResourceProfile.CORES).map(_.amount.toInt) + } + + private[spark] def getTaskCpus: Option[Int] = { + taskResources.get(ResourceProfile.CPUS).map(_.amount.toInt) + } + + private[spark] def getNumSlotsPerAddress(resource: String, sparkConf: SparkConf): Int = { + _executorResourceSlotsPerAddr.getOrElse { + calculateTasksAndLimitingResource(sparkConf) + } + _executorResourceSlotsPerAddr.get.getOrElse(resource, + throw new SparkException(s"Resource $resource doesn't exist in profile id: $id")) + } + + // Maximum tasks you could put on an executor with this profile based on the limiting resource. + // If the executor cores config is not present this value is based on the other resources + // available or 1 if no other resources. You need to check the isCoresLimitKnown to + // calculate proper value. + private[spark] def maxTasksPerExecutor(sparkConf: SparkConf): Int = { + _maxTasksPerExecutor.getOrElse { + calculateTasksAndLimitingResource(sparkConf) + _maxTasksPerExecutor.get + } + } + + // Returns whether the executor cores was available to use to calculate the max tasks + // per executor and limiting resource. 
Some cluster managers (like standalone and coarse + // grained mesos) don't use the cores config by default so we can't use it to calculate slots. + private[spark] def isCoresLimitKnown: Boolean = _coresLimitKnown + + // The resource that has the least amount of slots per executor. Its possible multiple or all + // resources result in same number of slots and this could be any of those. + // If the executor cores config is not present this value is based on the other resources + // available or empty string if no other resources. You need to check the isCoresLimitKnown to + // calculate proper value. + private[spark] def limitingResource(sparkConf: SparkConf): String = { + _limitingResource.getOrElse { + calculateTasksAndLimitingResource(sparkConf) + _limitingResource.get + } + } + + // executor cores config is not set for some masters by default and the default value + // only applies to yarn/k8s + private def shouldCheckExecutorCores(sparkConf: SparkConf): Boolean = { + val master = sparkConf.getOption("spark.master") + sparkConf.contains(EXECUTOR_CORES) || + (master.isDefined && (master.get.equalsIgnoreCase("yarn") || master.get.startsWith("k8s"))) + } + + /** + * Utility function to calculate the number of tasks you can run on a single Executor based + * on the task and executor resource requests in the ResourceProfile. This will be based + * off the resource that is most restrictive. For instance, if the executor + * request is for 4 cpus and 2 gpus and your task request is for 1 cpu and 1 gpu each, the + * limiting resource is gpu and the number of tasks you can run on a single executor is 2. + * This function also sets the limiting resource, isCoresLimitKnown and number of slots per + * resource address. 
+ */ + private def calculateTasksAndLimitingResource(sparkConf: SparkConf): Unit = synchronized { + val shouldCheckExecCores = shouldCheckExecutorCores(sparkConf) + var (taskLimit, limitingResource) = if (shouldCheckExecCores) { + val cpusPerTask = taskResources.get(ResourceProfile.CPUS) + .map(_.amount).getOrElse(sparkConf.get(CPUS_PER_TASK).toDouble).toInt + assert(cpusPerTask > 0, "CPUs per task configuration has to be > 0") + val coresPerExecutor = getExecutorCores.getOrElse(sparkConf.get(EXECUTOR_CORES)) + _coresLimitKnown = true + ResourceUtils.validateTaskCpusLargeEnough(coresPerExecutor, cpusPerTask) + val tasksBasedOnCores = coresPerExecutor / cpusPerTask + // Note that if the cores per executor aren't set properly this calculation could be off, + // we default it to just be 1 in order to allow checking of the rest of the custom + // resources. We set the limit based on the other resources available. + (tasksBasedOnCores, ResourceProfile.CPUS) + } else { + (-1, "") + } + val numPartsPerResourceMap = new mutable.HashMap[String, Int] + numPartsPerResourceMap(ResourceProfile.CORES) = 1 + val taskResourcesToCheck = new mutable.HashMap[String, TaskResourceRequest] + taskResourcesToCheck ++= ResourceProfile.getCustomTaskResources(this) + val execResourceToCheck = ResourceProfile.getCustomExecutorResources(this) + execResourceToCheck.foreach { case (rName, execReq) => + val taskReq = taskResources.get(rName).map(_.amount).getOrElse(0.0) + numPartsPerResourceMap(rName) = 1 + if (taskReq > 0.0) { + if (taskReq > execReq.amount) { + throw new SparkException(s"The executor resource: $rName, amount: ${execReq.amount} " + + s"needs to be >= the task resource request amount of $taskReq") + } + val (numPerTask, parts) = ResourceUtils.calculateAmountAndPartsForFraction(taskReq) + numPartsPerResourceMap(rName) = parts + val numTasks = ((execReq.amount * parts) / numPerTask).toInt + if (taskLimit == -1 || numTasks < taskLimit) { + if (shouldCheckExecCores) { + // TODO - 
until resource profiles fully implemented we need to error if cores not + // limiting resource because the scheduler code uses that for slots + throw new IllegalArgumentException("The number of slots on an executor has to be " + + "limited by the number of cores, otherwise you waste resources and " + + "dynamic allocation doesn't work properly. Your configuration has " + + s"core/task cpu slots = ${taskLimit} and " + + s"${execReq.resourceName} = ${numTasks}. " + + "Please adjust your configuration so that all resources require same number " + + "of executor slots.") + } + limitingResource = rName + taskLimit = numTasks + } + taskResourcesToCheck -= rName + } else { + logWarning(s"The executor resource config for resource: $rName was specified but " + + "no corresponding task resource request was specified.") + } + } + if(!shouldCheckExecCores) { + // if we can't rely on the executor cores config throw a warning for user + logWarning("Please ensure that the number of slots available on your " + + "executors is limited by the number of cores to task cpus and not another " + + "custom resource. 
If cores is not the limiting resource then dynamic " + + "allocation will not work properly!") + } + if (taskResourcesToCheck.nonEmpty) { + throw new SparkException("No executor resource configs were not specified for the " + + s"following task configs: ${taskResourcesToCheck.keys.mkString(",")}") + } + logInfo(s"Limiting resource is $limitingResource at $taskLimit tasks per executor") + _executorResourceSlotsPerAddr = Some(numPartsPerResourceMap.toMap) + _maxTasksPerExecutor = if (taskLimit == -1) Some(1) else Some(taskLimit) + _limitingResource = Some(limitingResource) + if (shouldCheckExecCores) { + ResourceUtils.warnOnWastedResources(this, sparkConf) + } + } + + // to be used only by history server for reconstruction from events + private[spark] def setResourceProfileId(id: Int): Unit = { + _id = id + } + + // testing only + private[spark] def setToDefaultProfile(): Unit = { + _id = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID + } + + override def equals(obj: Any): Boolean = { + obj match { + case that: ResourceProfile => + that.getClass == this.getClass && that.id == _id && + that.taskResources == taskResources && that.executorResources == executorResources + case _ => + false + } + } + + override def hashCode(): Int = Seq(taskResources, executorResources).hashCode() + + override def toString(): String = { + s"Profile: id = ${_id}, executor resources: ${executorResources.mkString(",")}, " + + s"task resources: ${taskResources.mkString(",")}" + } +} + +object ResourceProfile extends Logging { + // task resources + val CPUS = "cpus" + // Executor resources + val CORES = "cores" + val MEMORY = "memory" + val OVERHEAD_MEM = "memoryOverhead" + val PYSPARK_MEM = "pyspark.memory" + + // all supported spark executor resources (minus the custom resources like GPUs/FPGAs) + val allSupportedExecutorResources = Seq(CORES, MEMORY, OVERHEAD_MEM, PYSPARK_MEM) + + val UNKNOWN_RESOURCE_PROFILE_ID = -1 + val DEFAULT_RESOURCE_PROFILE_ID = 0 + + private lazy val nextProfileId = 
new AtomicInteger(0) + private val DEFAULT_PROFILE_LOCK = new Object() + + // The default resource profile uses the application level configs. + // var so that it can be reset for testing purposes. + @GuardedBy("DEFAULT_PROFILE_LOCK") + private var defaultProfile: Option[ResourceProfile] = None + + private[spark] def getNextProfileId: Int = nextProfileId.getAndIncrement() + + private[spark] def getOrCreateDefaultProfile(conf: SparkConf): ResourceProfile = { + DEFAULT_PROFILE_LOCK.synchronized { + defaultProfile match { + case Some(prof) => prof + case None => + val taskResources = getDefaultTaskResources(conf) + val executorResources = getDefaultExecutorResources(conf) + val defProf = new ResourceProfile(executorResources, taskResources) + defProf.setToDefaultProfile() + defaultProfile = Some(defProf) + logInfo("Default ResourceProfile created, executor resources: " + + s"${defProf.executorResources}, task resources: " + + s"${defProf.taskResources}") + defProf + } + } + } + + private def getDefaultTaskResources(conf: SparkConf): Map[String, TaskResourceRequest] = { + val cpusPerTask = conf.get(CPUS_PER_TASK) + val treqs = new TaskResourceRequests().cpus(cpusPerTask) + ResourceUtils.addTaskResourceRequests(conf, treqs) + treqs.requests + } + + private def getDefaultExecutorResources(conf: SparkConf): Map[String, ExecutorResourceRequest] = { + val ereqs = new ExecutorResourceRequests() + ereqs.cores(conf.get(EXECUTOR_CORES)) + ereqs.memory(conf.get(EXECUTOR_MEMORY).toString) + conf.get(EXECUTOR_MEMORY_OVERHEAD).map(mem => ereqs.memoryOverhead(mem.toString)) + conf.get(PYSPARK_EXECUTOR_MEMORY).map(mem => ereqs.pysparkMemory(mem.toString)) + val execReq = ResourceUtils.parseAllResourceRequests(conf, SPARK_EXECUTOR_PREFIX) + execReq.foreach { req => + val name = req.id.resourceName + ereqs.resource(name, req.amount, req.discoveryScript.orElse(""), + req.vendor.orElse("")) + } + ereqs.requests + } + + // for testing only + private[spark] def reInitDefaultProfile(conf: 
SparkConf): Unit = { + clearDefaultProfile() + // force recreate it after clearing + getOrCreateDefaultProfile(conf) + } + + private[spark] def clearDefaultProfile(): Unit = { + DEFAULT_PROFILE_LOCK.synchronized { + defaultProfile = None + } + } + + private[spark] def getCustomTaskResources( + rp: ResourceProfile): Map[String, TaskResourceRequest] = { + rp.taskResources.filterKeys(k => !k.equals(ResourceProfile.CPUS)) + } + + private[spark] def getCustomExecutorResources( + rp: ResourceProfile): Map[String, ExecutorResourceRequest] = { + rp.executorResources.filterKeys(k => !ResourceProfile.allSupportedExecutorResources.contains(k)) + } +} diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfileBuilder.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfileBuilder.scala new file mode 100644 index 0000000000000..26f23f4bf0476 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfileBuilder.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.resource + +import java.util.{Map => JMap} +import java.util.concurrent.ConcurrentHashMap + +import scala.collection.JavaConverters._ + +import org.apache.spark.annotation.Evolving + +/** + * Resource profile builder to build a Resource profile to associate with an RDD. + * A ResourceProfile allows the user to specify executor and task requirements for an RDD + * that will get applied during a stage. This allows the user to change the resource + * requirements between stages. + */ +@Evolving +private[spark] class ResourceProfileBuilder() { + + private val _taskResources = new ConcurrentHashMap[String, TaskResourceRequest]() + private val _executorResources = new ConcurrentHashMap[String, ExecutorResourceRequest]() + + def taskResources: Map[String, TaskResourceRequest] = _taskResources.asScala.toMap + def executorResources: Map[String, ExecutorResourceRequest] = _executorResources.asScala.toMap + + /** + * (Java-specific) gets a Java Map of resources to TaskResourceRequest + */ + def taskResourcesJMap: JMap[String, TaskResourceRequest] = _taskResources.asScala.asJava + + /** + * (Java-specific) gets a Java Map of resources to ExecutorResourceRequest + */ + def executorResourcesJMap: JMap[String, ExecutorResourceRequest] = { + _executorResources.asScala.asJava + } + + def require(requests: ExecutorResourceRequests): this.type = { + _executorResources.putAll(requests.requests.asJava) + this + } + + def require(requests: TaskResourceRequests): this.type = { + _taskResources.putAll(requests.requests.asJava) + this + } + + def clearExecutorResourceRequests(): this.type = { + _executorResources.clear() + this + } + + def clearTaskResourceRequests(): this.type = { + _taskResources.clear() + this + } + + override def toString(): String = { + "Profile executor resources: " + + s"${_executorResources.asScala.map(pair => s"${pair._1}=${pair._2.toString()}")}, " + + s"task resources: ${_taskResources.asScala.map(pair => 
s"${pair._1}=${pair._2.toString()}")}" + } + + def build: ResourceProfile = { + new ResourceProfile(executorResources, taskResources) + } +} + diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala new file mode 100644 index 0000000000000..06db9468c451e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +import java.util.concurrent.ConcurrentHashMap + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.annotation.Evolving +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Tests._ +import org.apache.spark.util.Utils +import org.apache.spark.util.Utils.isTesting + +/** + * Manager of resource profiles. The manager allows one place to keep the actual ResourceProfiles + * and everywhere else we can use the ResourceProfile Id to save on space. + * Note we never remove a resource profile at this point. Its expected this number if small + * so this shouldn't be much overhead. 
+ */ +@Evolving +private[spark] class ResourceProfileManager(sparkConf: SparkConf) extends Logging { + private val resourceProfileIdToResourceProfile = new ConcurrentHashMap[Int, ResourceProfile]() + + private val defaultProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + addResourceProfile(defaultProfile) + + def defaultResourceProfile: ResourceProfile = defaultProfile + + private val taskCpusDefaultProfile = defaultProfile.getTaskCpus.get + private val dynamicEnabled = Utils.isDynamicAllocationEnabled(sparkConf) + private val master = sparkConf.getOption("spark.master") + private val isNotYarn = master.isDefined && !master.get.equals("yarn") + private val errorForTesting = !isTesting || sparkConf.get(RESOURCE_PROFILE_MANAGER_TESTING) + + // If we use anything except the default profile, its only supported on YARN right now. + // Throw an exception if not supported. + private[spark] def isSupported(rp: ResourceProfile): Boolean = { + val isNotDefaultProfile = rp.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID + val notYarnAndNotDefaultProfile = isNotDefaultProfile && isNotYarn + val YarnNotDynAllocAndNotDefaultProfile = isNotDefaultProfile && !isNotYarn && !dynamicEnabled + if (errorForTesting && (notYarnAndNotDefaultProfile || YarnNotDynAllocAndNotDefaultProfile)) { + throw new SparkException("ResourceProfiles are only supported on YARN with dynamic " + + "allocation enabled.") + } + true + } + + def addResourceProfile(rp: ResourceProfile): Unit = { + isSupported(rp) + // force the computation of maxTasks and limitingResource now so we don't have cost later + rp.limitingResource(sparkConf) + logInfo(s"Adding ResourceProfile id: ${rp.id}") + resourceProfileIdToResourceProfile.putIfAbsent(rp.id, rp) + } + + /* + * Gets the ResourceProfile associated with the id, if a profile doesn't exist + * it returns the default ResourceProfile created from the application level configs. 
+ */ + def resourceProfileFromId(rpId: Int): ResourceProfile = { + val rp = resourceProfileIdToResourceProfile.get(rpId) + if (rp == null) { + throw new SparkException(s"ResourceProfileId $rpId not found!") + } + rp + } + + def taskCpusForProfileId(rpId: Int): Int = { + resourceProfileFromId(rpId).getTaskCpus.getOrElse(taskCpusDefaultProfile) + } +} diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala index 150ba09f77dd9..cdb761c7566e7 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala @@ -17,8 +17,8 @@ package org.apache.spark.resource -import java.io.File import java.nio.file.{Files, Paths} +import java.util.Optional import scala.util.control.NonFatal @@ -26,28 +26,97 @@ import org.json4s.DefaultFormats import org.json4s.jackson.JsonMethods._ import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.api.resource.ResourceDiscoveryPlugin import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils.executeAndGetOutput +import org.apache.spark.internal.config.{CPUS_PER_TASK, EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} +import org.apache.spark.internal.config.Tests.{RESOURCES_WARNING_TESTING} +import org.apache.spark.util.Utils /** * Resource identifier. * @param componentName spark.driver / spark.executor / spark.task * @param resourceName gpu, fpga, etc + * + * @since 3.0.0 */ -private[spark] case class ResourceID(componentName: String, resourceName: String) { - def confPrefix: String = s"$componentName.resource.$resourceName." 
// with ending dot - def amountConf: String = s"$confPrefix${ResourceUtils.AMOUNT}" - def discoveryScriptConf: String = s"$confPrefix${ResourceUtils.DISCOVERY_SCRIPT}" - def vendorConf: String = s"$confPrefix${ResourceUtils.VENDOR}" +@DeveloperApi +class ResourceID(val componentName: String, val resourceName: String) { + private[spark] def confPrefix: String = { + s"$componentName.${ResourceUtils.RESOURCE_PREFIX}.$resourceName." + } + private[spark] def amountConf: String = s"$confPrefix${ResourceUtils.AMOUNT}" + private[spark] def discoveryScriptConf: String = s"$confPrefix${ResourceUtils.DISCOVERY_SCRIPT}" + private[spark] def vendorConf: String = s"$confPrefix${ResourceUtils.VENDOR}" + + override def equals(obj: Any): Boolean = { + obj match { + case that: ResourceID => + that.getClass == this.getClass && + that.componentName == componentName && that.resourceName == resourceName + case _ => + false + } + } + + override def hashCode(): Int = Seq(componentName, resourceName).hashCode() } -private[spark] case class ResourceRequest( - id: ResourceID, - amount: Int, - discoveryScript: Option[String], - vendor: Option[String]) +/** + * Class that represents a resource request. + * + * The class used when discovering resources (using the discovery script), + * or via the context as it is parsing configuration for the ResourceID. + * + * @param id object identifying the resource + * @param amount integer amount for the resource. Note that for a request (executor level), + * fractional resources does not make sense, so amount is an integer. 
+ * @param discoveryScript optional discovery script file name + * @param vendor optional vendor name + * + * @since 3.0.0 + */ +@DeveloperApi +class ResourceRequest( + val id: ResourceID, + val amount: Long, + val discoveryScript: Optional[String], + val vendor: Optional[String]) { -private[spark] case class ResourceRequirement(resourceName: String, amount: Int) + override def equals(obj: Any): Boolean = { + obj match { + case that: ResourceRequest => + that.getClass == this.getClass && + that.id == id && that.amount == amount && discoveryScript == discoveryScript && + vendor == vendor + case _ => + false + } + } + + override def hashCode(): Int = Seq(id, amount, discoveryScript, vendor).hashCode() +} + +/** + * Case class that represents resource requirements for a component in a + * an application (components are driver, executor or task). + * + * A configuration of spark.task.resource.[resourceName].amount = 4, equates to: + * amount = 4, and numParts = 1. + * + * A configuration of spark.task.resource.[resourceName].amount = 0.25, equates to: + * amount = 1, and numParts = 4. + * + * @param resourceName gpu, fpga, etc. + * @param amount whole units of the resource we expect (e.g. 1 gpus, 2 fpgas) + * @param numParts if not 1, the number of ways a whole resource is subdivided. + * This is always an integer greater than or equal to 1, + * where 1 is whole resource, 2 is divide a resource in two, and so on. + */ +private[spark] case class ResourceRequirement( + resourceName: String, + amount: Int, + numParts: Int = 1) /** * Case class representing allocated resource addresses for a specific resource. 
@@ -73,29 +142,78 @@ private[spark] object ResourceUtils extends Logging { val amount = settings.getOrElse(AMOUNT, throw new SparkException(s"You must specify an amount for ${resourceId.resourceName}") ).toInt - val discoveryScript = settings.get(DISCOVERY_SCRIPT) - val vendor = settings.get(VENDOR) - ResourceRequest(resourceId, amount, discoveryScript, vendor) + val discoveryScript = Optional.ofNullable(settings.get(DISCOVERY_SCRIPT).orNull) + val vendor = Optional.ofNullable(settings.get(VENDOR).orNull) + new ResourceRequest(resourceId, amount, discoveryScript, vendor) } def listResourceIds(sparkConf: SparkConf, componentName: String): Seq[ResourceID] = { - sparkConf.getAllWithPrefix(s"$componentName.resource.").map { case (key, _) => + sparkConf.getAllWithPrefix(s"$componentName.$RESOURCE_PREFIX.").map { case (key, _) => key.substring(0, key.indexOf('.')) - }.toSet.toSeq.map(name => ResourceID(componentName, name)) + }.toSet.toSeq.map(name => new ResourceID(componentName, name)) } def parseAllResourceRequests( sparkConf: SparkConf, componentName: String): Seq[ResourceRequest] = { - listResourceIds(sparkConf, componentName).map { id => - parseResourceRequest(sparkConf, id) + listResourceIds(sparkConf, componentName) + .map(id => parseResourceRequest(sparkConf, id)) + .filter(_.amount > 0) + } + + // Used to take a fraction amount from a task resource requirement and split into a real + // integer amount and the number of slots per address. For instance, if the amount is 0.5, + // the we get (1, 2) back out. This indicates that for each 1 address, it has 2 slots per + // address, which allows you to put 2 tasks on that address. Note if amount is greater + // than 1, then the number of slots per address has to be 1. This would indicate that a + // would have multiple addresses assigned per task. This can be used for calculating + // the number of tasks per executor -> (executorAmount * numParts) / (integer amount). 
+ // Returns tuple of (integer amount, numParts) + def calculateAmountAndPartsForFraction(doubleAmount: Double): (Int, Int) = { + val parts = if (doubleAmount <= 0.5) { + Math.floor(1.0 / doubleAmount).toInt + } else if (doubleAmount % 1 != 0) { + throw new SparkException( + s"The resource amount ${doubleAmount} must be either <= 0.5, or a whole number.") + } else { + 1 + } + (Math.ceil(doubleAmount).toInt, parts) + } + + // Add any task resource requests from the spark conf to the TaskResourceRequests passed in + def addTaskResourceRequests( + sparkConf: SparkConf, + treqs: TaskResourceRequests): Unit = { + listResourceIds(sparkConf, SPARK_TASK_PREFIX).map { resourceId => + val settings = sparkConf.getAllWithPrefix(resourceId.confPrefix).toMap + val amountDouble = settings.getOrElse(AMOUNT, + throw new SparkException(s"You must specify an amount for ${resourceId.resourceName}") + ).toDouble + treqs.resource(resourceId.resourceName, amountDouble) } } def parseResourceRequirements(sparkConf: SparkConf, componentName: String) : Seq[ResourceRequirement] = { - parseAllResourceRequests(sparkConf, componentName).map { request => - ResourceRequirement(request.id.resourceName, request.amount) + val resourceIds = listResourceIds(sparkConf, componentName) + val rnamesAndAmounts = resourceIds.map { resourceId => + val settings = sparkConf.getAllWithPrefix(resourceId.confPrefix).toMap + val amountDouble = settings.getOrElse(AMOUNT, + throw new SparkException(s"You must specify an amount for ${resourceId.resourceName}") + ).toDouble + (resourceId.resourceName, amountDouble) + } + rnamesAndAmounts.filter { case (_, amount) => amount > 0 }.map { case (rName, amountDouble) => + val (amount, parts) = if (componentName.equalsIgnoreCase(SPARK_TASK_PREFIX)) { + calculateAmountAndPartsForFraction(amountDouble) + } else if (amountDouble % 1 != 0) { + throw new SparkException( + s"Only tasks support fractional resources, please check your $componentName settings") + } else { + 
(amountDouble.toInt, 1) + } + ResourceRequirement(rName, amount, parts) } } @@ -125,17 +243,28 @@ private[spark] object ResourceUtils extends Logging { } } + def parseAllocated( + resourcesFileOpt: Option[String], + componentName: String): Seq[ResourceAllocation] = { + resourcesFileOpt.toSeq.flatMap(parseAllocatedFromJsonFile) + .filter(_.id.componentName == componentName) + } + private def parseAllocatedOrDiscoverResources( sparkConf: SparkConf, componentName: String, resourcesFileOpt: Option[String]): Seq[ResourceAllocation] = { - val allocated = resourcesFileOpt.toSeq.flatMap(parseAllocatedFromJsonFile) - .filter(_.id.componentName == componentName) + val allocated = parseAllocated(resourcesFileOpt, componentName) val otherResourceIds = listResourceIds(sparkConf, componentName).diff(allocated.map(_.id)) - allocated ++ otherResourceIds.map { id => + val otherResources = otherResourceIds.flatMap { id => val request = parseResourceRequest(sparkConf, id) - ResourceAllocation(id, discoverResource(request).addresses) + if (request.amount > 0) { + Some(ResourceAllocation(id, discoverResource(sparkConf, request).addresses)) + } else { + None + } } + allocated ++ otherResources } private def assertResourceAllocationMeetsRequest( @@ -154,9 +283,24 @@ private[spark] object ResourceUtils extends Logging { requests.foreach(r => assertResourceAllocationMeetsRequest(allocated(r.id), r)) } + private def assertAllResourceAllocationsMatchResourceProfile( + allocations: Map[String, ResourceInformation], + execReqs: Map[String, ExecutorResourceRequest]): Unit = { + execReqs.foreach { case (rName, req) => + require(allocations.contains(rName) && allocations(rName).addresses.size >= req.amount, + s"Resource: ${rName}, with addresses: " + + s"${allocations(rName).addresses.mkString(",")} " + + s"is less than what the user requested: ${req.amount})") + } + } + /** * Gets all allocated resource information for the input component from input resources file and - * discover the remaining 
via discovery scripts. + * the application level Spark configs. It first looks to see if resource were explicitly + * specified in the resources file (this would include specified address assignments and it only + * specified in certain cluster managers) and then it looks at the Spark configs to get any + * others not specified in the file. The resources not explicitly set in the file require a + * discovery script for it to run to get the addresses of the resource. * It also verifies the resource allocation meets required amount for each resource. * @return a map from resource name to resource info */ @@ -171,6 +315,51 @@ private[spark] object ResourceUtils extends Logging { resourceInfoMap } + // create an empty Optional if the string is empty + private def emptyStringToOptional(optStr: String): Optional[String] = { + if (optStr.isEmpty) { + Optional.empty[String] + } else { + Optional.of(optStr) + } + } + + /** + * This function is similar to getOrDiscoverallResources, except for it uses the ResourceProfile + * information instead of the application level configs. + * + * It first looks to see if resource were explicitly specified in the resources file + * (this would include specified address assignments and it only specified in certain + * cluster managers) and then it looks at the ResourceProfile to get + * any others not specified in the file. The resources not explicitly set in the file require a + * discovery script for it to run to get the addresses of the resource. + * It also verifies the resource allocation meets required amount for each resource. 
+ * + * @return a map from resource name to resource info + */ + def getOrDiscoverAllResourcesForResourceProfile( + resourcesFileOpt: Option[String], + componentName: String, + resourceProfile: ResourceProfile, + sparkConf: SparkConf): Map[String, ResourceInformation] = { + val fileAllocated = parseAllocated(resourcesFileOpt, componentName) + val fileAllocResMap = fileAllocated.map(a => (a.id.resourceName, a.toResourceInformation)).toMap + // only want to look at the ResourceProfile for resources not in the resources file + val execReq = ResourceProfile.getCustomExecutorResources(resourceProfile) + val filteredExecreq = execReq.filterNot { case (rname, _) => fileAllocResMap.contains(rname) } + val rpAllocations = filteredExecreq.map { case (rName, execRequest) => + val resourceId = new ResourceID(componentName, rName) + val scriptOpt = emptyStringToOptional(execRequest.discoveryScript) + val vendorOpt = emptyStringToOptional(execRequest.vendor) + val resourceReq = new ResourceRequest(resourceId, execRequest.amount, scriptOpt, vendorOpt) + val addrs = discoverResource(sparkConf, resourceReq).addresses + (rName, new ResourceInformation(rName, addrs)) + } + val allAllocations = fileAllocResMap ++ rpAllocations + assertAllResourceAllocationsMatchResourceProfile(allAllocations, execReq) + allAllocations + } + def logResourceInfo(componentName: String, resources: Map[String, ResourceInformation]) : Unit = { logInfo("==============================================================") @@ -178,32 +367,113 @@ private[spark] object ResourceUtils extends Logging { logInfo("==============================================================") } - // visible for test - private[spark] def discoverResource(resourceRequest: ResourceRequest): ResourceInformation = { - val resourceName = resourceRequest.id.resourceName - val script = resourceRequest.discoveryScript - val result = if (script.nonEmpty) { - val scriptFile = new File(script.get) - // check that script exists and try to execute - 
if (scriptFile.exists()) { - val output = executeAndGetOutput(Seq(script.get), new File(".")) - ResourceInformation.parseJson(output) - } else { - throw new SparkException(s"Resource script: $scriptFile to discover $resourceName " + - "doesn't exist!") + private[spark] def discoverResource( + sparkConf: SparkConf, + resourceRequest: ResourceRequest): ResourceInformation = { + // always put the discovery script plugin as last plugin + val discoveryScriptPlugin = "org.apache.spark.resource.ResourceDiscoveryScriptPlugin" + val pluginClasses = sparkConf.get(RESOURCES_DISCOVERY_PLUGIN) :+ discoveryScriptPlugin + val resourcePlugins = Utils.loadExtensions(classOf[ResourceDiscoveryPlugin], pluginClasses, + sparkConf) + // apply each plugin until one of them returns the information for this resource + var riOption: Optional[ResourceInformation] = Optional.empty() + resourcePlugins.foreach { plugin => + val riOption = plugin.discoverResource(resourceRequest, sparkConf) + if (riOption.isPresent()) { + return riOption.get() } + } + throw new SparkException(s"None of the discovery plugins returned ResourceInformation for " + + s"${resourceRequest.id.resourceName}") + } + + def validateTaskCpusLargeEnough(execCores: Int, taskCpus: Int): Boolean = { + // Number of cores per executor must meet at least one task requirement. + if (execCores < taskCpus) { + throw new SparkException(s"The number of cores per executor (=$execCores) has to be >= " + + s"the number of cpus per task = $taskCpus.") + } + true + } + + // the option executor cores parameter is by the different local modes since it not configured + // via the config + def warnOnWastedResources( + rp: ResourceProfile, + sparkConf: SparkConf, + execCores: Option[Int] = None): Unit = { + // There have been checks on the ResourceProfile to make sure the executor resources were + // specified and are large enough if any task resources were specified. 
+ // Now just do some sanity test and log warnings when it looks like the user will + // waste some resources. + val coresKnown = rp.isCoresLimitKnown + var limitingResource = rp.limitingResource(sparkConf) + var maxTaskPerExec = rp.maxTasksPerExecutor(sparkConf) + val taskCpus = rp.getTaskCpus.getOrElse(sparkConf.get(CPUS_PER_TASK)) + val cores = if (execCores.isDefined) { + execCores.get + } else if (coresKnown) { + rp.getExecutorCores.getOrElse(sparkConf.get(EXECUTOR_CORES)) } else { - throw new SparkException(s"User is expecting to use resource: $resourceName, but " + - "didn't specify a discovery script!") + // can't calculate cores limit + return } - if (!result.name.equals(resourceName)) { - throw new SparkException(s"Error running the resource discovery script ${script.get}: " + - s"script returned resource name ${result.name} and we were expecting $resourceName.") + // when executor cores config isn't set, we can't calculate the real limiting resource + // and number of tasks per executor ahead of time, so calculate it now. + if (!coresKnown) { + val numTasksPerExecCores = cores / taskCpus + val numTasksPerExecCustomResource = rp.maxTasksPerExecutor(sparkConf) + if (limitingResource.isEmpty || + (limitingResource.nonEmpty && numTasksPerExecCores < numTasksPerExecCustomResource)) { + limitingResource = ResourceProfile.CPUS + maxTaskPerExec = numTasksPerExecCores + } + } + val taskReq = ResourceProfile.getCustomTaskResources(rp) + val execReq = ResourceProfile.getCustomExecutorResources(rp) + + if (limitingResource.nonEmpty && !limitingResource.equals(ResourceProfile.CPUS)) { + if ((taskCpus * maxTaskPerExec) < cores) { + val resourceNumSlots = Math.floor(cores/taskCpus).toInt + val message = s"The configuration of cores (exec = ${cores} " + + s"task = ${taskCpus}, runnable tasks = ${resourceNumSlots}) will " + + s"result in wasted resources due to resource ${limitingResource} limiting the " + + s"number of runnable tasks per executor to: ${maxTaskPerExec}. 
Please adjust " + + "your configuration." + if (sparkConf.get(RESOURCES_WARNING_TESTING)) { + throw new SparkException(message) + } else { + logWarning(message) + } + } + } + + taskReq.foreach { case (rName, treq) => + val execAmount = execReq(rName).amount + val numParts = rp.getNumSlotsPerAddress(rName, sparkConf) + // handle fractional + val taskAmount = if (numParts > 1) 1 else treq.amount + if (maxTaskPerExec < (execAmount * numParts / taskAmount)) { + val taskReqStr = s"${taskAmount}/${numParts}" + val resourceNumSlots = Math.floor(execAmount * numParts / taskAmount).toInt + val message = s"The configuration of resource: ${treq.resourceName} " + + s"(exec = ${execAmount}, task = ${taskReqStr}, " + + s"runnable tasks = ${resourceNumSlots}) will " + + s"result in wasted resources due to resource ${limitingResource} limiting the " + + s"number of runnable tasks per executor to: ${maxTaskPerExec}. Please adjust " + + "your configuration." + if (sparkConf.get(RESOURCES_WARNING_TESTING)) { + throw new SparkException(message) + } else { + logWarning(message) + } + } } - result } // known types of resources final val GPU: String = "gpu" final val FPGA: String = "fpga" + + final val RESOURCE_PREFIX: String = "resource" } diff --git a/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala new file mode 100644 index 0000000000000..bffb0a2f523b1 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +/** + * A task resource request. This is used in conjuntion with the ResourceProfile to + * programmatically specify the resources needed for an RDD that will be applied at the + * stage level. + * + * Use TaskResourceRequests class as a convenience API. + * + * This api is currently private until the rest of the pieces are in place and then it + * will become public. + */ +private[spark] class TaskResourceRequest(val resourceName: String, val amount: Double) + extends Serializable { + + assert(amount <= 0.5 || amount % 1 == 0, + s"The resource amount ${amount} must be either <= 0.5, or a whole number.") + + override def equals(obj: Any): Boolean = { + obj match { + case that: TaskResourceRequest => + that.getClass == this.getClass && + that.resourceName == resourceName && that.amount == amount + case _ => + false + } + } + + override def hashCode(): Int = Seq(resourceName, amount).hashCode() + + override def toString(): String = { + s"name: $resourceName, amount: $amount" + } +} diff --git a/core/src/main/scala/org/apache/spark/resource/TaskResourceRequests.scala b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequests.scala new file mode 100644 index 0000000000000..9624b51dd158e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequests.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +import java.util.concurrent.ConcurrentHashMap + +import scala.collection.JavaConverters._ + +import org.apache.spark.resource.ResourceProfile._ + +/** + * A set of task resource requests. This is used in conjuntion with the ResourceProfile to + * programmatically specify the resources needed for an RDD that will be applied at the + * stage level. + * + * This api is currently private until the rest of the pieces are in place and then it + * will become public. + */ +private[spark] class TaskResourceRequests() extends Serializable { + + private val _taskResources = new ConcurrentHashMap[String, TaskResourceRequest]() + + def requests: Map[String, TaskResourceRequest] = _taskResources.asScala.toMap + + /** + * Specify number of cpus per Task. + * + * @param amount Number of cpus to allocate per Task. + */ + def cpus(amount: Int): this.type = { + val treq = new TaskResourceRequest(CPUS, amount) + _taskResources.put(CPUS, treq) + this + } + + /** + * Amount of a particular custom resource(GPU, FPGA, etc) to use. + * + * @param resourceName Name of the resource. + * @param amount Amount requesting as a Double to support fractional resource requests. + * Valid values are less than or equal to 0.5 or whole numbers. 
This essentially + * lets you configure X number of tasks to run on a single resource, + * ie amount equals 0.5 translates into 2 tasks per resource address. + */ + def resource(resourceName: String, amount: Double): this.type = { + val treq = new TaskResourceRequest(resourceName, amount) + _taskResources.put(resourceName, treq) + this + } + + def addRequest(treq: TaskResourceRequest): this.type = { + _taskResources.put(treq.resourceName, treq) + this + } + + override def toString: String = { + s"Task resource requests: ${_taskResources}" + } +} diff --git a/core/src/main/scala/org/apache/spark/rpc/RpcEndpoint.scala b/core/src/main/scala/org/apache/spark/rpc/RpcEndpoint.scala index 97eed540b8f59..4728759e7fb0d 100644 --- a/core/src/main/scala/org/apache/spark/rpc/RpcEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/rpc/RpcEndpoint.scala @@ -146,3 +146,19 @@ private[spark] trait RpcEndpoint { * [[ThreadSafeRpcEndpoint]] for different messages. */ private[spark] trait ThreadSafeRpcEndpoint extends RpcEndpoint + +/** + * An endpoint that uses a dedicated thread pool for delivering messages. + */ +private[spark] trait IsolatedRpcEndpoint extends RpcEndpoint { + + /** + * How many threads to use for delivering messages. By default, use a single thread. + * + * Note that requesting more than one thread means that the endpoint should be able to handle + * messages arriving from many threads at once, and all the things that entails (including + * messages being delivered to the endpoint out of order). 
+ */ + def threadCount(): Int = 1 + +} diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala index 2f923d7902b05..41d6d146a86d7 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala @@ -17,20 +17,17 @@ package org.apache.spark.rpc.netty -import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap, LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit} +import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap, CountDownLatch} import javax.annotation.concurrent.GuardedBy import scala.collection.JavaConverters._ import scala.concurrent.Promise import scala.util.control.NonFatal -import org.apache.spark.{SparkConf, SparkContext, SparkException} +import org.apache.spark.SparkException import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.EXECUTOR_ID -import org.apache.spark.internal.config.Network.RPC_NETTY_DISPATCHER_NUM_THREADS import org.apache.spark.network.client.RpcResponseCallback import org.apache.spark.rpc._ -import org.apache.spark.util.ThreadUtils /** * A message dispatcher, responsible for routing RPC messages to the appropriate endpoint(s). @@ -40,20 +37,13 @@ import org.apache.spark.util.ThreadUtils */ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) extends Logging { - private class EndpointData( - val name: String, - val endpoint: RpcEndpoint, - val ref: NettyRpcEndpointRef) { - val inbox = new Inbox(ref, endpoint) - } - - private val endpoints: ConcurrentMap[String, EndpointData] = - new ConcurrentHashMap[String, EndpointData] + private val endpoints: ConcurrentMap[String, MessageLoop] = + new ConcurrentHashMap[String, MessageLoop] private val endpointRefs: ConcurrentMap[RpcEndpoint, RpcEndpointRef] = new ConcurrentHashMap[RpcEndpoint, RpcEndpointRef] - // Track the receivers whose inboxes may contain messages. 
- private val receivers = new LinkedBlockingQueue[EndpointData] + private val shutdownLatch = new CountDownLatch(1) + private lazy val sharedLoop = new SharedMessageLoop(nettyEnv.conf, this, numUsableCores) /** * True if the dispatcher has been stopped. Once stopped, all messages posted will be bounced @@ -69,12 +59,30 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte if (stopped) { throw new IllegalStateException("RpcEnv has been stopped") } - if (endpoints.putIfAbsent(name, new EndpointData(name, endpoint, endpointRef)) != null) { + if (endpoints.containsKey(name)) { throw new IllegalArgumentException(s"There is already an RpcEndpoint called $name") } - val data = endpoints.get(name) - endpointRefs.put(data.endpoint, data.ref) - receivers.offer(data) // for the OnStart message + + // This must be done before assigning RpcEndpoint to MessageLoop, as MessageLoop sets Inbox be + // active when registering, and endpointRef must be put into endpointRefs before onStart is + // called. + endpointRefs.put(endpoint, endpointRef) + + var messageLoop: MessageLoop = null + try { + messageLoop = endpoint match { + case e: IsolatedRpcEndpoint => + new DedicatedMessageLoop(name, e, this) + case _ => + sharedLoop.register(name, endpoint) + sharedLoop + } + endpoints.put(name, messageLoop) + } catch { + case NonFatal(e) => + endpointRefs.remove(endpoint) + throw e + } } endpointRef } @@ -85,10 +93,9 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte // Should be idempotent private def unregisterRpcEndpoint(name: String): Unit = { - val data = endpoints.remove(name) - if (data != null) { - data.inbox.stop() - receivers.offer(data) // for the OnStop message + val loop = endpoints.remove(name) + if (loop != null) { + loop.unregister(name) } // Don't clean `endpointRefs` here because it's possible that some messages are being processed // now and they can use `getRpcEndpointRef`. 
So `endpointRefs` will be cleaned in Inbox via @@ -155,14 +162,13 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte message: InboxMessage, callbackIfStopped: (Exception) => Unit): Unit = { val error = synchronized { - val data = endpoints.get(endpointName) + val loop = endpoints.get(endpointName) if (stopped) { Some(new RpcEnvStoppedException()) - } else if (data == null) { + } else if (loop == null) { Some(new SparkException(s"Could not find $endpointName.")) } else { - data.inbox.post(message) - receivers.offer(data) + loop.post(endpointName, message) None } } @@ -177,15 +183,23 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte } stopped = true } - // Stop all endpoints. This will queue all endpoints for processing by the message loops. - endpoints.keySet().asScala.foreach(unregisterRpcEndpoint) - // Enqueue a message that tells the message loops to stop. - receivers.offer(PoisonPill) - threadpool.shutdown() + var stopSharedLoop = false + endpoints.asScala.foreach { case (name, loop) => + unregisterRpcEndpoint(name) + if (!loop.isInstanceOf[SharedMessageLoop]) { + loop.stop() + } else { + stopSharedLoop = true + } + } + if (stopSharedLoop) { + sharedLoop.stop() + } + shutdownLatch.countDown() } def awaitTermination(): Unit = { - threadpool.awaitTermination(Long.MaxValue, TimeUnit.MILLISECONDS) + shutdownLatch.await() } /** @@ -194,61 +208,4 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte def verify(name: String): Boolean = { endpoints.containsKey(name) } - - private def getNumOfThreads(conf: SparkConf): Int = { - val availableCores = - if (numUsableCores > 0) numUsableCores else Runtime.getRuntime.availableProcessors() - - val modNumThreads = conf.get(RPC_NETTY_DISPATCHER_NUM_THREADS) - .getOrElse(math.max(2, availableCores)) - - conf.get(EXECUTOR_ID).map { id => - val role = if (id == SparkContext.DRIVER_IDENTIFIER) "driver" else "executor" - 
conf.getInt(s"spark.$role.rpc.netty.dispatcher.numThreads", modNumThreads) - }.getOrElse(modNumThreads) - } - - /** Thread pool used for dispatching messages. */ - private val threadpool: ThreadPoolExecutor = { - val numThreads = getNumOfThreads(nettyEnv.conf) - val pool = ThreadUtils.newDaemonFixedThreadPool(numThreads, "dispatcher-event-loop") - for (i <- 0 until numThreads) { - pool.execute(new MessageLoop) - } - pool - } - - /** Message loop used for dispatching messages. */ - private class MessageLoop extends Runnable { - override def run(): Unit = { - try { - while (true) { - try { - val data = receivers.take() - if (data == PoisonPill) { - // Put PoisonPill back so that other MessageLoops can see it. - receivers.offer(PoisonPill) - return - } - data.inbox.process(Dispatcher.this) - } catch { - case NonFatal(e) => logError(e.getMessage, e) - } - } - } catch { - case _: InterruptedException => // exit - case t: Throwable => - try { - // Re-submit a MessageLoop so that Dispatcher will still work if - // UncaughtExceptionHandler decides to not kill JVM. - threadpool.execute(new MessageLoop) - } finally { - throw t - } - } - } - } - - /** A poison endpoint that indicates MessageLoop should exit its message loop. */ - private val PoisonPill = new EndpointData(null, null, null) } diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala index 44d2622a42f58..2ed03f7430c32 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala @@ -54,9 +54,7 @@ private[netty] case class RemoteProcessConnectionError(cause: Throwable, remoteA /** * An inbox that stores messages for an [[RpcEndpoint]] and posts messages to it thread-safely. 
*/ -private[netty] class Inbox( - val endpointRef: NettyRpcEndpointRef, - val endpoint: RpcEndpoint) +private[netty] class Inbox(val endpointName: String, val endpoint: RpcEndpoint) extends Logging { inbox => // Give this an alias so we can use it more clearly in closures. @@ -195,7 +193,7 @@ private[netty] class Inbox( * Exposed for testing. */ protected def onDrop(message: InboxMessage): Unit = { - logWarning(s"Drop $message because $endpointRef is stopped") + logWarning(s"Drop $message because endpoint $endpointName is stopped") } /** diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala b/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala new file mode 100644 index 0000000000000..c985c72f2adce --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.rpc.netty + +import java.util.concurrent._ + +import scala.util.control.NonFatal + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.EXECUTOR_ID +import org.apache.spark.internal.config.Network._ +import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcEndpoint} +import org.apache.spark.util.ThreadUtils + +/** + * A message loop used by [[Dispatcher]] to deliver messages to endpoints. + */ +private sealed abstract class MessageLoop(dispatcher: Dispatcher) extends Logging { + + // List of inboxes with pending messages, to be processed by the message loop. + private val active = new LinkedBlockingQueue[Inbox]() + + // Message loop task; should be run in all threads of the message loop's pool. + protected val receiveLoopRunnable = new Runnable() { + override def run(): Unit = receiveLoop() + } + + protected val threadpool: ExecutorService + + private var stopped = false + + def post(endpointName: String, message: InboxMessage): Unit + + def unregister(name: String): Unit + + def stop(): Unit = { + synchronized { + if (!stopped) { + setActive(MessageLoop.PoisonPill) + threadpool.shutdown() + stopped = true + } + } + threadpool.awaitTermination(Long.MaxValue, TimeUnit.MILLISECONDS) + } + + protected final def setActive(inbox: Inbox): Unit = active.offer(inbox) + + private def receiveLoop(): Unit = { + try { + while (true) { + try { + val inbox = active.take() + if (inbox == MessageLoop.PoisonPill) { + // Put PoisonPill back so that other threads can see it. + setActive(MessageLoop.PoisonPill) + return + } + inbox.process(dispatcher) + } catch { + case NonFatal(e) => logError(e.getMessage, e) + } + } + } catch { + case _: InterruptedException => // exit + case t: Throwable => + try { + // Re-submit a receive task so that message delivery will still work if + // UncaughtExceptionHandler decides to not kill JVM. 
+ threadpool.execute(receiveLoopRunnable) + } finally { + throw t + } + } + } +} + +private object MessageLoop { + /** A poison inbox that indicates the message loop should stop processing messages. */ + val PoisonPill = new Inbox(null, null) +} + +/** + * A message loop that serves multiple RPC endpoints, using a shared thread pool. + */ +private class SharedMessageLoop( + conf: SparkConf, + dispatcher: Dispatcher, + numUsableCores: Int) + extends MessageLoop(dispatcher) { + + private val endpoints = new ConcurrentHashMap[String, Inbox]() + + private def getNumOfThreads(conf: SparkConf): Int = { + val availableCores = + if (numUsableCores > 0) numUsableCores else Runtime.getRuntime.availableProcessors() + + val modNumThreads = conf.get(RPC_NETTY_DISPATCHER_NUM_THREADS) + .getOrElse(math.max(2, availableCores)) + + conf.get(EXECUTOR_ID).map { id => + val role = if (id == SparkContext.DRIVER_IDENTIFIER) "driver" else "executor" + conf.getInt(s"spark.$role.rpc.netty.dispatcher.numThreads", modNumThreads) + }.getOrElse(modNumThreads) + } + + /** Thread pool used for dispatching messages. */ + override protected val threadpool: ThreadPoolExecutor = { + val numThreads = getNumOfThreads(conf) + val pool = ThreadUtils.newDaemonFixedThreadPool(numThreads, "dispatcher-event-loop") + for (i <- 0 until numThreads) { + pool.execute(receiveLoopRunnable) + } + pool + } + + override def post(endpointName: String, message: InboxMessage): Unit = { + val inbox = endpoints.get(endpointName) + inbox.post(message) + setActive(inbox) + } + + override def unregister(name: String): Unit = { + val inbox = endpoints.remove(name) + if (inbox != null) { + inbox.stop() + // Mark active to handle the OnStop message. + setActive(inbox) + } + } + + def register(name: String, endpoint: RpcEndpoint): Unit = { + val inbox = new Inbox(name, endpoint) + endpoints.put(name, inbox) + // Mark active to handle the OnStart message. 
+ setActive(inbox) + } +} + +/** + * A message loop that is dedicated to a single RPC endpoint. + */ +private class DedicatedMessageLoop( + name: String, + endpoint: IsolatedRpcEndpoint, + dispatcher: Dispatcher) + extends MessageLoop(dispatcher) { + + private val inbox = new Inbox(name, endpoint) + + override protected val threadpool = if (endpoint.threadCount() > 1) { + ThreadUtils.newDaemonCachedThreadPool(s"dispatcher-$name", endpoint.threadCount()) + } else { + ThreadUtils.newDaemonSingleThreadExecutor(s"dispatcher-$name") + } + + (1 to endpoint.threadCount()).foreach { _ => + threadpool.submit(receiveLoopRunnable) + } + + // Mark active to handle the OnStart message. + setActive(inbox) + + override def post(endpointName: String, message: InboxMessage): Unit = { + require(endpointName == name) + inbox.post(message) + setActive(inbox) + } + + override def unregister(endpointName: String): Unit = synchronized { + require(endpointName == name) + inbox.stop() + // Mark active to handle the OnStop message. 
+ setActive(inbox) + setActive(MessageLoop.PoisonPill) + threadpool.shutdown() + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala index 11e2c475d9b45..1bcddaceb3576 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala @@ -50,11 +50,11 @@ private class AsyncEventQueue( // if no such conf is specified, use the value specified in // LISTENER_BUS_EVENT_QUEUE_CAPACITY private[scheduler] def capacity: Int = { - val queuesize = conf.getInt(s"spark.scheduler.listenerbus.eventqueue.${name}.capacity", - conf.get(LISTENER_BUS_EVENT_QUEUE_CAPACITY)) - assert(queuesize > 0, s"capacity for event queue $name must be greater than 0, " + - s"but $queuesize is configured.") - queuesize + val queueSize = conf.getInt(s"$LISTENER_BUS_EVENT_QUEUE_PREFIX.$name.capacity", + conf.get(LISTENER_BUS_EVENT_QUEUE_CAPACITY)) + assert(queueSize > 0, s"capacity for event queue $name must be greater than 0, " + + s"but $queueSize is configured.") + queueSize } private val eventQueue = new LinkedBlockingQueue[SparkListenerEvent](capacity) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index b08483267c141..fd5c3e0827bf9 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -29,8 +29,6 @@ import scala.collection.mutable.{HashMap, HashSet, ListBuffer} import scala.concurrent.duration._ import scala.util.control.NonFatal -import org.apache.commons.lang3.SerializationUtils - import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} @@ -39,7 +37,8 @@ import org.apache.spark.internal.config import 
org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY import org.apache.spark.network.util.JavaUtils import org.apache.spark.partial.{ApproximateActionListener, ApproximateEvaluator, PartialResult} -import org.apache.spark.rdd.{DeterministicLevel, RDD, RDDCheckpointData} +import org.apache.spark.rdd.{RDD, RDDCheckpointData} +import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc.RpcTimeout import org.apache.spark.storage._ import org.apache.spark.storage.BlockManagerMessages.BlockManagerHeartbeat @@ -229,7 +228,7 @@ private[spark] class DAGScheduler( /** * Called by the TaskSetManager to report task's starting. */ - def taskStarted(task: Task[_], taskInfo: TaskInfo) { + def taskStarted(task: Task[_], taskInfo: TaskInfo): Unit = { eventProcessLoop.post(BeginEvent(task, taskInfo)) } @@ -237,7 +236,7 @@ private[spark] class DAGScheduler( * Called by the TaskSetManager to report that a task has completed * and results are being fetched remotely. */ - def taskGettingResult(taskInfo: TaskInfo) { + def taskGettingResult(taskInfo: TaskInfo): Unit = { eventProcessLoop.post(GettingResultEvent(taskInfo)) } @@ -269,7 +268,7 @@ private[spark] class DAGScheduler( executorUpdates: mutable.Map[(Int, Int), ExecutorMetrics]): Boolean = { listenerBus.post(SparkListenerExecutorMetricsUpdate(execId, accumUpdates, executorUpdates)) - blockManagerMaster.driverEndpoint.askSync[Boolean]( + blockManagerMaster.driverHeartbeatEndPoint.askSync[Boolean]( BlockManagerHeartbeat(blockManagerId), new RpcTimeout(10.minutes, "BlockManagerHeartbeat")) } @@ -393,7 +392,8 @@ private[spark] class DAGScheduler( val parents = getOrCreateParentStages(rdd, jobId) val id = nextStageId.getAndIncrement() val stage = new ShuffleMapStage( - id, rdd, numTasks, parents, jobId, rdd.creationSite, shuffleDep, mapOutputTracker) + id, rdd, numTasks, parents, jobId, rdd.creationSite, shuffleDep, mapOutputTracker, + ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) stageIdToStage(id) = stage 
shuffleIdToMapStage(shuffleDep.shuffleId) = stage @@ -402,7 +402,8 @@ private[spark] class DAGScheduler( if (!mapOutputTracker.containsShuffle(shuffleDep.shuffleId)) { // Kind of ugly: need to register RDDs with the cache and map output tracker here // since we can't do it in the RDD constructor because # of partitions is unknown - logInfo("Registering RDD " + rdd.id + " (" + rdd.getCreationSite + ")") + logInfo(s"Registering RDD ${rdd.id} (${rdd.getCreationSite}) as input to " + + s"shuffle ${shuffleDep.shuffleId}") mapOutputTracker.registerShuffle(shuffleDep.shuffleId, rdd.partitions.length) } stage @@ -454,7 +455,8 @@ private[spark] class DAGScheduler( checkBarrierStageWithRDDChainPattern(rdd, partitions.toSet.size) val parents = getOrCreateParentStages(rdd, jobId) val id = nextStageId.getAndIncrement() - val stage = new ResultStage(id, rdd, func, partitions, parents, jobId, callSite) + val stage = new ResultStage(id, rdd, func, partitions, parents, jobId, callSite, + ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) stageIdToStage(id) = stage updateJobIdStageIdMaps(jobId, stage) stage @@ -560,7 +562,7 @@ private[spark] class DAGScheduler( // caused by recursively visiting val waitingForVisit = new ListBuffer[RDD[_]] waitingForVisit += stage.rdd - def visit(rdd: RDD[_]) { + def visit(rdd: RDD[_]): Unit = { if (!visited(rdd)) { visited += rdd val rddHasUncachedPartitions = getCacheLocs(rdd).contains(Nil) @@ -591,7 +593,7 @@ private[spark] class DAGScheduler( */ private def updateJobIdStageIdMaps(jobId: Int, stage: Stage): Unit = { @tailrec - def updateJobIdStageIdMapsList(stages: List[Stage]) { + def updateJobIdStageIdMapsList(stages: List[Stage]): Unit = { if (stages.nonEmpty) { val s = stages.head s.jobIds += jobId @@ -622,7 +624,7 @@ private[spark] class DAGScheduler( "Job %d not registered for stage %d even though that stage was registered for the job" .format(job.jobId, stageId)) } else { - def removeStage(stageId: Int) { + def removeStage(stageId: Int): Unit = { 
// data structures based on Stage for (stage <- stageIdToStage.get(stageId)) { if (runningStages.contains(stage)) { @@ -696,9 +698,13 @@ private[spark] class DAGScheduler( val jobId = nextJobId.getAndIncrement() if (partitions.isEmpty) { + val clonedProperties = Utils.cloneProperties(properties) + if (sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) == null) { + clonedProperties.setProperty(SparkContext.SPARK_JOB_DESCRIPTION, callSite.shortForm) + } val time = clock.getTimeMillis() listenerBus.post( - SparkListenerJobStart(jobId, time, Seq[StageInfo](), properties)) + SparkListenerJobStart(jobId, time, Seq.empty, clonedProperties)) listenerBus.post( SparkListenerJobEnd(jobId, time, JobSucceeded)) // Return immediately if the job is running 0 tasks @@ -710,7 +716,7 @@ private[spark] class DAGScheduler( val waiter = new JobWaiter[U](this, jobId, partitions.size, resultHandler) eventProcessLoop.post(JobSubmitted( jobId, rdd, func2, partitions.toArray, callSite, waiter, - SerializationUtils.clone(properties))) + Utils.cloneProperties(properties))) waiter } @@ -782,7 +788,7 @@ private[spark] class DAGScheduler( val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] eventProcessLoop.post(JobSubmitted( jobId, rdd, func2, rdd.partitions.indices.toArray, callSite, listener, - SerializationUtils.clone(properties))) + Utils.cloneProperties(properties))) listener.awaitResult() // Will throw an exception if the job fails } @@ -819,7 +825,7 @@ private[spark] class DAGScheduler( this, jobId, 1, (_: Int, r: MapOutputStatistics) => callback(r)) eventProcessLoop.post(MapStageSubmitted( - jobId, dependency, callSite, waiter, SerializationUtils.clone(properties))) + jobId, dependency, callSite, waiter, Utils.cloneProperties(properties))) waiter } @@ -846,7 +852,7 @@ private[spark] class DAGScheduler( eventProcessLoop.post(AllJobsCancelled) } - private[scheduler] def doCancelAllJobs() { + private[scheduler] def doCancelAllJobs(): Unit = { // Cancel all running jobs. 
runningStages.map(_.firstJobId).foreach(handleJobCancellation(_, Option("as part of cancellation of all jobs"))) @@ -857,7 +863,7 @@ private[spark] class DAGScheduler( /** * Cancel all jobs associated with a running or scheduled stage. */ - def cancelStage(stageId: Int, reason: Option[String]) { + def cancelStage(stageId: Int, reason: Option[String]): Unit = { eventProcessLoop.post(StageCancelled(stageId, reason)) } @@ -874,7 +880,7 @@ private[spark] class DAGScheduler( * Resubmit any failed stages. Ordinarily called after a small amount of time has passed since * the last fetch failure. */ - private[scheduler] def resubmitFailedStages() { + private[scheduler] def resubmitFailedStages(): Unit = { if (failedStages.nonEmpty) { // Failed stages may be removed by job cancellation, so failed might be empty even if // the ResubmitFailedStages event has been scheduled. @@ -893,7 +899,7 @@ private[spark] class DAGScheduler( * Submits stages that depend on the given parent stage. Called when the parent stage completes * successfully. */ - private def submitWaitingChildStages(parent: Stage) { + private def submitWaitingChildStages(parent: Stage): Unit = { logTrace(s"Checking if any dependencies of $parent are now runnable") logTrace("running: " + runningStages) logTrace("waiting: " + waitingStages) @@ -915,7 +921,7 @@ private[spark] class DAGScheduler( jobsThatUseStage.find(jobIdToActiveJob.contains) } - private[scheduler] def handleJobGroupCancelled(groupId: String) { + private[scheduler] def handleJobGroupCancelled(groupId: String): Unit = { // Cancel all jobs belonging to this job group. // First finds all active jobs with this group id, and then kill stages for them. 
val activeInGroup = activeJobs.filter { activeJob => @@ -928,7 +934,7 @@ private[spark] class DAGScheduler( Option("part of cancelled job group %s".format(groupId)))) } - private[scheduler] def handleBeginEvent(task: Task[_], taskInfo: TaskInfo) { + private[scheduler] def handleBeginEvent(task: Task[_], taskInfo: TaskInfo): Unit = { // Note that there is a chance that this task is launched after the stage is cancelled. // In that case, we wouldn't have the stage anymore in stageIdToStage. val stageAttemptId = @@ -947,7 +953,7 @@ private[spark] class DAGScheduler( stageIdToStage.get(taskSet.stageId).foreach { abortStage(_, reason, exception) } } - private[scheduler] def cleanUpAfterSchedulerStop() { + private[scheduler] def cleanUpAfterSchedulerStop(): Unit = { for (job <- activeJobs) { val error = new SparkException(s"Job ${job.jobId} cancelled because SparkContext was shut down") @@ -965,7 +971,7 @@ private[spark] class DAGScheduler( } } - private[scheduler] def handleGetTaskResult(taskInfo: TaskInfo) { + private[scheduler] def handleGetTaskResult(taskInfo: TaskInfo): Unit = { listenerBus.post(SparkListenerTaskGettingResult(taskInfo)) } @@ -975,7 +981,7 @@ private[spark] class DAGScheduler( partitions: Array[Int], callSite: CallSite, listener: JobListener, - properties: Properties) { + properties: Properties): Unit = { var finalStage: ResultStage = null try { // New stage creation may throw an exception if, for example, jobs are run on a @@ -1039,7 +1045,7 @@ private[spark] class DAGScheduler( dependency: ShuffleDependency[_, _, _], callSite: CallSite, listener: JobListener, - properties: Properties) { + properties: Properties): Unit = { // Submitting this map stage might still require the creation of some parent stages, so make // sure that happens. var finalStage: ShuffleMapStage = null @@ -1079,10 +1085,11 @@ private[spark] class DAGScheduler( } /** Submits stage, but first recursively submits any missing parents. 
*/ - private def submitStage(stage: Stage) { + private def submitStage(stage: Stage): Unit = { val jobId = activeJobForStage(stage) if (jobId.isDefined) { - logDebug("submitStage(" + stage + ")") + logDebug(s"submitStage($stage (name=${stage.name};" + + s"jobs=${stage.jobIds.toSeq.sorted.mkString(",")}))") if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) { val missing = getMissingParentStages(stage).sortBy(_.id) logDebug("missing: " + missing) @@ -1102,10 +1109,19 @@ private[spark] class DAGScheduler( } /** Called when stage's parents are available and we can now do its task. */ - private def submitMissingTasks(stage: Stage, jobId: Int) { + private def submitMissingTasks(stage: Stage, jobId: Int): Unit = { logDebug("submitMissingTasks(" + stage + ")") - // First figure out the indexes of partition ids to compute. + // Before find missing partition, do the intermediate state clean work first. + // The operation here can make sure for the partially completed intermediate stage, + // `findMissingPartitions()` returns all partitions every time. + stage match { + case sms: ShuffleMapStage if stage.isIndeterminate && !sms.isAvailable => + mapOutputTracker.unregisterAllMapOutput(sms.shuffleDep.shuffleId) + case _ => + } + + // Figure out the indexes of partition ids to compute. val partitionsToCompute: Seq[Int] = stage.findMissingPartitions() // Use the scheduling pool, job group, description, etc. from an ActiveJob associated @@ -1346,7 +1362,7 @@ private[spark] class DAGScheduler( * Responds to a task finishing. This is called inside the event loop so it assumes that it can * modify the scheduler's internal state. Use taskEnded() to post a task end event from outside. 
*/ - private[scheduler] def handleTaskCompletion(event: CompletionEvent) { + private[scheduler] def handleTaskCompletion(event: CompletionEvent): Unit = { val task = event.task val stageId = task.stageId @@ -1500,7 +1516,7 @@ private[spark] class DAGScheduler( } } - case FetchFailed(bmAddress, shuffleId, mapId, _, failureMessage) => + case FetchFailed(bmAddress, shuffleId, _, mapIndex, _, failureMessage) => val failedStage = stageIdToStage(task.stageId) val mapStage = shuffleIdToMapStage(shuffleId) @@ -1523,17 +1539,17 @@ private[spark] class DAGScheduler( markStageAsFinished(failedStage, errorMessage = Some(failureMessage), willRetry = !shouldAbortStage) } else { - logDebug(s"Received fetch failure from $task, but its from $failedStage which is no " + - s"longer running") + logDebug(s"Received fetch failure from $task, but it's from $failedStage which is no " + + "longer running") } if (mapStage.rdd.isBarrier()) { // Mark all the map as broken in the map stage, to ensure retry all the tasks on // resubmitted stage attempt. mapOutputTracker.unregisterAllMapOutput(shuffleId) - } else if (mapId != -1) { + } else if (mapIndex != -1) { // Mark the map whose fetch failed as broken in the map stage - mapOutputTracker.unregisterMapOutput(shuffleId, mapId, bmAddress) + mapOutputTracker.unregisterMapOutput(shuffleId, mapIndex, bmAddress) } if (failedStage.rdd.isBarrier()) { @@ -1575,7 +1591,7 @@ private[spark] class DAGScheduler( // Note that, if map stage is UNORDERED, we are fine. The shuffle partitioner is // guaranteed to be determinate, so the input data of the reducers will not change // even if the map tasks are re-tried. - if (mapStage.rdd.outputDeterministicLevel == DeterministicLevel.INDETERMINATE) { + if (mapStage.isIndeterminate) { // It's a little tricky to find all the succeeding stages of `mapStage`, because // each stage only know its parents not children. 
Here we traverse the stages from // the leaf nodes (the result stages of active jobs), and rollback all the stages @@ -1603,15 +1619,22 @@ private[spark] class DAGScheduler( activeJobs.foreach(job => collectStagesToRollback(job.finalStage :: Nil)) + // The stages will be rolled back after checking + val rollingBackStages = HashSet[Stage](mapStage) stagesToRollback.foreach { case mapStage: ShuffleMapStage => val numMissingPartitions = mapStage.findMissingPartitions().length if (numMissingPartitions < mapStage.numTasks) { - // TODO: support to rollback shuffle files. - // Currently the shuffle writing is "first write wins", so we can't re-run a - // shuffle map stage and overwrite existing shuffle files. We have to finish - // SPARK-8029 first. - abortStage(mapStage, generateErrorMessage(mapStage), None) + if (sc.getConf.get(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL)) { + val reason = "A shuffle map stage with indeterminate output was failed " + + "and retried. However, Spark can only do this while using the new " + + "shuffle block fetching protocol. Please check the config " + + "'spark.shuffle.useOldFetchProtocol', see more detail in " + + "SPARK-27665 and SPARK-25341." + abortStage(mapStage, reason, None) + } else { + rollingBackStages += mapStage + } } case resultStage: ResultStage if resultStage.activeJob.isDefined => @@ -1623,6 +1646,9 @@ private[spark] class DAGScheduler( case _ => } + logInfo(s"The shuffle map stage $mapStage with indeterminate output was failed, " + + s"we will roll back and rerun below stages which include itself and all its " + + s"indeterminate child stages: $rollingBackStages") } // We expect one executor failure to trigger many FetchFailures in rapid succession, @@ -1862,7 +1888,7 @@ private[spark] class DAGScheduler( clearCacheLocs() } - private[scheduler] def handleExecutorAdded(execId: String, host: String) { + private[scheduler] def handleExecutorAdded(execId: String, host: String): Unit = { // remove from failedEpoch(execId) ? 
if (failedEpoch.contains(execId)) { logInfo("Host added was in lost list earlier: " + host) @@ -1870,7 +1896,7 @@ private[spark] class DAGScheduler( } } - private[scheduler] def handleStageCancellation(stageId: Int, reason: Option[String]) { + private[scheduler] def handleStageCancellation(stageId: Int, reason: Option[String]): Unit = { stageIdToStage.get(stageId) match { case Some(stage) => val jobsThatUseStage: Array[Int] = stage.jobIds.toArray @@ -1888,7 +1914,7 @@ private[spark] class DAGScheduler( } } - private[scheduler] def handleJobCancellation(jobId: Int, reason: Option[String]) { + private[scheduler] def handleJobCancellation(jobId: Int, reason: Option[String]): Unit = { if (!jobIdToStageIds.contains(jobId)) { logDebug("Trying to cancel unregistered job " + jobId) } else { @@ -2010,7 +2036,7 @@ private[spark] class DAGScheduler( // caused by recursively visiting val waitingForVisit = new ListBuffer[RDD[_]] waitingForVisit += stage.rdd - def visit(rdd: RDD[_]) { + def visit(rdd: RDD[_]): Unit = { if (!visitedRdds(rdd)) { visitedRdds += rdd for (dep <- rdd.dependencies) { @@ -2103,7 +2129,7 @@ private[spark] class DAGScheduler( listenerBus.post(SparkListenerJobEnd(job.jobId, clock.getTimeMillis(), JobSucceeded)) } - def stop() { + def stop(): Unit = { messageScheduler.shutdownNow() eventProcessLoop.stop() taskScheduler.stop() diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index 48eb2da3015f8..8c23388b37a3d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -17,24 +17,20 @@ package org.apache.spark.scheduler -import java.io._ import java.net.URI -import java.nio.charset.StandardCharsets -import scala.collection.mutable.{ArrayBuffer, Map} +import scala.collection.mutable import org.apache.hadoop.conf.Configuration -import 
org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} -import org.apache.hadoop.fs.permission.FsPermission import org.json4s.JsonAST.JValue import org.json4s.jackson.JsonMethods._ import org.apache.spark.{SPARK_VERSION, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.EventLogFileWriter import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ -import org.apache.spark.io.CompressionCodec import org.apache.spark.util.{JsonProtocol, Utils} /** @@ -42,13 +38,12 @@ import org.apache.spark.util.{JsonProtocol, Utils} * * Event logging is specified by the following configurable parameters: * spark.eventLog.enabled - Whether event logging is enabled. - * spark.eventLog.logBlockUpdates.enabled - Whether to log block updates - * spark.eventLog.compress - Whether to compress logged events - * spark.eventLog.compression.codec - The codec to compress logged events - * spark.eventLog.overwrite - Whether to overwrite any existing files. * spark.eventLog.dir - Path to the directory in which events are logged. - * spark.eventLog.buffer.kb - Buffer size to use when writing to output streams + * spark.eventLog.logBlockUpdates.enabled - Whether to log block updates * spark.eventLog.logStageExecutorMetrics.enabled - Whether to log stage executor metrics + * + * Event log file writer maintains its own parameters: refer the doc of [[EventLogFileWriter]] + * and its descendant for more details. 
*/ private[spark] class EventLoggingListener( appId: String, @@ -64,96 +59,43 @@ private[spark] class EventLoggingListener( this(appId, appAttemptId, logBaseDir, sparkConf, SparkHadoopUtil.get.newConfiguration(sparkConf)) - private val shouldCompress = sparkConf.get(EVENT_LOG_COMPRESS) - private val shouldOverwrite = sparkConf.get(EVENT_LOG_OVERWRITE) - private val shouldLogBlockUpdates = sparkConf.get(EVENT_LOG_BLOCK_UPDATES) - private val shouldAllowECLogs = sparkConf.get(EVENT_LOG_ALLOW_EC) - private val shouldLogStageExecutorMetrics = sparkConf.get(EVENT_LOG_STAGE_EXECUTOR_METRICS) - private val testing = sparkConf.get(EVENT_LOG_TESTING) - private val outputBufferSize = sparkConf.get(EVENT_LOG_OUTPUT_BUFFER_SIZE).toInt - private val fileSystem = Utils.getHadoopFileSystem(logBaseDir, hadoopConf) - private val compressionCodec = - if (shouldCompress) { - Some(CompressionCodec.createCodec(sparkConf, sparkConf.get(EVENT_LOG_COMPRESSION_CODEC))) - } else { - None - } - // Visible for tests only. - private[scheduler] val compressionCodecName = compressionCodec.map { c => - CompressionCodec.getShortName(c.getClass.getName) - } - - // Only defined if the file system scheme is not local - private var hadoopDataStream: Option[FSDataOutputStream] = None - - private var writer: Option[PrintWriter] = None + // For testing. + private[scheduler] val logWriter: EventLogFileWriter = + EventLogFileWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) // For testing. Keep track of all JSON serialized events that have been logged. - private[scheduler] val loggedEvents = new ArrayBuffer[JValue] + private[scheduler] val loggedEvents = new mutable.ArrayBuffer[JValue] - // Visible for tests only. 
- private[scheduler] val logPath = getLogPath(logBaseDir, appId, appAttemptId, compressionCodecName) + private val shouldLogBlockUpdates = sparkConf.get(EVENT_LOG_BLOCK_UPDATES) + private val shouldLogStageExecutorMetrics = sparkConf.get(EVENT_LOG_STAGE_EXECUTOR_METRICS) + private val testing = sparkConf.get(EVENT_LOG_TESTING) // map of (stageId, stageAttempt) to executor metric peaks per executor/driver for the stage - private val liveStageExecutorMetrics = Map.empty[(Int, Int), Map[String, ExecutorMetrics]] + private val liveStageExecutorMetrics = + mutable.HashMap.empty[(Int, Int), mutable.HashMap[String, ExecutorMetrics]] /** * Creates the log file in the configured log directory. */ - def start() { - if (!fileSystem.getFileStatus(new Path(logBaseDir)).isDirectory) { - throw new IllegalArgumentException(s"Log directory $logBaseDir is not a directory.") - } - - val workingPath = logPath + IN_PROGRESS - val path = new Path(workingPath) - val uri = path.toUri - val defaultFs = FileSystem.getDefaultUri(hadoopConf).getScheme - val isDefaultLocal = defaultFs == null || defaultFs == "file" - - if (shouldOverwrite && fileSystem.delete(path, true)) { - logWarning(s"Event log $path already exists. Overwriting...") - } - - /* The Hadoop LocalFileSystem (r1.0.4) has known issues with syncing (HADOOP-7844). - * Therefore, for local files, use FileOutputStream instead. 
*/ - val dstream = - if ((isDefaultLocal && uri.getScheme == null) || uri.getScheme == "file") { - new FileOutputStream(uri.getPath) - } else { - hadoopDataStream = Some(if (shouldAllowECLogs) { - fileSystem.create(path) - } else { - SparkHadoopUtil.createNonECFile(fileSystem, path) - }) - hadoopDataStream.get - } + def start(): Unit = { + logWriter.start() + initEventLog() + } - try { - val cstream = compressionCodec.map(_.compressedOutputStream(dstream)).getOrElse(dstream) - val bstream = new BufferedOutputStream(cstream, outputBufferSize) - - EventLoggingListener.initEventLog(bstream, testing, loggedEvents) - fileSystem.setPermission(path, LOG_FILE_PERMISSIONS) - writer = Some(new PrintWriter(bstream)) - logInfo("Logging events to %s".format(logPath)) - } catch { - case e: Exception => - dstream.close() - throw e + private def initEventLog(): Unit = { + val metadata = SparkListenerLogStart(SPARK_VERSION) + val eventJson = JsonProtocol.logStartToJson(metadata) + val metadataJson = compact(eventJson) + logWriter.writeEvent(metadataJson, flushLogger = true) + if (testing && loggedEvents != null) { + loggedEvents += eventJson } } /** Log the event as JSON. 
*/ - private def logEvent(event: SparkListenerEvent, flushLogger: Boolean = false) { + private def logEvent(event: SparkListenerEvent, flushLogger: Boolean = false): Unit = { val eventJson = JsonProtocol.sparkEventToJson(event) - // scalastyle:off println - writer.foreach(_.println(compact(render(eventJson)))) - // scalastyle:on println - if (flushLogger) { - writer.foreach(_.flush()) - hadoopDataStream.foreach(_.hflush()) - } + logWriter.writeEvent(compact(render(eventJson)), flushLogger) if (testing) { loggedEvents += eventJson } @@ -165,7 +107,7 @@ private[spark] class EventLoggingListener( if (shouldLogStageExecutorMetrics) { // record the peak metrics for the new stage liveStageExecutorMetrics.put((event.stageInfo.stageId, event.stageInfo.attemptNumber()), - Map.empty[String, ExecutorMetrics]) + mutable.HashMap.empty[String, ExecutorMetrics]) } } @@ -299,32 +241,9 @@ private[spark] class EventLoggingListener( } } - /** - * Stop logging events. The event log file will be renamed so that it loses the - * ".inprogress" suffix. - */ + /** Stop logging events. */ def stop(): Unit = { - writer.foreach(_.close()) - - val target = new Path(logPath) - if (fileSystem.exists(target)) { - if (shouldOverwrite) { - logWarning(s"Event log $target already exists. Overwriting...") - if (!fileSystem.delete(target, true)) { - logWarning(s"Error deleting $target") - } - } else { - throw new IOException("Target log file already exists (%s)".format(logPath)) - } - } - fileSystem.rename(new Path(logPath + IN_PROGRESS), target) - // touch file to ensure modtime is current across those filesystems where rename() - // does not set it, -and which support setTimes(); it's a no-op on most object stores - try { - fileSystem.setTimes(target, System.currentTimeMillis(), -1) - } catch { - case e: Exception => logDebug(s"failed to set time of $target", e) - } + logWriter.stop() } private[spark] def redactEvent( @@ -336,8 +255,10 @@ private[spark] class EventLoggingListener( // ... 
// where jvmInformation, sparkProperties, etc. are sequence of tuples. // We go through the various of properties and redact sensitive information from them. - val redactedProps = event.environmentDetails.map{ case (name, props) => - name -> Utils.redact(sparkConf, props) + val noRedactProps = Seq("Classpath Entries") + val redactedProps = event.environmentDetails.map { + case (name, props) if noRedactProps.contains(name) => name -> props + case (name, props) => name -> Utils.redact(sparkConf, props) } SparkListenerEnvironmentUpdate(redactedProps) } @@ -345,93 +266,7 @@ private[spark] class EventLoggingListener( } private[spark] object EventLoggingListener extends Logging { - // Suffix applied to the names of files still being written by applications. - val IN_PROGRESS = ".inprogress" val DEFAULT_LOG_DIR = "/tmp/spark-events" // Dummy stage key used by driver in executor metrics updates val DRIVER_STAGE_KEY = (-1, -1) - - private val LOG_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort) - - // A cache for compression codecs to avoid creating the same codec many times - private val codecMap = Map.empty[String, CompressionCodec] - - /** - * Write metadata about an event log to the given stream. - * The metadata is encoded in the first line of the event log as JSON. - * - * @param logStream Raw output stream to the event log file. - */ - def initEventLog( - logStream: OutputStream, - testing: Boolean, - loggedEvents: ArrayBuffer[JValue]): Unit = { - val metadata = SparkListenerLogStart(SPARK_VERSION) - val eventJson = JsonProtocol.logStartToJson(metadata) - val metadataJson = compact(eventJson) + "\n" - logStream.write(metadataJson.getBytes(StandardCharsets.UTF_8)) - if (testing && loggedEvents != null) { - loggedEvents += eventJson - } - } - - /** - * Return a file-system-safe path to the log file for the given application. 
- * - * Note that because we currently only create a single log file for each application, - * we must encode all the information needed to parse this event log in the file name - * instead of within the file itself. Otherwise, if the file is compressed, for instance, - * we won't know which codec to use to decompress the metadata needed to open the file in - * the first place. - * - * The log file name will identify the compression codec used for the contents, if any. - * For example, app_123 for an uncompressed log, app_123.lzf for an LZF-compressed log. - * - * @param logBaseDir Directory where the log file will be written. - * @param appId A unique app ID. - * @param appAttemptId A unique attempt id of appId. May be the empty string. - * @param compressionCodecName Name to identify the codec used to compress the contents - * of the log, or None if compression is not enabled. - * @return A path which consists of file-system-safe characters. - */ - def getLogPath( - logBaseDir: URI, - appId: String, - appAttemptId: Option[String], - compressionCodecName: Option[String] = None): String = { - val base = new Path(logBaseDir).toString.stripSuffix("/") + "/" + Utils.sanitizeDirName(appId) - val codec = compressionCodecName.map("." + _).getOrElse("") - if (appAttemptId.isDefined) { - base + "_" + Utils.sanitizeDirName(appAttemptId.get) + codec - } else { - base + codec - } - } - - /** - * Opens an event log file and returns an input stream that contains the event data. - * - * @return input stream that holds one JSON record per line. 
- */ - def openEventLog(log: Path, fs: FileSystem): InputStream = { - val in = new BufferedInputStream(fs.open(log)) - try { - val codec = codecName(log).map { c => - codecMap.getOrElseUpdate(c, CompressionCodec.createCodec(new SparkConf, c)) - } - codec.map(_.compressedContinuousInputStream(in)).getOrElse(in) - } catch { - case e: Throwable => - in.close() - throw e - } - } - - def codecName(log: Path): Option[String] = { - // Compression codec is encoded as an extension, e.g. app_123.lzf - // Since we sanitize the app ID to not include periods, it is safe to split on it - val logName = log.getName.stripSuffix(IN_PROGRESS) - logName.split("\\.").tail.lastOption - } - } diff --git a/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala index 46a35b6a2eaf9..ee31093ec0652 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala @@ -58,3 +58,11 @@ private [spark] object LossReasonPending extends ExecutorLossReason("Pending los private[spark] case class SlaveLost(_message: String = "Slave lost", workerLost: Boolean = false) extends ExecutorLossReason(_message) + +/** + * A loss reason that means the executor is marked for decommissioning. + * + * This is used by the task scheduler to remove state associated with the executor, but + not yet to fail any tasks that were running in the executor before the executor is "fully" lost.
+ */ +private [spark] object ExecutorDecommission extends ExecutorLossReason("Executor decommission.") diff --git a/core/src/main/scala/org/apache/spark/scheduler/ExecutorResourceInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/ExecutorResourceInfo.scala index 02047609edd96..fd04db8c09d76 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ExecutorResourceInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ExecutorResourceInfo.scala @@ -25,10 +25,15 @@ import org.apache.spark.resource.{ResourceAllocator, ResourceInformation} * information. * @param name Resource name * @param addresses Resource addresses provided by the executor + * @param numParts Number of ways each resource is subdivided when scheduling tasks */ -private[spark] class ExecutorResourceInfo(name: String, addresses: Seq[String]) +private[spark] class ExecutorResourceInfo( + name: String, + addresses: Seq[String], + numParts: Int) extends ResourceInformation(name, addresses.toArray) with ResourceAllocator { override protected def resourceName = this.name override protected def resourceAddresses = this.addresses + override protected def slotsPerAddress: Int = numParts } diff --git a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala index 66ab9a52b7781..2d26a314e7a62 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala @@ -64,7 +64,7 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl case _ => false } - private def validate() { + private def validate(): Unit = { logDebug("validate InputFormatInfo : " + inputFormatClazz + ", path " + path) try { diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala index 65d7184231e24..feed831620840 100644 --- 
a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala @@ -49,7 +49,7 @@ private[spark] class JobWaiter[T]( * asynchronously. After the low level scheduler cancels all the tasks belonging to this job, it * will fail this job with a SparkException. */ - def cancel() { + def cancel(): Unit = { dagScheduler.cancelJob(jobId, None) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala index 302ebd30da228..95b0096cade38 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala @@ -186,6 +186,17 @@ private[spark] class LiveListenerBus(conf: SparkConf) { metricsSystem.registerSource(metrics) } + /** + * For testing only. Wait until there are no more events in the queue, or until the default + * wait time has elapsed. Throw `TimeoutException` if the specified time elapsed before the queue + * emptied. + * Exposed for testing. + */ + @throws(classOf[TimeoutException]) + private[spark] def waitUntilEmpty(): Unit = { + waitUntilEmpty(TimeUnit.SECONDS.toMillis(10)) + } + /** * For testing only. Wait until there are no more events in the queue, or until the specified * time has elapsed. Throw `TimeoutException` if the specified time elapsed before the queue @@ -215,10 +226,8 @@ private[spark] class LiveListenerBus(conf: SparkConf) { return } - synchronized { - queues.asScala.foreach(_.stop()) - queues.clear() - } + queues.asScala.foreach(_.stop()) + queues.clear() } // For testing only. 
diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index 64f0a060a247c..7f8893ff3b9d8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -43,6 +43,12 @@ private[spark] sealed trait MapStatus { * necessary for correctness, since block fetchers are allowed to skip zero-size blocks. */ def getSizeForBlock(reduceId: Int): Long + + /** + * The unique ID of this shuffle map task. If spark.shuffle.useOldFetchProtocol is enabled, the + * partitionId of the task is used; otherwise taskContext.taskAttemptId is used. + */ + def mapId: Long } @@ -56,11 +62,14 @@ private[spark] object MapStatus { .map(_.conf.get(config.SHUFFLE_MIN_NUM_PARTS_TO_HIGHLY_COMPRESS)) .getOrElse(config.SHUFFLE_MIN_NUM_PARTS_TO_HIGHLY_COMPRESS.defaultValue.get) - def apply(loc: BlockManagerId, uncompressedSizes: Array[Long]): MapStatus = { + def apply( + loc: BlockManagerId, + uncompressedSizes: Array[Long], + mapTaskId: Long): MapStatus = { if (uncompressedSizes.length > minPartitionsToUseHighlyCompressMapStatus) { - HighlyCompressedMapStatus(loc, uncompressedSizes) + HighlyCompressedMapStatus(loc, uncompressedSizes, mapTaskId) } else { - new CompressedMapStatus(loc, uncompressedSizes) + new CompressedMapStatus(loc, uncompressedSizes, mapTaskId) } } @@ -100,16 +109,19 @@ private[spark] object MapStatus { * * @param loc location where the task is being executed. * @param compressedSizes size of the blocks, indexed by reduce partition id.
+ * @param _mapTaskId unique task id for the task */ private[spark] class CompressedMapStatus( private[this] var loc: BlockManagerId, - private[this] var compressedSizes: Array[Byte]) + private[this] var compressedSizes: Array[Byte], + private[this] var _mapTaskId: Long) extends MapStatus with Externalizable { - protected def this() = this(null, null.asInstanceOf[Array[Byte]]) // For deserialization only + // For deserialization only + protected def this() = this(null, null.asInstanceOf[Array[Byte]], -1) - def this(loc: BlockManagerId, uncompressedSizes: Array[Long]) { - this(loc, uncompressedSizes.map(MapStatus.compressSize)) + def this(loc: BlockManagerId, uncompressedSizes: Array[Long], mapTaskId: Long) { + this(loc, uncompressedSizes.map(MapStatus.compressSize), mapTaskId) } override def location: BlockManagerId = loc @@ -118,10 +130,13 @@ private[spark] class CompressedMapStatus( MapStatus.decompressSize(compressedSizes(reduceId)) } + override def mapId: Long = _mapTaskId + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { loc.writeExternal(out) out.writeInt(compressedSizes.length) out.write(compressedSizes) + out.writeLong(_mapTaskId) } override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { @@ -129,6 +144,7 @@ private[spark] class CompressedMapStatus( val len = in.readInt() compressedSizes = new Array[Byte](len) in.readFully(compressedSizes) + _mapTaskId = in.readLong() } } @@ -142,20 +158,23 @@ private[spark] class CompressedMapStatus( * @param emptyBlocks a bitmap tracking which blocks are empty * @param avgSize average size of the non-empty and non-huge blocks * @param hugeBlockSizes sizes of huge blocks by their reduceId. 
+ * @param _mapTaskId unique task id for the task */ private[spark] class HighlyCompressedMapStatus private ( private[this] var loc: BlockManagerId, private[this] var numNonEmptyBlocks: Int, private[this] var emptyBlocks: RoaringBitmap, private[this] var avgSize: Long, - private[this] var hugeBlockSizes: scala.collection.Map[Int, Byte]) + private[this] var hugeBlockSizes: scala.collection.Map[Int, Byte], + private[this] var _mapTaskId: Long) extends MapStatus with Externalizable { // loc could be null when the default constructor is called during deserialization - require(loc == null || avgSize > 0 || hugeBlockSizes.size > 0 || numNonEmptyBlocks == 0, + require(loc == null || avgSize > 0 || hugeBlockSizes.size > 0 + || numNonEmptyBlocks == 0 || _mapTaskId > 0, "Average size can only be zero for map stages that produced no output") - protected def this() = this(null, -1, null, -1, null) // For deserialization only + protected def this() = this(null, -1, null, -1, null, -1) // For deserialization only override def location: BlockManagerId = loc @@ -171,6 +190,8 @@ private[spark] class HighlyCompressedMapStatus private ( } } + override def mapId: Long = _mapTaskId + override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { loc.writeExternal(out) emptyBlocks.writeExternal(out) @@ -180,6 +201,7 @@ private[spark] class HighlyCompressedMapStatus private ( out.writeInt(kv._1) out.writeByte(kv._2) } + out.writeLong(_mapTaskId) } override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException { @@ -195,11 +217,15 @@ private[spark] class HighlyCompressedMapStatus private ( hugeBlockSizesImpl(block) = size } hugeBlockSizes = hugeBlockSizesImpl + _mapTaskId = in.readLong() } } private[spark] object HighlyCompressedMapStatus { - def apply(loc: BlockManagerId, uncompressedSizes: Array[Long]): HighlyCompressedMapStatus = { + def apply( + loc: BlockManagerId, + uncompressedSizes: Array[Long], + mapTaskId: Long): HighlyCompressedMapStatus = { // We 
must keep track of which blocks are empty so that we don't report a zero-sized // block as being non-empty (or vice-versa) when using the average block size. var i = 0 @@ -240,6 +266,6 @@ private[spark] object HighlyCompressedMapStatus { emptyBlocks.trim() emptyBlocks.runOptimize() new HighlyCompressedMapStatus(loc, numNonEmptyBlocks, emptyBlocks, avgSize, - hugeBlockSizes) + hugeBlockSizes, mapTaskId) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala index f4b0ab10155a2..2e2851eb9070b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala @@ -59,14 +59,14 @@ private[spark] class Pool( } } - override def addSchedulable(schedulable: Schedulable) { + override def addSchedulable(schedulable: Schedulable): Unit = { require(schedulable != null) schedulableQueue.add(schedulable) schedulableNameToSchedulable.put(schedulable.name, schedulable) schedulable.parent = this } - override def removeSchedulable(schedulable: Schedulable) { + override def removeSchedulable(schedulable: Schedulable): Unit = { schedulableQueue.remove(schedulable) schedulableNameToSchedulable.remove(schedulable.name) } @@ -84,10 +84,14 @@ private[spark] class Pool( null } - override def executorLost(executorId: String, host: String, reason: ExecutorLossReason) { + override def executorLost(executorId: String, host: String, reason: ExecutorLossReason): Unit = { schedulableQueue.asScala.foreach(_.executorLost(executorId, host, reason)) } + override def executorDecommission(executorId: String): Unit = { + schedulableQueue.asScala.foreach(_.executorDecommission(executorId)) + } + override def checkSpeculatableTasks(minTimeToSpeculation: Int): Boolean = { var shouldRevive = false for (schedulable <- schedulableQueue.asScala) { @@ -106,14 +110,14 @@ private[spark] class Pool( sortedTaskSetQueue } - def increaseRunningTasks(taskNum: Int) { + def 
increaseRunningTasks(taskNum: Int): Unit = { runningTasks += taskNum if (parent != null) { parent.increaseRunningTasks(taskNum) } } - def decreaseRunningTasks(taskNum: Int) { + def decreaseRunningTasks(taskNum: Int): Unit = { runningTasks -= taskNum if (parent != null) { parent.decreaseRunningTasks(taskNum) diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala index 226c23733c870..60b6fe7a60915 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.io.{EOFException, InputStream, IOException} -import scala.io.Source +import scala.io.{Codec, Source} import com.fasterxml.jackson.core.JsonParseException import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException @@ -48,13 +48,15 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { * @param eventsFilter Filter function to select JSON event strings in the log data stream that * should be parsed and replayed. When not specified, all event strings in the log data * are parsed and replayed. + * @return whether it succeeded in replaying the log file entirely without error (including + * HaltReplayException); false otherwise.
*/ def replay( logData: InputStream, sourceName: String, maybeTruncated: Boolean = false, - eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = { - val lines = Source.fromInputStream(logData).getLines() + eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Boolean = { + val lines = Source.fromInputStream(logData)(Codec.UTF8).getLines() replay(lines, sourceName, maybeTruncated, eventsFilter) } @@ -66,7 +68,7 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { lines: Iterator[String], sourceName: String, maybeTruncated: Boolean, - eventsFilter: ReplayEventsFilter): Unit = { + eventsFilter: ReplayEventsFilter): Boolean = { var currentLine: String = null var lineNumber: Int = 0 val unrecognizedEvents = new scala.collection.mutable.HashSet[String] @@ -114,15 +116,18 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { } } } + true } catch { case e: HaltReplayException => // Just stop replay. - case _: EOFException if maybeTruncated => + false + case _: EOFException if maybeTruncated => false case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") + false } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala index d1687830ff7bf..7fdc3186e86bd 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ResultStage.scala @@ -34,8 +34,9 @@ private[spark] class ResultStage( val partitions: Array[Int], parents: List[Stage], firstJobId: Int, - callSite: CallSite) - extends Stage(id, rdd, partitions.length, parents, firstJobId, callSite) { + callSite: CallSite, + resourceProfileId: Int) + extends Stage(id, rdd, partitions.length, parents, firstJobId, callSite, resourceProfileId) { /** * The active job for this result stage. 
Will be empty if the job has already finished diff --git a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala index b6f88ed0a93aa..8cc239c81d11a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala @@ -43,6 +43,7 @@ private[spark] trait Schedulable { def removeSchedulable(schedulable: Schedulable): Unit def getSchedulableByName(name: String): Schedulable def executorLost(executorId: String, host: String, reason: ExecutorLossReason): Unit + def executorDecommission(executorId: String): Unit def checkSpeculatableTasks(minTimeToSpeculation: Int): Boolean def getSortedTaskSetQueue: ArrayBuffer[TaskSetManager] } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala index c85c74f2fb973..8f6a22177a5b8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala @@ -45,11 +45,11 @@ private[spark] trait SchedulableBuilder { private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) extends SchedulableBuilder with Logging { - override def buildPools() { + override def buildPools(): Unit = { // nothing } - override def addTaskSetManager(manager: Schedulable, properties: Properties) { + override def addTaskSetManager(manager: Schedulable, properties: Properties): Unit = { rootPool.addSchedulable(manager) } } @@ -70,7 +70,7 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, conf: SparkConf) val DEFAULT_MINIMUM_SHARE = 0 val DEFAULT_WEIGHT = 1 - override def buildPools() { + override def buildPools(): Unit = { var fileData: Option[(InputStream, String)] = None try { fileData = schedulerAllocFile.map { f => @@ -106,7 +106,7 @@ private[spark] class FairSchedulableBuilder(val 
rootPool: Pool, conf: SparkConf) buildDefaultPool() } - private def buildDefaultPool() { + private def buildDefaultPool(): Unit = { if (rootPool.getSchedulableByName(DEFAULT_POOL_NAME) == null) { val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) @@ -116,7 +116,7 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, conf: SparkConf) } } - private def buildFairSchedulerPool(is: InputStream, fileName: String) { + private def buildFairSchedulerPool(is: InputStream, fileName: String): Unit = { val xml = XML.load(is) for (poolNode <- (xml \\ POOLS_PROPERTY)) { @@ -180,7 +180,7 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, conf: SparkConf) } } - override def addTaskSetManager(manager: Schedulable, properties: Properties) { + override def addTaskSetManager(manager: Schedulable, properties: Properties): Unit = { val poolName = if (properties != null) { properties.getProperty(FAIR_SCHEDULER_PROPERTIES, DEFAULT_POOL_NAME) } else { diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala index 9159d2a0158d5..4752353046c19 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala @@ -27,6 +27,9 @@ private[spark] trait SchedulerBackend { def start(): Unit def stop(): Unit + /** + * Update the current offers and schedule tasks + */ def reviveOffers(): Unit def defaultParallelism(): Int diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapStage.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapStage.scala index 1b44d0aee3195..be1984de9837f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapStage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapStage.scala @@ -42,8 +42,9 @@ private[spark] class ShuffleMapStage( firstJobId: Int, 
callSite: CallSite, val shuffleDep: ShuffleDependency[_, _, _], - mapOutputTrackerMaster: MapOutputTrackerMaster) - extends Stage(id, rdd, numTasks, parents, firstJobId, callSite) { + mapOutputTrackerMaster: MapOutputTrackerMaster, + resourceProfileId: Int) + extends Stage(id, rdd, numTasks, parents, firstJobId, callSite, resourceProfileId) { private[this] var _mapStageJobs: List[ActiveJob] = Nil diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index 710f5eb211dde..4c0c30a3caf67 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -23,7 +23,7 @@ import java.util.Properties import org.apache.spark._ import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.rdd.RDD /** @@ -91,7 +91,12 @@ private[spark] class ShuffleMapTask( val rdd = rddAndDep._1 val dep = rddAndDep._2 - dep.shuffleWriterProcessor.write(rdd, dep, partitionId, context, partition) + // While we use the old shuffle fetch protocol, we use partitionId as mapId in the + // ShuffleBlockId construction. 
+ val mapId = if (SparkEnv.get.conf.get(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL)) { + partitionId + } else context.taskAttemptId() + dep.shuffleWriterProcessor.write(rdd, dep, mapId, context, partition) } override def preferredLocations: Seq[TaskLocation] = preferredLocs diff --git a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala index 26cca334d3bd5..ae7924d66a301 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Stage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala @@ -21,7 +21,7 @@ import scala.collection.mutable.HashSet import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.Logging -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{DeterministicLevel, RDD} import org.apache.spark.util.CallSite /** @@ -59,7 +59,8 @@ private[scheduler] abstract class Stage( val numTasks: Int, val parents: List[Stage], val firstJobId: Int, - val callSite: CallSite) + val callSite: CallSite, + val resourceProfileId: Int) extends Logging { val numPartitions = rdd.partitions.length @@ -79,7 +80,8 @@ private[scheduler] abstract class Stage( * StageInfo to tell SparkListeners when a job starts (which happens before any stage attempts * have been created). */ - private var _latestInfo: StageInfo = StageInfo.fromStage(this, nextAttemptId) + private var _latestInfo: StageInfo = + StageInfo.fromStage(this, nextAttemptId, resourceProfileId = resourceProfileId) /** * Set of stage attempt IDs that have failed. 
We keep track of these failures in order to avoid @@ -100,7 +102,8 @@ private[scheduler] abstract class Stage( val metrics = new TaskMetrics metrics.register(rdd.sparkContext) _latestInfo = StageInfo.fromStage( - this, nextAttemptId, Some(numPartitionsToCompute), metrics, taskLocalityPreferences) + this, nextAttemptId, Some(numPartitionsToCompute), metrics, taskLocalityPreferences, + resourceProfileId = resourceProfileId) nextAttemptId += 1 } @@ -116,4 +119,8 @@ private[scheduler] abstract class Stage( /** Returns the sequence of partition ids that are missing (i.e. needs to be computed). */ def findMissingPartitions(): Seq[Int] + + def isIndeterminate: Boolean = { + rdd.outputDeterministicLevel == DeterministicLevel.INDETERMINATE + } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala index e3216151462bd..556478d83cf39 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala @@ -38,7 +38,8 @@ class StageInfo( val details: String, val taskMetrics: TaskMetrics = null, private[spark] val taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty, - private[spark] val shuffleDepId: Option[Int] = None) { + private[spark] val shuffleDepId: Option[Int] = None, + val resourceProfileId: Int) { /** When this stage was submitted from the DAGScheduler to a TaskScheduler. */ var submissionTime: Option[Long] = None /** Time when all tasks in the stage completed or when the stage was cancelled. 
*/ @@ -52,7 +53,7 @@ class StageInfo( */ val accumulables = HashMap[Long, AccumulableInfo]() - def stageFailed(reason: String) { + def stageFailed(reason: String): Unit = { failureReason = Some(reason) completionTime = Some(System.currentTimeMillis) } @@ -87,7 +88,8 @@ private[spark] object StageInfo { attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, - taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty + taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty, + resourceProfileId: Int ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos @@ -105,6 +107,7 @@ private[spark] object StageInfo { stage.details, taskMetrics, taskLocalityPreferences, - shuffleDepId) + shuffleDepId, + resourceProfileId) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala b/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala index 3c7af4f6146fa..ca48775e77f27 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala @@ -36,7 +36,7 @@ class StatsReportListener extends SparkListener with Logging { private val taskInfoMetrics = mutable.Buffer[(TaskInfo, TaskMetrics)]() - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { val info = taskEnd.taskInfo val metrics = taskEnd.taskMetrics if (info != null && metrics != null) { @@ -44,7 +44,7 @@ class StatsReportListener extends SparkListener with Logging { } } - override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) { + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { implicit val sc = stageCompleted this.logInfo(s"Finished stage: ${getStatusDetail(stageCompleted.stageInfo)}") showMillisDistribution("task runtime:", 
(info, _) => info.duration, taskInfoMetrics) @@ -108,7 +108,7 @@ private[spark] object StatsReportListener extends Logging { (info, metric) => { getMetric(info, metric).toDouble }) } - def showDistribution(heading: String, d: Distribution, formatNumber: Double => String) { + def showDistribution(heading: String, d: Distribution, formatNumber: Double => String): Unit = { val stats = d.statCounter val quantiles = d.getQuantiles(probabilities).map(formatNumber) logInfo(heading + stats) @@ -119,11 +119,11 @@ private[spark] object StatsReportListener extends Logging { def showDistribution( heading: String, dOpt: Option[Distribution], - formatNumber: Double => String) { + formatNumber: Double => String): Unit = { dOpt.foreach { d => showDistribution(heading, d, formatNumber)} } - def showDistribution(heading: String, dOpt: Option[Distribution], format: String) { + def showDistribution(heading: String, dOpt: Option[Distribution], format: String): Unit = { def f(d: Double): String = format.format(d) showDistribution(heading, dOpt, f _) } @@ -132,26 +132,26 @@ private[spark] object StatsReportListener extends Logging { heading: String, format: String, getMetric: (TaskInfo, TaskMetrics) => Double, - taskInfoMetrics: Seq[(TaskInfo, TaskMetrics)]) { + taskInfoMetrics: Seq[(TaskInfo, TaskMetrics)]): Unit = { showDistribution(heading, extractDoubleDistribution(taskInfoMetrics, getMetric), format) } def showBytesDistribution( heading: String, getMetric: (TaskInfo, TaskMetrics) => Long, - taskInfoMetrics: Seq[(TaskInfo, TaskMetrics)]) { + taskInfoMetrics: Seq[(TaskInfo, TaskMetrics)]): Unit = { showBytesDistribution(heading, extractLongDistribution(taskInfoMetrics, getMetric)) } - def showBytesDistribution(heading: String, dOpt: Option[Distribution]) { + def showBytesDistribution(heading: String, dOpt: Option[Distribution]): Unit = { dOpt.foreach { dist => showBytesDistribution(heading, dist) } } - def showBytesDistribution(heading: String, dist: Distribution) { + def 
showBytesDistribution(heading: String, dist: Distribution): Unit = { showDistribution(heading, dist, (d => Utils.bytesToString(d.toLong)): Double => String) } - def showMillisDistribution(heading: String, dOpt: Option[Distribution]) { + def showMillisDistribution(heading: String, dOpt: Option[Distribution]): Unit = { showDistribution(heading, dOpt, (d => StatsReportListener.millisToString(d.toLong)): Double => String) } @@ -159,7 +159,7 @@ private[spark] object StatsReportListener extends Logging { def showMillisDistribution( heading: String, getMetric: (TaskInfo, TaskMetrics) => Long, - taskInfoMetrics: Seq[(TaskInfo, TaskMetrics)]) { + taskInfoMetrics: Seq[(TaskInfo, TaskMetrics)]): Unit = { showMillisDistribution(heading, extractLongDistribution(taskInfoMetrics, getMetric)) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index 01828f860bd5e..ebc1c05435fee 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -225,7 +225,7 @@ private[spark] abstract class Task[T]( * be called multiple times. * If interruptThread is true, we will also call Thread.interrupt() on the Task's executor thread. 
*/ - def kill(interruptThread: Boolean, reason: String) { + def kill(interruptThread: Boolean, reason: String): Unit = { require(reason != null) _reasonIfKilled = reason if (context != null) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala index 9843eab4f1346..921562bd15dae 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala @@ -70,11 +70,11 @@ class TaskInfo( var killed = false - private[spark] def markGettingResult(time: Long) { + private[spark] def markGettingResult(time: Long): Unit = { gettingResultTime = time } - private[spark] def markFinished(state: TaskState, time: Long) { + private[spark] def markFinished(state: TaskState, time: Long): Unit = { // finishTime should be set larger than 0, otherwise "finished" below will return false. assert(time > 0) finishTime = time diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 9b7f901c55e00..6c3d2a4ee3125 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -64,6 +64,9 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul val (result, size) = serializer.get().deserialize[TaskResult[_]](serializedData) match { case directResult: DirectTaskResult[_] => if (!taskSetManager.canFetchMoreResults(serializedData.limit())) { + // kill the task so that it will not become zombie task + scheduler.handleFailedTask(taskSetManager, tid, TaskState.KILLED, TaskKilled( + "Tasks result size has exceeded maxResultSize")) return } // deserialize "value" without holding any lock so that it won't block other threads. 
@@ -75,6 +78,9 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul if (!taskSetManager.canFetchMoreResults(size)) { // dropped by executor if size is larger than maxResultSize sparkEnv.blockManager.master.removeBlock(blockId) + // kill the task so that it will not become zombie task + scheduler.handleFailedTask(taskSetManager, tid, TaskState.KILLED, TaskKilled( + "Tasks result size has exceeded maxResultSize")) return } logDebug("Fetching indirect task result for TID %s".format(tid)) @@ -125,7 +131,7 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul } def enqueueFailedTask(taskSetManager: TaskSetManager, tid: Long, taskState: TaskState, - serializedData: ByteBuffer) { + serializedData: ByteBuffer): Unit = { var reason : TaskFailedReason = UnknownReason try { getTaskResultExecutor.execute(() => Utils.logUncaughtExceptions { @@ -164,7 +170,7 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul }) } - def stop() { + def stop(): Unit = { getTaskResultExecutor.shutdownNow() } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala index 8c73d563043c2..e9e638a3645ac 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala @@ -46,7 +46,7 @@ private[spark] trait TaskScheduler { // Invoked after system has successfully initialized (typically in spark context). // Yarn uses this to bootstrap allocation of resources based on preferred locations, // wait for slave registrations, etc. - def postStartHook() { } + def postStartHook(): Unit = { } // Disconnect from the cluster. def stop(): Unit @@ -72,7 +72,7 @@ private[spark] trait TaskScheduler { // Notify the corresponding `TaskSetManager`s of the stage, that a partition has already completed // and they can skip running tasks for it. 
- def notifyPartitionCompletion(stageId: Int, partitionId: Int) + def notifyPartitionCompletion(stageId: Int, partitionId: Int): Unit // Set the DAG scheduler for upcalls. This is guaranteed to be set before submitTasks is called. def setDAGScheduler(dagScheduler: DAGScheduler): Unit @@ -98,6 +98,11 @@ private[spark] trait TaskScheduler { */ def applicationId(): String = appId + /** + * Process a decommissioning executor. + */ + def executorDecommission(executorId: String): Unit + /** * Process a lost executor */ diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 1496dff31a4dc..1b197c4cca53e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -170,11 +170,11 @@ private[spark] class TaskSchedulerImpl( } } - override def setDAGScheduler(dagScheduler: DAGScheduler) { + override def setDAGScheduler(dagScheduler: DAGScheduler): Unit = { this.dagScheduler = dagScheduler } - def initialize(backend: SchedulerBackend) { + def initialize(backend: SchedulerBackend): Unit = { this.backend = backend schedulableBuilder = { schedulingMode match { @@ -192,7 +192,7 @@ private[spark] class TaskSchedulerImpl( def newTaskId(): Long = nextTaskId.getAndIncrement() - override def start() { + override def start(): Unit = { backend.start() if (!isLocal && conf.get(SPECULATION_ENABLED)) { @@ -203,11 +203,11 @@ private[spark] class TaskSchedulerImpl( } } - override def postStartHook() { + override def postStartHook(): Unit = { waitBackendReady() } - override def submitTasks(taskSet: TaskSet) { + override def submitTasks(taskSet: TaskSet): Unit = { val tasks = taskSet.tasks logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks") this.synchronized { @@ -233,7 +233,7 @@ private[spark] class TaskSchedulerImpl( if (!isLocal && !hasReceivedTask) { 
starvationTimer.scheduleAtFixedRate(new TimerTask() { - override def run() { + override def run(): Unit = { if (!hasLaunchedTask) { logWarning("Initial job has not accepted any resources; " + "check your cluster UI to ensure that workers are registered " + @@ -384,7 +384,9 @@ private[spark] class TaskSchedulerImpl( */ private def resourcesMeetTaskRequirements(resources: Map[String, Buffer[String]]): Boolean = { val resourcesFree = resources.map(r => r._1 -> r._2.length) - ResourceUtils.resourcesMeetRequirements(resourcesFree, resourcesReqsPerTask) + val meetsReqs = ResourceUtils.resourcesMeetRequirements(resourcesFree, resourcesReqsPerTask) + logDebug(s"Resources meet task requirements is: $meetsReqs") + meetsReqs } /** @@ -430,8 +432,7 @@ private[spark] class TaskSchedulerImpl( val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores / CPUS_PER_TASK)) val availableResources = shuffledOffers.map(_.resources).toArray val availableCpus = shuffledOffers.map(o => o.cores).toArray - val availableSlots = shuffledOffers.map(o => o.cores / CPUS_PER_TASK).sum - val sortedTaskSets = rootPool.getSortedTaskSetQueue + val sortedTaskSets = rootPool.getSortedTaskSetQueue.filterNot(_.isZombie) for (taskSet <- sortedTaskSets) { logDebug("parentName: %s, name: %s, runningTasks: %s".format( taskSet.parent.name, taskSet.name, taskSet.runningTasks)) @@ -444,6 +445,7 @@ private[spark] class TaskSchedulerImpl( // of locality levels so that it gets a chance to launch local tasks on all of them. // NOTE: the preferredLocality order: PROCESS_LOCAL, NODE_LOCAL, NO_PREF, RACK_LOCAL, ANY for (taskSet <- sortedTaskSets) { + val availableSlots = availableCpus.map(c => c / CPUS_PER_TASK).sum // Skip the barrier taskSet if the available slots are less than the number of pending tasks. if (taskSet.isBarrier && availableSlots < taskSet.numTasks) { // Skip the launch process. 
@@ -572,7 +574,7 @@ private[spark] class TaskSchedulerImpl( Random.shuffle(offers) } - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer): Unit = { var failedExecutor: Option[String] = None var reason: Option[ExecutorLossReason] = None synchronized { @@ -681,7 +683,7 @@ private[spark] class TaskSchedulerImpl( }) } - def error(message: String) { + def error(message: String): Unit = { synchronized { if (taskSetsByStageIdAndAttempt.nonEmpty) { // Have each task set throw a SparkException with the error @@ -704,7 +706,7 @@ private[spark] class TaskSchedulerImpl( } } - override def stop() { + override def stop(): Unit = { speculationScheduler.shutdown() if (backend != null) { backend.stop() @@ -722,7 +724,7 @@ private[spark] class TaskSchedulerImpl( override def defaultParallelism(): Int = backend.defaultParallelism() // Check for speculatable tasks in all our active jobs. - def checkSpeculatableTasks() { + def checkSpeculatableTasks(): Unit = { var shouldRevive = false synchronized { shouldRevive = rootPool.checkSpeculatableTasks(MIN_TIME_TO_SPECULATION) @@ -732,6 +734,11 @@ private[spark] class TaskSchedulerImpl( } } + override def executorDecommission(executorId: String): Unit = { + rootPool.executorDecommission(executorId) + backend.reviveOffers() + } + override def executorLost(executorId: String, reason: ExecutorLossReason): Unit = { var failedExecutor: Option[String] = None @@ -798,7 +805,7 @@ private[spark] class TaskSchedulerImpl( * reason is not yet known, do not yet remove its association with its host nor update the status * of any running tasks, since the loss reason defines whether we'll fail those tasks. 
*/ - private def removeExecutor(executorId: String, reason: ExecutorLossReason) { + private def removeExecutor(executorId: String, reason: ExecutorLossReason): Unit = { // The tasks on the lost executor may not send any more status updates (because the executor // has been lost), so they should be cleaned up here. executorIdToRunningTaskIds.remove(executorId).foreach { taskIds => @@ -829,7 +836,7 @@ private[spark] class TaskSchedulerImpl( blacklistTrackerOpt.foreach(_.handleRemovedExecutor(executorId)) } - def executorAdded(execId: String, host: String) { + def executorAdded(execId: String, host: String): Unit = { dagScheduler.executorAdded(execId, host) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala index b680979a466a5..4df2889089ee9 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala @@ -69,7 +69,6 @@ private[scheduler] class TaskSetBlacklist( /** * Get the most recent failure reason of this TaskSet. 
- * @return */ def getLatestFailureReason: String = { latestFailureReason diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 49bd55e553482..18684ee8ebbc2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -81,6 +81,19 @@ private[spark] class TaskSetManager( val speculationQuantile = conf.get(SPECULATION_QUANTILE) val speculationMultiplier = conf.get(SPECULATION_MULTIPLIER) val minFinishedForSpeculation = math.max((speculationQuantile * numTasks).floor.toInt, 1) + // User provided threshold for speculation regardless of whether the quantile has been reached + val speculationTaskDurationThresOpt = conf.get(SPECULATION_TASK_DURATION_THRESHOLD) + // SPARK-29976: Only when the total number of tasks in the stage is less than or equal to the + // number of slots on a single executor, would the task manager speculative run the tasks if + // their duration is longer than the given threshold. In this way, we wouldn't speculate too + // aggressively but still handle basic cases. + // SPARK-30417: #cores per executor might not be set in spark conf for standalone mode, then + // the value of the conf would 1 by default. However, the executor would use all the cores on + // the worker. Therefore, CPUS_PER_TASK is okay to be greater than 1 without setting #cores. + // To handle this case, we assume the minimum number of slots is 1. + // TODO: use the actual number of slots for standalone mode. + val speculationTasksLessEqToSlots = + numTasks <= Math.max(conf.get(EXECUTOR_CORES) / sched.CPUS_PER_TASK, 1) // For each task, tracks whether a copy of the task has succeeded. 
A task will also be // marked as "succeeded" if it failed with a fetch failure, in which case it should not @@ -216,6 +229,8 @@ private[spark] class TaskSetManager( index: Int, resolveRacks: Boolean = true, speculatable: Boolean = false): Unit = { + // A zombie TaskSetManager may reach here while handling failed task. + if (isZombie) return val pendingTaskSetToAddTo = if (speculatable) pendingSpeculatableTasks else pendingTasks for (loc <- tasks(index).preferredLocations) { loc match { @@ -474,7 +489,7 @@ private[spark] class TaskSetManager( } } - private def maybeFinishTaskSet() { + private def maybeFinishTaskSet(): Unit = { if (isZombie && runningTasks == 0) { sched.taskSetFinished(this) if (tasksSuccessful == numTasks) { @@ -758,7 +773,7 @@ private[spark] class TaskSetManager( * Marks the task as failed, re-adds it to the list of pending tasks, and notifies the * DAG Scheduler. */ - def handleFailedTask(tid: Long, state: TaskState, reason: TaskFailedReason) { + def handleFailedTask(tid: Long, state: TaskState, reason: TaskFailedReason): Unit = { val info = taskInfos(tid) if (info.failed || info.killed) { return @@ -799,6 +814,15 @@ private[spark] class TaskSetManager( info.id, taskSet.id, tid, ef.description)) return } + if (ef.className == classOf[TaskOutputFileAlreadyExistException].getName) { + // If we can not write to output file in the task, there's no point in trying to + // re-execute it. + logError("Task %s in stage %s (TID %d) can not write to output file: %s; not retrying" + .format(info.id, taskSet.id, tid, ef.description)) + abort("Task %s in stage %s (TID %d) can not write to output file: %s".format( + info.id, taskSet.id, tid, ef.description)) + return + } val key = ef.description val now = clock.getTimeMillis() val (printFull, dupCount) = { @@ -886,14 +910,14 @@ private[spark] class TaskSetManager( * * Used to keep track of the number of running tasks, for enforcing scheduling policies. 
*/ - def addRunningTask(tid: Long) { + def addRunningTask(tid: Long): Unit = { if (runningTasksSet.add(tid) && parent != null) { parent.increaseRunningTasks(1) } } /** If the given task ID is in the set of running tasks, removes it. */ - def removeRunningTask(tid: Long) { + def removeRunningTask(tid: Long): Unit = { if (runningTasksSet.remove(tid) && parent != null) { parent.decreaseRunningTasks(1) } @@ -903,9 +927,9 @@ private[spark] class TaskSetManager( null } - override def addSchedulable(schedulable: Schedulable) {} + override def addSchedulable(schedulable: Schedulable): Unit = {} - override def removeSchedulable(schedulable: Schedulable) {} + override def removeSchedulable(schedulable: Schedulable): Unit = {} override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { val sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]() @@ -914,7 +938,7 @@ private[spark] class TaskSetManager( } /** Called by TaskScheduler when an executor is lost so we can re-enqueue our tasks */ - override def executorLost(execId: String, host: String, reason: ExecutorLossReason) { + override def executorLost(execId: String, host: String, reason: ExecutorLossReason): Unit = { // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage, // and we are not using an external shuffle server which could serve the shuffle outputs. // The reason is the next stage wouldn't be able to fetch the data from this dead executor @@ -923,7 +947,10 @@ private[spark] class TaskSetManager( && !isZombie) { for ((tid, info) <- taskInfos if info.executorId == execId) { val index = taskInfos(tid).index - if (successful(index) && !killedByOtherAttempt.contains(tid)) { + // We may have a running task whose partition has been marked as successful, + // this partition has another task completed in another stage attempt. + // We treat it as a running task and will call handleFailedTask later. 
+ if (successful(index) && !info.running && !killedByOtherAttempt.contains(tid)) { successful(index) = false copiesRunning(index) -= 1 tasksSuccessful -= 1 @@ -948,15 +975,41 @@ private[spark] class TaskSetManager( recomputeLocality() } + /** + * Check if the task associated with the given tid has past the time threshold and should be + * speculative run. + */ + private def checkAndSubmitSpeculatableTask( + tid: Long, + currentTimeMillis: Long, + threshold: Double): Boolean = { + val info = taskInfos(tid) + val index = info.index + if (!successful(index) && copiesRunning(index) == 1 && + info.timeRunning(currentTimeMillis) > threshold && !speculatableTasks.contains(index)) { + addPendingTask(index, speculatable = true) + logInfo( + ("Marking task %d in stage %s (on %s) as speculatable because it ran more" + + " than %.0f ms(%d speculatable tasks in this taskset now)") + .format(index, taskSet.id, info.host, threshold, speculatableTasks.size + 1)) + speculatableTasks += index + sched.dagScheduler.speculativeTaskSubmitted(tasks(index)) + true + } else { + false + } + } + /** * Check for tasks to be speculated and return true if there are any. This is called periodically * by the TaskScheduler. * */ override def checkSpeculatableTasks(minTimeToSpeculation: Int): Boolean = { - // Can't speculate if we only have one task, and no need to speculate if the task set is a - // zombie or is from a barrier stage. - if (isZombie || isBarrier || numTasks == 1) { + // No need to speculate if the task set is zombie or is from a barrier stage. If there is only + // one task we don't speculate since we don't have metrics to decide whether it's taking too + // long or not, unless a task duration threshold is explicitly provided. + if (isZombie || isBarrier || (numTasks == 1 && !speculationTaskDurationThresOpt.isDefined)) { return false } var foundTasks = false @@ -974,19 +1027,14 @@ private[spark] class TaskSetManager( // bound based on that. 
logDebug("Task length threshold for speculation: " + threshold) for (tid <- runningTasksSet) { - val info = taskInfos(tid) - val index = info.index - if (!successful(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold && - !speculatableTasks.contains(index)) { - addPendingTask(index, speculatable = true) - logInfo( - ("Marking task %d in stage %s (on %s) as speculatable because it ran more" + - " than %.0f ms(%d speculatable tasks in this taskset now)") - .format(index, taskSet.id, info.host, threshold, speculatableTasks.size + 1)) - speculatableTasks += index - sched.dagScheduler.speculativeTaskSubmitted(tasks(index)) - foundTasks = true - } + foundTasks |= checkAndSubmitSpeculatableTask(tid, time, threshold) + } + } else if (speculationTaskDurationThresOpt.isDefined && speculationTasksLessEqToSlots) { + val time = clock.getTimeMillis() + val threshold = speculationTaskDurationThresOpt.get + logDebug(s"Tasks taking longer time than provided speculation threshold: $threshold") + for (tid <- runningTasksSet) { + foundTasks |= checkAndSubmitSpeculatableTask(tid, time, threshold) } } foundTasks @@ -1035,14 +1083,22 @@ private[spark] class TaskSetManager( levels.toArray } - def recomputeLocality() { + def executorDecommission(execId: String): Unit = { + recomputeLocality() + // Future consideration: if an executor is decommissioned it may make sense to add the current + // tasks to the spec exec queue. 
+ } + + def recomputeLocality(): Unit = { + // A zombie TaskSetManager may reach here while executorLost happens + if (isZombie) return val previousLocalityLevel = myLocalityLevels(currentLocalityIndex) myLocalityLevels = computeValidLocalityLevels() localityWaits = myLocalityLevels.map(getLocalityWait) currentLocalityIndex = getLocalityIndex(previousLocalityLevel) } - def executorAdded() { + def executorAdded(): Unit = { recomputeLocality() } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index a90fff02ac73d..8db0122f17ab4 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -20,7 +20,7 @@ package org.apache.spark.scheduler.cluster import java.nio.ByteBuffer import org.apache.spark.TaskState.TaskState -import org.apache.spark.resource.ResourceInformation +import org.apache.spark.resource.{ResourceInformation, ResourceProfile} import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.scheduler.ExecutorLossReason import org.apache.spark.util.SerializableBuffer @@ -29,12 +29,13 @@ private[spark] sealed trait CoarseGrainedClusterMessage extends Serializable private[spark] object CoarseGrainedClusterMessages { - case object RetrieveSparkAppConfig extends CoarseGrainedClusterMessage + case class RetrieveSparkAppConfig(resourceProfileId: Int) extends CoarseGrainedClusterMessage case class SparkAppConfig( sparkProperties: Seq[(String, String)], ioEncryptionKey: Option[Array[Byte]], - hadoopDelegationCreds: Option[Array[Byte]]) + hadoopDelegationCreds: Option[Array[Byte]], + resourceProfile: ResourceProfile) extends CoarseGrainedClusterMessage case object RetrieveLastAllocatedExecutorId extends CoarseGrainedClusterMessage @@ -48,13 +49,6 @@ private[spark] object 
CoarseGrainedClusterMessages { case class KillExecutorsOnHost(host: String) extends CoarseGrainedClusterMessage - sealed trait RegisterExecutorResponse - - case object RegisteredExecutor extends CoarseGrainedClusterMessage with RegisterExecutorResponse - - case class RegisterExecutorFailed(message: String) extends CoarseGrainedClusterMessage - with RegisterExecutorResponse - case class UpdateDelegationTokens(tokens: Array[Byte]) extends CoarseGrainedClusterMessage @@ -66,9 +60,12 @@ private[spark] object CoarseGrainedClusterMessages { cores: Int, logUrls: Map[String, String], attributes: Map[String, String], - resources: Map[String, ResourceInformation]) + resources: Map[String, ResourceInformation], + resourceProfileId: Int) extends CoarseGrainedClusterMessage + case class LaunchedExecutor(executorId: String) extends CoarseGrainedClusterMessage + case class StatusUpdate( executorId: String, taskId: Long, @@ -97,6 +94,8 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: ExecutorLossReason) extends CoarseGrainedClusterMessage + case class DecommissionExecutor(executorId: String) extends CoarseGrainedClusterMessage + case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index d81070c362ba6..6e1efdaf5beb2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -33,6 +33,7 @@ import org.apache.spark.executor.ExecutorLogUrlHandler import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Network._ +import org.apache.spark.resource.ResourceProfile import 
org.apache.spark.rpc._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ @@ -68,36 +69,39 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp conf.get(SCHEDULER_MAX_REGISTERED_RESOURCE_WAITING_TIME)) private val createTimeNs = System.nanoTime() - // Accessing `executorDataMap` in `DriverEndpoint.receive/receiveAndReply` doesn't need any - // protection. But accessing `executorDataMap` out of `DriverEndpoint.receive/receiveAndReply` - // must be protected by `CoarseGrainedSchedulerBackend.this`. Besides, `executorDataMap` should - // only be modified in `DriverEndpoint.receive/receiveAndReply` with protection by + // Accessing `executorDataMap` in the inherited methods from ThreadSafeRpcEndpoint doesn't need + // any protection. But accessing `executorDataMap` out of the inherited methods must be + // protected by `CoarseGrainedSchedulerBackend.this`. Besides, `executorDataMap` should only + // be modified in the inherited methods from ThreadSafeRpcEndpoint with protection by // `CoarseGrainedSchedulerBackend.this`. 
private val executorDataMap = new HashMap[String, ExecutorData] - // Number of executors requested by the cluster manager, [[ExecutorAllocationManager]] + // Number of executors for each ResourceProfile requested by the cluster + // manager, [[ExecutorAllocationManager]] @GuardedBy("CoarseGrainedSchedulerBackend.this") - private var requestedTotalExecutors = 0 - - // Number of executors requested from the cluster manager that have not registered yet - @GuardedBy("CoarseGrainedSchedulerBackend.this") - private var numPendingExecutors = 0 + private val requestedTotalExecutorsPerResourceProfile = new HashMap[ResourceProfile, Int] private val listenerBus = scheduler.sc.listenerBus // Executors we have requested the cluster manager to kill that have not died yet; maps // the executor ID to whether it was explicitly killed by the driver (and thus shouldn't - // be considered an app-related failure). + // be considered an app-related failure). Visible for testing only. @GuardedBy("CoarseGrainedSchedulerBackend.this") - private val executorsPendingToRemove = new HashMap[String, Boolean] + private[scheduler] val executorsPendingToRemove = new HashMap[String, Boolean] + + // Executors that have been lost, but for which we don't yet know the real exit reason. 
+ private val executorsPendingLossReason = new HashSet[String] + + // Executors which are being decommissioned + protected val executorsPendingDecommission = new HashSet[String] - // A map to store hostname with its possible task number running on it + // A map of ResourceProfile id to map of hostname with its possible task number running on it @GuardedBy("CoarseGrainedSchedulerBackend.this") - protected var hostToLocalTaskCount: Map[String, Int] = Map.empty + protected var rpHostToLocalTaskCount: Map[Int, Map[String, Int]] = Map.empty - // The number of pending tasks which is locality required + // The number of pending tasks per ResourceProfile id which is locality required @GuardedBy("CoarseGrainedSchedulerBackend.this") - protected var localityAwareTasks = 0 + protected var numLocalityAwareTasksPerResourceProfileId = Map.empty[Int, Int] // The num of current max ExecutorId used to re-register appMaster @volatile protected var currentExecutorIdCounter = 0 @@ -111,13 +115,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp private val reviveThread = ThreadUtils.newDaemonSingleThreadScheduledExecutor("driver-revive-thread") - class DriverEndpoint extends ThreadSafeRpcEndpoint with Logging { + class DriverEndpoint extends IsolatedRpcEndpoint with Logging { override val rpcEnv: RpcEnv = CoarseGrainedSchedulerBackend.this.rpcEnv - // Executors that have been lost, but for which we don't yet know the real exit reason. - protected val executorsPendingLossReason = new HashSet[String] - protected val addressToExecutorId = new HashMap[RpcAddress, String] // Spark configuration sent to executors. 
This is a lazy val so that subclasses of the @@ -129,7 +130,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp private val logUrlHandler: ExecutorLogUrlHandler = new ExecutorLogUrlHandler( conf.get(UI.CUSTOM_EXECUTOR_LOG_URL)) - override def onStart() { + override def onStart(): Unit = { // Periodically revive offers to allow delay scheduling to work val reviveIntervalMs = conf.get(SCHEDULER_REVIVE_INTERVAL).getOrElse(1000L) @@ -186,22 +187,36 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp // automatically, so try to tell the executor to stop itself. See SPARK-13519. executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor)) removeExecutor(executorId, reason) + + case DecommissionExecutor(executorId) => + logError(s"Received decommission executor message ${executorId}.") + decommissionExecutor(executorId) + + case RemoveWorker(workerId, host, message) => + removeWorker(workerId, host, message) + + case LaunchedExecutor(executorId) => + executorDataMap.get(executorId).foreach { data => + data.freeCores = data.totalCores + } + makeOffers(executorId) + case e => + logError(s"Received unexpected message. 
${e}") } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case RegisterExecutor(executorId, executorRef, hostname, cores, logUrls, - attributes, resources) => + attributes, resources, resourceProfileId) => if (executorDataMap.contains(executorId)) { - executorRef.send(RegisterExecutorFailed("Duplicate executor ID: " + executorId)) - context.reply(true) - } else if (scheduler.nodeBlacklist.contains(hostname)) { + context.sendFailure(new IllegalStateException(s"Duplicate executor ID: $executorId")) + } else if (scheduler.nodeBlacklist.contains(hostname) || + isBlacklisted(executorId, hostname)) { // If the cluster manager gives us an executor on a blacklisted node (because it // already started allocating those resources before we informed it of our blacklist, // or if it ignored our blacklist), then we reject that executor immediately. logInfo(s"Rejecting $executorId as it has been blacklisted.") - executorRef.send(RegisterExecutorFailed(s"Executor is blacklisted: $executorId")) - context.reply(true) + context.sendFailure(new IllegalStateException(s"Executor is blacklisted: $executorId")) } else { // If the executor's rpc env is not listening for incoming connections, `hostPort` // will be null, and the client connection should be used to contact the executor. 
@@ -210,15 +225,21 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } else { context.senderAddress } - logInfo(s"Registered executor $executorRef ($executorAddress) with ID $executorId") + logInfo(s"Registered executor $executorRef ($executorAddress) with ID $executorId, " + + s" ResourceProfileId $resourceProfileId") addressToExecutorId(executorAddress) = executorId totalCoreCount.addAndGet(cores) totalRegisteredExecutors.addAndGet(1) - val resourcesInfo = resources.map{ case (k, v) => - (v.name, new ExecutorResourceInfo(v.name, v.addresses))} + val resourcesInfo = resources.map { case (rName, info) => + // tell the executor it can schedule resources up to numParts times, + // as configured by the user, or set to 1 as that is the default (1 task/resource) + val numParts = scheduler.sc.resourceProfileManager + .resourceProfileFromId(resourceProfileId).getNumSlotsPerAddress(rName, conf) + (info.name, new ExecutorResourceInfo(info.name, info.addresses, numParts)) + } val data = new ExecutorData(executorRef, executorAddress, hostname, - cores, cores, logUrlHandler.applyPattern(logUrls, attributes), attributes, - resourcesInfo) + 0, cores, logUrlHandler.applyPattern(logUrls, attributes), attributes, + resourcesInfo, resourceProfileId) // This must be synchronized because variables mutated // in this block are read when requesting executors CoarseGrainedSchedulerBackend.this.synchronized { @@ -226,17 +247,11 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp if (currentExecutorIdCounter < executorId.toInt) { currentExecutorIdCounter = executorId.toInt } - if (numPendingExecutors > 0) { - numPendingExecutors -= 1 - logDebug(s"Decremented number of pending executors ($numPendingExecutors left)") - } } - executorRef.send(RegisteredExecutor) // Note: some tests expect the reply to come after we put the executor in the map context.reply(true) listenerBus.post( 
SparkListenerExecutorAdded(System.currentTimeMillis(), executorId, data)) - makeOffers() } case StopDriver => @@ -254,20 +269,29 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp removeWorker(workerId, host, message) context.reply(true) - case RetrieveSparkAppConfig => + case DecommissionExecutor(executorId) => + logError(s"Received decommission executor message ${executorId}.") + decommissionExecutor(executorId) + context.reply(true) + + case RetrieveSparkAppConfig(resourceProfileId) => + val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(resourceProfileId) val reply = SparkAppConfig( sparkProperties, SparkEnv.get.securityManager.getIOEncryptionKey(), - Option(delegationTokens.get())) + Option(delegationTokens.get()), + rp) context.reply(reply) + case e => + logError(s"Received unexpected ask ${e}") } // Make fake resource offers on all executors - private def makeOffers() { + private def makeOffers(): Unit = { // Make sure no executor is killed while some task is launching on it val taskDescs = withLock { // Filter out executors under killing - val activeExecutors = executorDataMap.filterKeys(executorIsAlive) + val activeExecutors = executorDataMap.filterKeys(isExecutorActive) val workOffers = activeExecutors.map { case (id, executorData) => new WorkerOffer(id, executorData.executorHost, executorData.freeCores, @@ -292,11 +316,11 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } // Make fake resource offers on just one executor - private def makeOffers(executorId: String) { + private def makeOffers(executorId: String): Unit = { // Make sure no executor is killed while some task is launching on it val taskDescs = withLock { // Filter out executors under killing - if (executorIsAlive(executorId)) { + if (isExecutorActive(executorId)) { val executorData = executorDataMap(executorId) val workOffers = IndexedSeq( new WorkerOffer(executorId, executorData.executorHost, 
executorData.freeCores, @@ -314,13 +338,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } } - private def executorIsAlive(executorId: String): Boolean = synchronized { - !executorsPendingToRemove.contains(executorId) && - !executorsPendingLossReason.contains(executorId) - } - // Launch tasks returned by a set of resource offers - private def launchTasks(tasks: Seq[Seq[TaskDescription]]) { + private def launchTasks(tasks: Seq[Seq[TaskDescription]]): Unit = { for (task <- tasks.flatten) { val serializedTask = TaskDescription.encode(task) if (serializedTask.limit() >= maxRpcMessageSize) { @@ -365,6 +384,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp addressToExecutorId -= executorInfo.executorAddress executorDataMap -= executorId executorsPendingLossReason -= executorId + executorsPendingDecommission -= executorId executorsPendingToRemove.remove(executorId).getOrElse(false) } totalCoreCount.addAndGet(-executorInfo.totalCores) @@ -389,6 +409,35 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp scheduler.workerRemoved(workerId, host, message) } + /** + * Mark a given executor as decommissioned and stop making resource offers for it. + */ + private def decommissionExecutor(executorId: String): Boolean = { + val shouldDisable = CoarseGrainedSchedulerBackend.this.synchronized { + // Only bother decommissioning executors which are alive. 
+ if (isExecutorActive(executorId)) { + executorsPendingDecommission += executorId + true + } else { + false + } + } + + if (shouldDisable) { + logInfo(s"Starting decommissioning executor $executorId.") + try { + scheduler.executorDecommission(executorId) + } catch { + case e: Exception => + logError(s"Unexpected error during decommissioning ${e.toString}", e) + } + logInfo(s"Finished decommissioning executor $executorId.") + } else { + logInfo(s"Skipping decommissioning of executor $executorId.") + } + shouldDisable + } + /** * Stop making resource offers for the given executor. The executor is marked as lost with * the loss reason still pending. @@ -397,7 +446,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp */ protected def disableExecutor(executorId: String): Boolean = { val shouldDisable = CoarseGrainedSchedulerBackend.this.synchronized { - if (executorIsAlive(executorId)) { + if (isExecutorActive(executorId)) { executorsPendingLossReason += executorId true } else { @@ -420,19 +469,21 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp protected def minRegisteredRatio: Double = _minRegisteredRatio - override def start() { + override def start(): Unit = { if (UserGroupInformation.isSecurityEnabled()) { delegationTokenManager = createTokenManager() delegationTokenManager.foreach { dtm => val ugi = UserGroupInformation.getCurrentUser() val tokens = if (dtm.renewalEnabled) { dtm.start() - } else if (ugi.hasKerberosCredentials() || SparkHadoopUtil.get.isProxyUser(ugi)) { + } else { val creds = ugi.getCredentials() dtm.obtainDelegationTokens(creds) - SparkHadoopUtil.get.serialize(creds) - } else { - null + if (creds.numberOfTokens() > 0 || creds.numberOfSecretKeys() > 0) { + SparkHadoopUtil.get.serialize(creds) + } else { + null + } } if (tokens != null) { updateDelegationTokens(tokens) @@ -443,7 +494,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp protected 
def createDriverEndpoint(): DriverEndpoint = new DriverEndpoint() - def stopExecutors() { + def stopExecutors(): Unit = { try { if (driverEndpoint != null) { logInfo("Shutting down all executors") @@ -455,7 +506,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } } - override def stop() { + override def stop(): Unit = { reviveThread.shutdownNow() stopExecutors() delegationTokenManager.foreach(_.stop()) @@ -472,12 +523,11 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp /** * Reset the state of CoarseGrainedSchedulerBackend to the initial state. Currently it will only * be called in the yarn-client mode when AM re-registers after a failure. + * Visible for testing only. * */ - protected def reset(): Unit = { + protected[scheduler] def reset(): Unit = { val executors: Set[String] = synchronized { - requestedTotalExecutors = 0 - numPendingExecutors = 0 - executorsPendingToRemove.clear() + requestedTotalExecutorsPerResourceProfile.clear() executorDataMap.keys.toSet } @@ -488,12 +538,12 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } } - override def reviveOffers() { + override def reviveOffers(): Unit = { driverEndpoint.send(ReviveOffers) } override def killTask( - taskId: Long, executorId: String, interruptThread: Boolean, reason: String) { + taskId: Long, executorId: String, interruptThread: Boolean, reason: String): Unit = { driverEndpoint.send(KillTask(taskId, executorId, interruptThread, reason)) } @@ -510,8 +560,17 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } protected def removeWorker(workerId: String, host: String, message: String): Unit = { - driverEndpoint.ask[Boolean](RemoveWorker(workerId, host, message)).failed.foreach(t => - logError(t.getMessage, t))(ThreadUtils.sameThread) + driverEndpoint.send(RemoveWorker(workerId, host, message)) + } + + /** + * Called by subclasses when notified of a decommissioning 
executor. + */ + private[spark] def decommissionExecutor(executorId: String): Unit = { + if (driverEndpoint != null) { + logInfo("Propagating executor decommission to driver.") + driverEndpoint.send(DecommissionExecutor(executorId)) + } } def sufficientResourcesRegistered(): Boolean = true @@ -533,29 +592,42 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp /** * Return the number of executors currently registered with this backend. */ - private def numExistingExecutors: Int = executorDataMap.size + private def numExistingExecutors: Int = synchronized { executorDataMap.size } - override def getExecutorIds(): Seq[String] = { + override def getExecutorIds(): Seq[String] = synchronized { executorDataMap.keySet.toSeq } override def isExecutorActive(id: String): Boolean = synchronized { - executorDataMap.contains(id) && !executorsPendingToRemove.contains(id) + executorDataMap.contains(id) && + !executorsPendingToRemove.contains(id) && + !executorsPendingLossReason.contains(id) && + !executorsPendingDecommission.contains(id) + } - override def maxNumConcurrentTasks(): Int = { + override def maxNumConcurrentTasks(): Int = synchronized { executorDataMap.values.map { executor => executor.totalCores / scheduler.CPUS_PER_TASK }.sum } // this function is for testing only - def getExecutorAvailableResources(executorId: String): Map[String, ExecutorResourceInfo] = { + def getExecutorAvailableResources( + executorId: String): Map[String, ExecutorResourceInfo] = synchronized { executorDataMap.get(executorId).map(_.resourcesInfo).getOrElse(Map.empty) } + // this function is for testing only + def getExecutorResourceProfileId(executorId: String): Int = synchronized { + val execDataOption = executorDataMap.get(executorId) + execDataOption.map(_.resourceProfileId).getOrElse(ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID) + } + /** - * Request an additional number of executors from the cluster manager.
+ * Request an additional number of executors from the cluster manager. This is + * requesting against the default ResourceProfile, we will need an API change to + * allow against other profiles. * @return whether the request is acknowledged. */ final override def requestExecutors(numAdditionalExecutors: Int): Boolean = { @@ -567,21 +639,11 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp logInfo(s"Requesting $numAdditionalExecutors additional executor(s) from the cluster manager") val response = synchronized { - requestedTotalExecutors += numAdditionalExecutors - numPendingExecutors += numAdditionalExecutors - logDebug(s"Number of pending executors is now $numPendingExecutors") - if (requestedTotalExecutors != - (numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size)) { - logDebug( - s"""requestExecutors($numAdditionalExecutors): Executor request doesn't match: - |requestedTotalExecutors = $requestedTotalExecutors - |numExistingExecutors = $numExistingExecutors - |numPendingExecutors = $numPendingExecutors - |executorsPendingToRemove = ${executorsPendingToRemove.size}""".stripMargin) - } - + val defaultProf = scheduler.sc.resourceProfileManager.defaultResourceProfile + val numExisting = requestedTotalExecutorsPerResourceProfile.getOrElse(defaultProf, 0) + requestedTotalExecutorsPerResourceProfile(defaultProf) = numExisting + numAdditionalExecutors // Account for executors pending to be added or removed - doRequestTotalExecutors(requestedTotalExecutors) + doRequestTotalExecutors(requestedTotalExecutorsPerResourceProfile.toMap) } defaultAskTimeout.awaitResult(response) @@ -590,39 +652,41 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp /** * Update the cluster manager on our scheduling needs. Three bits of information are included * to help it make decisions. - * @param numExecutors The total number of executors we'd like to have. 
The cluster manager - * shouldn't kill any running executor to reach this number, but, - * if all existing executors were to die, this is the number of executors - * we'd want to be allocated. - * @param localityAwareTasks The number of tasks in all active stages that have a locality - * preferences. This includes running, pending, and completed tasks. + * @param resourceProfileToNumExecutors The total number of executors we'd like to have per + * ResourceProfile. The cluster manager shouldn't kill any + * running executor to reach this number, but, if all + * existing executors were to die, this is the number + * of executors we'd want to be allocated. + * @param numLocalityAwareTasksPerResourceProfileId The number of tasks in all active stages that + * have a locality preferences per + * ResourceProfile. This includes running, + * pending, and completed tasks. * @param hostToLocalTaskCount A map of hosts to the number of tasks from all active stages * that would like to like to run on that host. * This includes running, pending, and completed tasks. * @return whether the request is acknowledged by the cluster manager. */ final override def requestTotalExecutors( - numExecutors: Int, - localityAwareTasks: Int, - hostToLocalTaskCount: Map[String, Int] - ): Boolean = { - if (numExecutors < 0) { + resourceProfileIdToNumExecutors: Map[Int, Int], + numLocalityAwareTasksPerResourceProfileId: Map[Int, Int], + hostToLocalTaskCount: Map[Int, Map[String, Int]] + ): Boolean = { + val totalExecs = resourceProfileIdToNumExecutors.values.sum + if (totalExecs < 0) { throw new IllegalArgumentException( "Attempted to request a negative number of executor(s) " + - s"$numExecutors from the cluster manager. Please specify a positive number!") + s"$totalExecs from the cluster manager. 
Please specify a positive number!") + } + val resourceProfileToNumExecutors = resourceProfileIdToNumExecutors.map { case (rpid, num) => + (scheduler.sc.resourceProfileManager.resourceProfileFromId(rpid), num) } - val response = synchronized { - this.requestedTotalExecutors = numExecutors - this.localityAwareTasks = localityAwareTasks - this.hostToLocalTaskCount = hostToLocalTaskCount - - numPendingExecutors = - math.max(numExecutors - numExistingExecutors + executorsPendingToRemove.size, 0) - - doRequestTotalExecutors(numExecutors) + this.requestedTotalExecutorsPerResourceProfile.clear() + this.requestedTotalExecutorsPerResourceProfile ++= resourceProfileToNumExecutors + this.numLocalityAwareTasksPerResourceProfileId = numLocalityAwareTasksPerResourceProfileId + this.rpHostToLocalTaskCount = hostToLocalTaskCount + doRequestTotalExecutors(requestedTotalExecutorsPerResourceProfile.toMap) } - defaultAskTimeout.awaitResult(response) } @@ -638,7 +702,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * * @return a future whose evaluation indicates whether the request is acknowledged. */ - protected def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = + protected def doRequestTotalExecutors( + resourceProfileToTotalExecs: Map[ResourceProfile, Int]): Future[Boolean] = Future.successful(false) /** @@ -679,20 +744,20 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp // take into account executors that are pending to be added or removed. 
val adjustTotalExecutors = if (adjustTargetNumExecutors) { - requestedTotalExecutors = math.max(requestedTotalExecutors - executorsToKill.size, 0) - if (requestedTotalExecutors != - (numExistingExecutors + numPendingExecutors - executorsPendingToRemove.size)) { - logDebug( - s"""killExecutors($executorIds, $adjustTargetNumExecutors, $countFailures, $force): - |Executor counts do not match: - |requestedTotalExecutors = $requestedTotalExecutors - |numExistingExecutors = $numExistingExecutors - |numPendingExecutors = $numPendingExecutors - |executorsPendingToRemove = ${executorsPendingToRemove.size}""".stripMargin) + executorsToKill.foreach { exec => + val rpId = executorDataMap(exec).resourceProfileId + val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(rpId) + if (requestedTotalExecutorsPerResourceProfile.isEmpty) { + // Assume that we are killing an executor that was started by default and + // not through the request api + requestedTotalExecutorsPerResourceProfile(rp) = 0 + } else { + val requestedTotalForRp = requestedTotalExecutorsPerResourceProfile(rp) + requestedTotalExecutorsPerResourceProfile(rp) = math.max(requestedTotalForRp - 1, 0) + } } - doRequestTotalExecutors(requestedTotalExecutors) + doRequestTotalExecutors(requestedTotalExecutorsPerResourceProfile.toMap) } else { - numPendingExecutors += executorsToKill.size Future.successful(true) } @@ -758,6 +823,15 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp protected def currentDelegationTokens: Array[Byte] = delegationTokens.get() + /** + * Checks whether the executor is blacklisted. This is called when the executor tries to + * register with the scheduler, and will deny registration if this method returns true. + * + * This is in addition to the blacklist kept by the task scheduler, so custom implementations + * don't need to check there. 
+ */ + protected def isBlacklisted(executorId: String, hostname: String): Boolean = false + // SPARK-27112: We need to ensure that there is ordering of lock acquisition // between TaskSchedulerImpl and CoarseGrainedSchedulerBackend objects in order to fix // the deadlock issue exposed in SPARK-27112 diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala index 17907d88e50c8..062146174f6a8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorData.scala @@ -29,6 +29,7 @@ import org.apache.spark.scheduler.ExecutorResourceInfo * @param freeCores The current number of cores available for work on the executor * @param totalCores The total number of cores available to the executor * @param resourcesInfo The information of the currently available resources on the executor + * @param resourceProfileId The id of the ResourceProfile being used by this executor */ private[cluster] class ExecutorData( val executorEndpoint: RpcEndpointRef, @@ -38,5 +39,7 @@ private[cluster] class ExecutorData( override val totalCores: Int, override val logUrlMap: Map[String, String], override val attributes: Map[String, String], - override val resourcesInfo: Map[String, ExecutorResourceInfo] -) extends ExecutorInfo(executorHost, totalCores, logUrlMap, attributes, resourcesInfo) + override val resourcesInfo: Map[String, ExecutorResourceInfo], + override val resourceProfileId: Int +) extends ExecutorInfo(executorHost, totalCores, logUrlMap, attributes, + resourcesInfo, resourceProfileId) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala index 5a4ad6e00eb43..a97b08941ba78 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala +++ 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorInfo.scala @@ -18,6 +18,7 @@ package org.apache.spark.scheduler.cluster import org.apache.spark.annotation.DeveloperApi import org.apache.spark.resource.ResourceInformation +import org.apache.spark.resource.ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID /** * :: DeveloperApi :: @@ -25,14 +26,15 @@ import org.apache.spark.resource.ResourceInformation */ @DeveloperApi class ExecutorInfo( - val executorHost: String, - val totalCores: Int, - val logUrlMap: Map[String, String], - val attributes: Map[String, String], - val resourcesInfo: Map[String, ResourceInformation]) { + val executorHost: String, + val totalCores: Int, + val logUrlMap: Map[String, String], + val attributes: Map[String, String], + val resourcesInfo: Map[String, ResourceInformation], + val resourceProfileId: Int) { def this(executorHost: String, totalCores: Int, logUrlMap: Map[String, String]) = { - this(executorHost, totalCores, logUrlMap, Map.empty, Map.empty) + this(executorHost, totalCores, logUrlMap, Map.empty, Map.empty, DEFAULT_RESOURCE_PROFILE_ID) } def this( @@ -40,7 +42,17 @@ class ExecutorInfo( totalCores: Int, logUrlMap: Map[String, String], attributes: Map[String, String]) = { - this(executorHost, totalCores, logUrlMap, attributes, Map.empty) + this(executorHost, totalCores, logUrlMap, attributes, Map.empty, DEFAULT_RESOURCE_PROFILE_ID) + } + + def this( + executorHost: String, + totalCores: Int, + logUrlMap: Map[String, String], + attributes: Map[String, String], + resourcesInfo: Map[String, ResourceInformation]) = { + this(executorHost, totalCores, logUrlMap, attributes, resourcesInfo, + DEFAULT_RESOURCE_PROFILE_ID) } def canEqual(other: Any): Boolean = other.isInstanceOf[ExecutorInfo] @@ -52,12 +64,14 @@ class ExecutorInfo( totalCores == that.totalCores && logUrlMap == that.logUrlMap && attributes == that.attributes && - resourcesInfo == that.resourcesInfo + resourcesInfo == that.resourcesInfo && + resourceProfileId == 
that.resourceProfileId case _ => false } override def hashCode(): Int = { - val state = Seq(executorHost, totalCores, logUrlMap, attributes, resourcesInfo) + val state = Seq(executorHost, totalCores, logUrlMap, attributes, resourcesInfo, + resourceProfileId) state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 2025a7dc24821..42c46464d79e1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -28,7 +28,7 @@ import org.apache.spark.deploy.client.{StandaloneAppClient, StandaloneAppClientL import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} -import org.apache.spark.resource.ResourceUtils +import org.apache.spark.resource.{ResourceProfile, ResourceUtils} import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler._ import org.apache.spark.util.Utils @@ -58,8 +58,9 @@ private[spark] class StandaloneSchedulerBackend( private val maxCores = conf.get(config.CORES_MAX) private val totalExpectedCores = maxCores.getOrElse(0) + private val defaultProf = sc.resourceProfileManager.defaultResourceProfile - override def start() { + override def start(): Unit = { super.start() // SPARK-21159. 
The scheduler backend should only try to connect to the launcher when in client @@ -129,21 +130,21 @@ private[spark] class StandaloneSchedulerBackend( stop(SparkAppHandle.State.FINISHED) } - override def connected(appId: String) { + override def connected(appId: String): Unit = { logInfo("Connected to Spark cluster with app ID " + appId) this.appId = appId notifyContext() launcherBackend.setAppId(appId) } - override def disconnected() { + override def disconnected(): Unit = { notifyContext() if (!stopping.get) { logWarning("Disconnected from Spark cluster! Waiting for reconnection...") } } - override def dead(reason: String) { + override def dead(reason: String): Unit = { notifyContext() if (!stopping.get) { launcherBackend.setState(SparkAppHandle.State.KILLED) @@ -158,13 +159,13 @@ private[spark] class StandaloneSchedulerBackend( } override def executorAdded(fullId: String, workerId: String, hostPort: String, cores: Int, - memory: Int) { + memory: Int): Unit = { logInfo("Granted executor ID %s on hostPort %s with %d core(s), %s RAM".format( fullId, hostPort, cores, Utils.megabytesToString(memory))) } override def executorRemoved( - fullId: String, message: String, exitStatus: Option[Int], workerLost: Boolean) { + fullId: String, message: String, exitStatus: Option[Int], workerLost: Boolean): Unit = { val reason: ExecutorLossReason = exitStatus match { case Some(code) => ExecutorExited(code, exitCausedByApp = true, message) case None => SlaveLost(message, workerLost = workerLost) @@ -173,6 +174,12 @@ private[spark] class StandaloneSchedulerBackend( removeExecutor(fullId.split("/")(1), reason) } + override def executorDecommissioned(fullId: String, message: String) { + logInfo("Asked to decommission executor") + decommissionExecutor(fullId.split("/")(1)) + logInfo("Executor %s decommissioned: %s".format(fullId, message)) + } + override def workerRemoved(workerId: String, host: String, message: String): Unit = { logInfo("Worker %s removed: %s".format(workerId, 
message)) removeWorker(workerId, host, message) @@ -194,9 +201,13 @@ private[spark] class StandaloneSchedulerBackend( * * @return whether the request is acknowledged. */ - protected override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = { + protected override def doRequestTotalExecutors( + resourceProfileToTotalExecs: Map[ResourceProfile, Int]): Future[Boolean] = { + // resources profiles not supported Option(client) match { - case Some(c) => c.requestTotalExecutors(requestedTotal) + case Some(c) => + val numExecs = resourceProfileToTotalExecs.getOrElse(defaultProf, 0) + c.requestTotalExecutors(numExecs) case None => logWarning("Attempted to request executors before driver fully initialized.") Future.successful(false) diff --git a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala index aa901d6568b26..c29546b7577fc 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala @@ -26,6 +26,7 @@ import scala.collection.mutable import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.resource.ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID import org.apache.spark.scheduler._ import org.apache.spark.storage.RDDBlockId import org.apache.spark.util.Clock @@ -39,11 +40,12 @@ private[spark] class ExecutorMonitor( listenerBus: LiveListenerBus, clock: Clock) extends SparkListener with CleanerListener with Logging { - private val idleTimeoutMs = TimeUnit.SECONDS.toMillis( + private val idleTimeoutNs = TimeUnit.SECONDS.toNanos( conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT)) - private val storageTimeoutMs = TimeUnit.SECONDS.toMillis( + private val storageTimeoutNs = TimeUnit.SECONDS.toNanos( conf.get(DYN_ALLOCATION_CACHED_EXECUTOR_IDLE_TIMEOUT)) - private val 
shuffleTimeoutMs = conf.get(DYN_ALLOCATION_SHUFFLE_TIMEOUT) + private val shuffleTimeoutNs = TimeUnit.MILLISECONDS.toNanos( + conf.get(DYN_ALLOCATION_SHUFFLE_TIMEOUT)) private val fetchFromShuffleSvcEnabled = conf.get(SHUFFLE_SERVICE_ENABLED) && conf.get(SHUFFLE_SERVICE_FETCH_RDD_ENABLED) @@ -51,6 +53,7 @@ private[spark] class ExecutorMonitor( conf.get(DYN_ALLOCATION_SHUFFLE_TRACKING) private val executors = new ConcurrentHashMap[String, Tracker]() + private val execResourceProfileCount = new ConcurrentHashMap[Int, Int]() // The following fields are an optimization to avoid having to scan all executors on every EAM // schedule interval to find out which ones are timed out. They keep track of when the next @@ -67,7 +70,7 @@ private[spark] class ExecutorMonitor( // this listener. There are safeguards in other parts of the code that would prevent that executor // from being removed. private val nextTimeout = new AtomicLong(Long.MaxValue) - private var timedOutExecs = Seq.empty[String] + private var timedOutExecs = Seq.empty[(String, Int)] // Active job tracking. // @@ -91,16 +94,17 @@ private[spark] class ExecutorMonitor( def reset(): Unit = { executors.clear() + execResourceProfileCount.clear() nextTimeout.set(Long.MaxValue) timedOutExecs = Nil } /** - * Returns the list of executors that are currently considered to be timed out. - * Should only be called from the EAM thread. + * Returns the list of executors and their ResourceProfile id that are currently considered to + * be timed out. Should only be called from the EAM thread. */ - def timedOutExecutors(): Seq[String] = { - val now = clock.getTimeMillis() + def timedOutExecutors(): Seq[(String, Int)] = { + val now = clock.nanoTime() if (now >= nextTimeout.get()) { // Temporarily set the next timeout at Long.MaxValue. 
This ensures that after // scanning all executors below, we know when the next timeout for non-timed out @@ -122,7 +126,7 @@ private[spark] class ExecutorMonitor( true } } - .keys + .map { case (name, exec) => (name, exec.resourceProfileId)} .toSeq updateNextTimeout(newNextTimeout) } @@ -147,8 +151,26 @@ private[spark] class ExecutorMonitor( def executorCount: Int = executors.size() + def executorCountWithResourceProfile(id: Int): Int = { + execResourceProfileCount.getOrDefault(id, 0) + } + + // for testing + def getResourceProfileId(executorId: String): Int = { + val execTrackingInfo = executors.get(executorId) + if (execTrackingInfo != null) { + execTrackingInfo.resourceProfileId + } else { + UNKNOWN_RESOURCE_PROFILE_ID + } + } + def pendingRemovalCount: Int = executors.asScala.count { case (_, exec) => exec.pendingRemoval } + def pendingRemovalCountPerResourceProfileId(id: Int): Int = { + executors.asScala.filter { case (k, v) => v.resourceProfileId == id && v.pendingRemoval }.size + } + override def onJobStart(event: SparkListenerJobStart): Unit = { if (!shuffleTrackingEnabled) { return @@ -260,7 +282,7 @@ private[spark] class ExecutorMonitor( val executorId = event.taskInfo.executorId // Guard against a late arriving task start event (SPARK-26927). 
if (client.isExecutorActive(executorId)) { - val exec = ensureExecutorIsTracked(executorId) + val exec = ensureExecutorIsTracked(executorId, UNKNOWN_RESOURCE_PROFILE_ID) exec.updateRunningTasks(1) } } @@ -289,15 +311,21 @@ private[spark] class ExecutorMonitor( } override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit = { - val exec = ensureExecutorIsTracked(event.executorId) + val exec = ensureExecutorIsTracked(event.executorId, event.executorInfo.resourceProfileId) exec.updateRunningTasks(0) logInfo(s"New executor ${event.executorId} has registered (new total is ${executors.size()})") } + private def decrementExecResourceProfileCount(rpId: Int): Unit = { + val count = execResourceProfileCount.getOrDefault(rpId, 0) + execResourceProfileCount.replace(rpId, count, count - 1) + execResourceProfileCount.remove(rpId, 0) + } + override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit = { val removed = executors.remove(event.executorId) if (removed != null) { - logInfo(s"Executor ${event.executorId} removed (new total is ${executors.size()})") + decrementExecResourceProfileCount(removed.resourceProfileId) if (!removed.pendingRemoval) { nextTimeout.set(Long.MinValue) } @@ -308,8 +336,8 @@ private[spark] class ExecutorMonitor( if (!event.blockUpdatedInfo.blockId.isInstanceOf[RDDBlockId]) { return } - - val exec = ensureExecutorIsTracked(event.blockUpdatedInfo.blockManagerId.executorId) + val exec = ensureExecutorIsTracked(event.blockUpdatedInfo.blockManagerId.executorId, + UNKNOWN_RESOURCE_PROFILE_ID) val storageLevel = event.blockUpdatedInfo.storageLevel val blockId = event.blockUpdatedInfo.blockId.asInstanceOf[RDDBlockId] @@ -391,8 +419,26 @@ private[spark] class ExecutorMonitor( * which the `SparkListenerTaskStart` event is posted before the `SparkListenerBlockManagerAdded` * event, which is possible because these events are posted in different threads. 
(see SPARK-4951) */ - private def ensureExecutorIsTracked(id: String): Tracker = { - executors.computeIfAbsent(id, _ => new Tracker()) + private def ensureExecutorIsTracked(id: String, resourceProfileId: Int): Tracker = { + val numExecsWithRpId = execResourceProfileCount.computeIfAbsent(resourceProfileId, _ => 0) + val execTracker = executors.computeIfAbsent(id, _ => { + val newcount = numExecsWithRpId + 1 + execResourceProfileCount.put(resourceProfileId, newcount) + logDebug(s"Executor added with ResourceProfile id: $resourceProfileId " + + s"count is now $newcount") + new Tracker(resourceProfileId) + }) + // if we had added executor before without knowing the resource profile id, fix it up + if (execTracker.resourceProfileId == UNKNOWN_RESOURCE_PROFILE_ID && + resourceProfileId != UNKNOWN_RESOURCE_PROFILE_ID) { + logDebug(s"Executor: $id, resource profile id was unknown, setting " + + s"it to $resourceProfileId") + execTracker.resourceProfileId = resourceProfileId + // fix up the counts for each resource profile id + execResourceProfileCount.put(resourceProfileId, numExecsWithRpId + 1) + decrementExecResourceProfileCount(UNKNOWN_RESOURCE_PROFILE_ID) + } + execTracker } private def updateNextTimeout(newValue: Long): Unit = { @@ -412,7 +458,7 @@ private[spark] class ExecutorMonitor( } } - private class Tracker { + private class Tracker(var resourceProfileId: Int) { @volatile var timeoutAt: Long = Long.MaxValue // Tracks whether this executor is thought to be timed out. 
It's used to detect when the list @@ -437,7 +483,7 @@ private[spark] class ExecutorMonitor( def updateRunningTasks(delta: Int): Unit = { runningTasks = math.max(0, runningTasks + delta) - idleStart = if (runningTasks == 0) clock.getTimeMillis() else -1L + idleStart = if (runningTasks == 0) clock.nanoTime() else -1L updateTimeout() } @@ -445,15 +491,15 @@ private[spark] class ExecutorMonitor( val oldDeadline = timeoutAt val newDeadline = if (idleStart >= 0) { val timeout = if (cachedBlocks.nonEmpty || (shuffleIds != null && shuffleIds.nonEmpty)) { - val _cacheTimeout = if (cachedBlocks.nonEmpty) storageTimeoutMs else Long.MaxValue + val _cacheTimeout = if (cachedBlocks.nonEmpty) storageTimeoutNs else Long.MaxValue val _shuffleTimeout = if (shuffleIds != null && shuffleIds.nonEmpty) { - shuffleTimeoutMs + shuffleTimeoutNs } else { Long.MaxValue } math.min(_cacheTimeout, _shuffleTimeout) } else { - idleTimeoutMs + idleTimeoutNs } val deadline = idleStart + timeout if (deadline >= 0) deadline else Long.MaxValue diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala index cbcc5310a59f0..42a5afe0b3f9d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala @@ -26,9 +26,11 @@ import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.{Executor, ExecutorBackend} import org.apache.spark.internal.{config, Logging} import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} +import org.apache.spark.resource.ResourceInformation import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo +import org.apache.spark.util.Utils private case class ReviveOffers() @@ -54,10 +56,12 @@ private[spark] 
class LocalEndpoint( private var freeCores = totalCores val localExecutorId = SparkContext.DRIVER_IDENTIFIER - val localExecutorHostname = "localhost" + val localExecutorHostname = Utils.localCanonicalHostName() + // local mode doesn't support extra resources like GPUs right now private val executor = new Executor( - localExecutorId, localExecutorHostname, SparkEnv.get, userClassPath, isLocal = true) + localExecutorId, localExecutorHostname, SparkEnv.get, userClassPath, isLocal = true, + resources = Map.empty[String, ResourceInformation]) override def receive: PartialFunction[Any, Unit] = { case ReviveOffers => @@ -80,7 +84,7 @@ private[spark] class LocalEndpoint( context.reply(true) } - def reviveOffers() { + def reviveOffers(): Unit = { // local mode doesn't support extra resources like GPUs right now val offers = IndexedSeq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores, Some(rpcEnv.address.hostPort))) @@ -123,7 +127,7 @@ private[spark] class LocalSchedulerBackend( launcherBackend.connect() - override def start() { + override def start(): Unit = { val rpcEnv = SparkEnv.get.rpcEnv val executorEndpoint = new LocalEndpoint(rpcEnv, userClassPath, scheduler, this, totalCores) localEndpoint = rpcEnv.setupEndpoint("LocalSchedulerBackendEndpoint", executorEndpoint) @@ -136,11 +140,11 @@ private[spark] class LocalSchedulerBackend( launcherBackend.setState(SparkAppHandle.State.RUNNING) } - override def stop() { + override def stop(): Unit = { stop(SparkAppHandle.State.FINISHED) } - override def reviveOffers() { + override def reviveOffers(): Unit = { localEndpoint.send(ReviveOffers) } @@ -148,11 +152,11 @@ private[spark] class LocalSchedulerBackend( scheduler.conf.getInt("spark.default.parallelism", totalCores) override def killTask( - taskId: Long, executorId: String, interruptThread: Boolean, reason: String) { + taskId: Long, executorId: String, interruptThread: Boolean, reason: String): Unit = { localEndpoint.send(KillTask(taskId, interruptThread, 
reason)) } - override def statusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) { + override def statusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer): Unit = { localEndpoint.send(StatusUpdate(taskId, state, serializedData)) } diff --git a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index 70564eeefda88..077b035f3d079 100644 --- a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala @@ -54,8 +54,8 @@ private[spark] class JavaSerializationStream( this } - def flush() { objOut.flush() } - def close() { objOut.close() } + def flush(): Unit = { objOut.flush() } + def close(): Unit = { objOut.close() } } private[spark] class JavaDeserializationStream(in: InputStream, loader: ClassLoader) @@ -74,7 +74,7 @@ private[spark] class JavaDeserializationStream(in: InputStream, loader: ClassLoa } def readObject[T: ClassTag](): T = objIn.readObject().asInstanceOf[T] - def close() { objIn.close() } + def close(): Unit = { objIn.close() } } private object JavaDeserializationStream { diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 20774c8d999c1..cdaab599e2a0b 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -40,6 +40,7 @@ import org.apache.spark._ import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Kryo._ +import org.apache.spark.internal.io.FileCommitProtocol._ import org.apache.spark.network.util.ByteUnit import org.apache.spark.scheduler.{CompressedMapStatus, HighlyCompressedMapStatus} import org.apache.spark.storage._ @@ -259,14 +260,14 @@ class 
KryoSerializationStream( this } - override def flush() { + override def flush(): Unit = { if (output == null) { throw new IOException("Stream is closed") } output.flush() } - override def close() { + override def close(): Unit = { if (output != null) { try { output.close() @@ -301,7 +302,7 @@ class KryoDeserializationStream( } } - override def close() { + override def close(): Unit = { if (input != null) { try { // Kryo's Input automatically closes the input stream it is using. @@ -469,7 +470,8 @@ private[serializer] object KryoSerializer { classOf[Array[String]], classOf[Array[Array[String]]], classOf[BoundedPriorityQueue[_]], - classOf[SparkConf] + classOf[SparkConf], + classOf[TaskCommitMessage] ) private val toRegisterSerializer = Map[Class[_], KryoClassSerializer[_]]( diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala index 5e7a98c8aa89c..75dc3982ab872 100644 --- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala @@ -303,7 +303,7 @@ private[spark] object SerializationDebugger extends Logging { /** An output stream that emulates /dev/null */ private class NullOutputStream extends OutputStream { - override def write(b: Int) { } + override def write(b: Int): Unit = { } } /** diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index cb8b1cc077637..0c53a84af6e2f 100644 --- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -173,7 +173,7 @@ abstract class DeserializationStream extends Closeable { } } - override protected def close() { + override protected def close(): Unit = { DeserializationStream.this.close() } } @@ -193,7 +193,7 @@ abstract class 
DeserializationStream extends Closeable { } } - override protected def close() { + override protected def close(): Unit = { DeserializationStream.this.close() } } diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala index 3e3c387911d36..623db9d00ab53 100644 --- a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala @@ -114,6 +114,7 @@ private[spark] class SerializerManager( case _: RDDBlockId => compressRdds case _: TempLocalBlockId => compressShuffleSpill case _: TempShuffleBlockId => compressShuffle + case _: ShuffleBlockBatchId => compressShuffle case _ => false } } diff --git a/core/src/main/scala/org/apache/spark/shuffle/BaseShuffleHandle.scala b/core/src/main/scala/org/apache/spark/shuffle/BaseShuffleHandle.scala index 04e4cf88d7063..6fe183c078089 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/BaseShuffleHandle.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/BaseShuffleHandle.scala @@ -24,6 +24,5 @@ import org.apache.spark.ShuffleDependency */ private[spark] class BaseShuffleHandle[K, V, C]( shuffleId: Int, - val numMaps: Int, val dependency: ShuffleDependency[K, V, C]) extends ShuffleHandle(shuffleId) diff --git a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala index 4329824b1b627..bc2a0fbc36d5b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala @@ -19,35 +19,58 @@ package org.apache.spark.shuffle import org.apache.spark._ import org.apache.spark.internal.{config, Logging} +import org.apache.spark.io.CompressionCodec import org.apache.spark.serializer.SerializerManager -import org.apache.spark.storage.{BlockManager, 
ShuffleBlockFetcherIterator} +import org.apache.spark.storage.{BlockId, BlockManager, BlockManagerId, ShuffleBlockFetcherIterator} import org.apache.spark.util.CompletionIterator import org.apache.spark.util.collection.ExternalSorter /** - * Fetches and reads the partitions in range [startPartition, endPartition) from a shuffle by - * requesting them from other nodes' block stores. + * Fetches and reads the blocks from a shuffle by requesting them from other nodes' block stores. */ private[spark] class BlockStoreShuffleReader[K, C]( handle: BaseShuffleHandle[K, _, C], - startPartition: Int, - endPartition: Int, + blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], context: TaskContext, readMetrics: ShuffleReadMetricsReporter, serializerManager: SerializerManager = SparkEnv.get.serializerManager, blockManager: BlockManager = SparkEnv.get.blockManager, - mapOutputTracker: MapOutputTracker = SparkEnv.get.mapOutputTracker) + mapOutputTracker: MapOutputTracker = SparkEnv.get.mapOutputTracker, + shouldBatchFetch: Boolean = false) extends ShuffleReader[K, C] with Logging { private val dep = handle.dependency + private def fetchContinuousBlocksInBatch: Boolean = { + val conf = SparkEnv.get.conf + val serializerRelocatable = dep.serializer.supportsRelocationOfSerializedObjects + val compressed = conf.get(config.SHUFFLE_COMPRESS) + val codecConcatenation = if (compressed) { + CompressionCodec.supportsConcatenationOfSerializedStreams(CompressionCodec.createCodec(conf)) + } else { + true + } + val useOldFetchProtocol = conf.get(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL) + + val doBatchFetch = shouldBatchFetch && serializerRelocatable && + (!compressed || codecConcatenation) && !useOldFetchProtocol + if (shouldBatchFetch && !doBatchFetch) { + logDebug("The feature tag of continuous shuffle block fetching is set to true, but " + + "we can not enable the feature because other conditions are not satisfied. 
" + + s"Shuffle compress: $compressed, serializer relocatable: $serializerRelocatable, " + + s"codec concatenation: $codecConcatenation, use old shuffle fetch protocol: " + + s"$useOldFetchProtocol.") + } + doBatchFetch + } + /** Read the combined key-values for this reduce task */ override def read(): Iterator[Product2[K, C]] = { val wrappedStreams = new ShuffleBlockFetcherIterator( context, blockManager.blockStoreClient, blockManager, - mapOutputTracker.getMapSizesByExecutorId(handle.shuffleId, startPartition, endPartition), + blocksByAddress, serializerManager.wrapStream, // Note: we use getSizeAsMb when no suffix is provided for backwards compatibility SparkEnv.get.conf.get(config.REDUCER_MAX_SIZE_IN_FLIGHT) * 1024 * 1024, @@ -56,7 +79,8 @@ private[spark] class BlockStoreShuffleReader[K, C]( SparkEnv.get.conf.get(config.MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM), SparkEnv.get.conf.get(config.SHUFFLE_DETECT_CORRUPT), SparkEnv.get.conf.get(config.SHUFFLE_DETECT_CORRUPT_MEMORY), - readMetrics).toCompletionIterator + readMetrics, + fetchContinuousBlocksInBatch).toCompletionIterator val serializerInstance = dep.serializer.newInstance() diff --git a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala index 265a8acfa8d61..6509a04dc4893 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala @@ -35,7 +35,8 @@ import org.apache.spark.util.Utils private[spark] class FetchFailedException( bmAddress: BlockManagerId, shuffleId: Int, - mapId: Int, + mapId: Long, + mapIndex: Int, reduceId: Int, message: String, cause: Throwable = null) @@ -44,10 +45,11 @@ private[spark] class FetchFailedException( def this( bmAddress: BlockManagerId, shuffleId: Int, - mapId: Int, + mapTaskId: Long, + mapIndex: Int, reduceId: Int, cause: Throwable) { - this(bmAddress, shuffleId, mapId, reduceId, 
cause.getMessage, cause) + this(bmAddress, shuffleId, mapTaskId, mapIndex, reduceId, cause.getMessage, cause) } // SPARK-19276. We set the fetch failure in the task context, so that even if there is user-code @@ -56,8 +58,8 @@ private[spark] class FetchFailedException( // because the TaskContext is not defined in some test cases. Option(TaskContext.get()).map(_.setFetchFailed(this)) - def toTaskFailedReason: TaskFailedReason = FetchFailed(bmAddress, shuffleId, mapId, reduceId, - Utils.exceptionString(this)) + def toTaskFailedReason: TaskFailedReason = FetchFailed( + bmAddress, shuffleId, mapId, mapIndex, reduceId, Utils.exceptionString(this)) } /** @@ -67,4 +69,4 @@ private[spark] class MetadataFetchFailedException( shuffleId: Int, reduceId: Int, message: String) - extends FetchFailedException(null, shuffleId, -1, reduceId, message) + extends FetchFailedException(null, shuffleId, -1L, -1, reduceId, message) diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index d3f1c7ec1bbee..af2c82e771970 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -26,6 +26,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.io.NioBufferedFileInputStream import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.ExecutorDiskUtils import org.apache.spark.shuffle.IndexShuffleBlockResolver.NOOP_REDUCE_ID import org.apache.spark.storage._ import org.apache.spark.util.Utils @@ -51,18 +52,42 @@ private[spark] class IndexShuffleBlockResolver( private val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle") - def getDataFile(shuffleId: Int, mapId: Int): File = { - 
blockManager.diskBlockManager.getFile(ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID)) + + def getDataFile(shuffleId: Int, mapId: Long): File = getDataFile(shuffleId, mapId, None) + + /** + * Get the shuffle data file. + * + * When the dirs parameter is None then use the disk manager's local directories. Otherwise, + * read from the specified directories. + */ + def getDataFile(shuffleId: Int, mapId: Long, dirs: Option[Array[String]]): File = { + val blockId = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID) + dirs + .map(ExecutorDiskUtils.getFile(_, blockManager.subDirsPerLocalDir, blockId.name)) + .getOrElse(blockManager.diskBlockManager.getFile(blockId)) } - private def getIndexFile(shuffleId: Int, mapId: Int): File = { - blockManager.diskBlockManager.getFile(ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID)) + /** + * Get the shuffle index file. + * + * When the dirs parameter is None then use the disk manager's local directories. Otherwise, + * read from the specified directories. + */ + private def getIndexFile( + shuffleId: Int, + mapId: Long, + dirs: Option[Array[String]] = None): File = { + val blockId = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID) + dirs + .map(ExecutorDiskUtils.getFile(_, blockManager.subDirsPerLocalDir, blockId.name)) + .getOrElse(blockManager.diskBlockManager.getFile(blockId)) } /** * Remove data file and index file that contain the output data from one map. 
*/ - def removeDataByMap(shuffleId: Int, mapId: Int): Unit = { + def removeDataByMap(shuffleId: Int, mapId: Long): Unit = { var file = getDataFile(shuffleId, mapId) if (file.exists()) { if (!file.delete()) { @@ -135,7 +160,7 @@ private[spark] class IndexShuffleBlockResolver( */ def writeIndexFileAndCommit( shuffleId: Int, - mapId: Int, + mapId: Long, lengths: Array[Long], dataTmp: File): Unit = { val indexFile = getIndexFile(shuffleId, mapId) @@ -190,10 +215,20 @@ private[spark] class IndexShuffleBlockResolver( } } - override def getBlockData(blockId: ShuffleBlockId): ManagedBuffer = { + override def getBlockData( + blockId: BlockId, + dirs: Option[Array[String]]): ManagedBuffer = { + val (shuffleId, mapId, startReduceId, endReduceId) = blockId match { + case id: ShuffleBlockId => + (id.shuffleId, id.mapId, id.reduceId, id.reduceId + 1) + case batchId: ShuffleBlockBatchId => + (batchId.shuffleId, batchId.mapId, batchId.startReduceId, batchId.endReduceId) + case _ => + throw new IllegalArgumentException("unexpected shuffle block id format: " + blockId) + } // The block is actually going to be a range of a single map output file for this map, so // find out the consolidated file, then the offset within that from our index - val indexFile = getIndexFile(blockId.shuffleId, blockId.mapId) + val indexFile = getIndexFile(shuffleId, mapId, dirs) // SPARK-22982: if this FileInputStream's position is seeked forward by another piece of code // which is incorrectly using our file descriptor then this code will fetch the wrong offsets @@ -202,22 +237,23 @@ private[spark] class IndexShuffleBlockResolver( // class of issue from re-occurring in the future which is why they are left here even though // SPARK-22982 is fixed. 
val channel = Files.newByteChannel(indexFile.toPath) - channel.position(blockId.reduceId * 8L) + channel.position(startReduceId * 8L) val in = new DataInputStream(Channels.newInputStream(channel)) try { - val offset = in.readLong() - val nextOffset = in.readLong() + val startOffset = in.readLong() + channel.position(endReduceId * 8L) + val endOffset = in.readLong() val actualPosition = channel.position() - val expectedPosition = blockId.reduceId * 8L + 16 + val expectedPosition = endReduceId * 8L + 8 if (actualPosition != expectedPosition) { throw new Exception(s"SPARK-22982: Incorrect channel position after index file reads: " + s"expected $expectedPosition but actual position was $actualPosition.") } new FileSegmentManagedBuffer( transportConf, - getDataFile(blockId.shuffleId, blockId.mapId), - offset, - nextOffset - offset) + getDataFile(shuffleId, mapId, dirs), + startOffset, + endOffset - startOffset) } finally { in.close() } diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala index d1ecbc1bf0178..5485cf955f11a 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala @@ -18,7 +18,7 @@ package org.apache.spark.shuffle import org.apache.spark.network.buffer.ManagedBuffer -import org.apache.spark.storage.ShuffleBlockId +import org.apache.spark.storage.BlockId private[spark] /** @@ -31,10 +31,14 @@ trait ShuffleBlockResolver { type ShuffleId = Int /** - * Retrieve the data for the specified block. If the data for that block is not available, - * throws an unspecified exception. + * Retrieve the data for the specified block. + * + * When the dirs parameter is None then use the disk manager's local directories. Otherwise, + * read from the specified directories. + * + * If the data for that block is not available, throws an unspecified exception. 
*/ - def getBlockData(blockId: ShuffleBlockId): ManagedBuffer + def getBlockData(blockId: BlockId, dirs: Option[Array[String]] = None): ManagedBuffer def stop(): Unit } diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleDataIOUtils.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleDataIOUtils.scala new file mode 100644 index 0000000000000..e9507a7584ba3 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleDataIOUtils.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle + +import org.apache.spark.SparkConf +import org.apache.spark.internal.config.SHUFFLE_IO_PLUGIN_CLASS +import org.apache.spark.shuffle.api.ShuffleDataIO +import org.apache.spark.util.Utils + +private[spark] object ShuffleDataIOUtils { + + /** + * The prefix of spark config keys that are passed from the driver to the executor. + */ + val SHUFFLE_SPARK_CONF_PREFIX = "spark.shuffle.plugin.__config__." 
+ + def loadShuffleDataIO(conf: SparkConf): ShuffleDataIO = { + val configuredPluginClass = conf.get(SHUFFLE_IO_PLUGIN_CLASS) + val maybeIO = Utils.loadExtensions( + classOf[ShuffleDataIO], Seq(configuredPluginClass), conf) + require(maybeIO.nonEmpty, s"A valid shuffle plugin must be specified by config " + + s"${SHUFFLE_IO_PLUGIN_CLASS.key}, but $configuredPluginClass resulted in zero valid " + + s"plugins.") + maybeIO.head + } + +} diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala index 18a743fbfa6fc..057b0d6e0b0a7 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala @@ -34,13 +34,12 @@ private[spark] trait ShuffleManager { */ def registerShuffle[K, V, C]( shuffleId: Int, - numMaps: Int, dependency: ShuffleDependency[K, V, C]): ShuffleHandle /** Get a writer for a given partition. Called on executors by map tasks. */ def getWriter[K, V]( handle: ShuffleHandle, - mapId: Int, + mapId: Long, context: TaskContext, metrics: ShuffleWriteMetricsReporter): ShuffleWriter[K, V] @@ -55,6 +54,20 @@ private[spark] trait ShuffleManager { context: TaskContext, metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] + /** + * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive) to + * read from map output (startMapIndex to endMapIndex - 1, inclusive). + * Called on executors by reduce tasks. + */ + def getReaderForRange[K, C]( + handle: ShuffleHandle, + startMapIndex: Int, + endMapIndex: Int, + startPartition: Int, + endPartition: Int, + context: TaskContext, + metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] + /** * Remove a shuffle's metadata from the ShuffleManager. * @return true if the metadata removed successfully, otherwise false. 
diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShufflePartitionPairsWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/ShufflePartitionPairsWriter.scala index a988c5e126a76..e0affb858c359 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShufflePartitionPairsWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShufflePartitionPairsWriter.scala @@ -21,7 +21,7 @@ import java.io.{Closeable, IOException, OutputStream} import org.apache.spark.serializer.{SerializationStream, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.api.ShufflePartitionWriter -import org.apache.spark.storage.BlockId +import org.apache.spark.storage.{BlockId, TimeTrackingOutputStream} import org.apache.spark.util.Utils import org.apache.spark.util.collection.PairsWriter @@ -39,6 +39,7 @@ private[spark] class ShufflePartitionPairsWriter( private var isClosed = false private var partitionStream: OutputStream = _ + private var timeTrackingStream: OutputStream = _ private var wrappedStream: OutputStream = _ private var objOut: SerializationStream = _ private var numRecordsWritten = 0 @@ -59,7 +60,8 @@ private[spark] class ShufflePartitionPairsWriter( private def open(): Unit = { try { partitionStream = partitionWriter.openStream - wrappedStream = serializerManager.wrapStream(blockId, partitionStream) + timeTrackingStream = new TimeTrackingOutputStream(writeMetrics, partitionStream) + wrappedStream = serializerManager.wrapStream(blockId, timeTrackingStream) objOut = serializerInstance.serializeStream(wrappedStream) } catch { case e: Exception => @@ -78,6 +80,7 @@ private[spark] class ShufflePartitionPairsWriter( // Setting these to null will prevent the underlying streams from being closed twice // just in case any stream's close() implementation is not idempotent. 
wrappedStream = null + timeTrackingStream = null partitionStream = null } { // Normally closing objOut would close the inner streams as well, but just in case there @@ -86,9 +89,15 @@ private[spark] class ShufflePartitionPairsWriter( wrappedStream = closeIfNonNull(wrappedStream) // Same as above - if wrappedStream closes then assume it closes underlying // partitionStream and don't close again in the finally + timeTrackingStream = null partitionStream = null } { - partitionStream = closeIfNonNull(partitionStream) + Utils.tryWithSafeFinally { + timeTrackingStream = closeIfNonNull(timeTrackingStream) + partitionStream = null + } { + partitionStream = closeIfNonNull(partitionStream) + } } } updateBytesWritten() diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala index 5b0c7e9f2b0b4..1429144c6f6e2 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala @@ -44,7 +44,7 @@ private[spark] class ShuffleWriteProcessor extends Serializable with Logging { def write( rdd: RDD[_], dep: ShuffleDependency[_, _, _], - partitionId: Int, + mapId: Long, context: TaskContext, partition: Partition): MapStatus = { var writer: ShuffleWriter[Any, Any] = null @@ -52,7 +52,7 @@ private[spark] class ShuffleWriteProcessor extends Serializable with Logging { val manager = SparkEnv.get.shuffleManager writer = manager.getWriter[Any, Any]( dep.shuffleHandle, - partitionId, + mapId, context, createMetricsReporter(context)) writer.write( diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 2a99c93b32af4..aefcb59b8bb87 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ 
b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -19,11 +19,14 @@ package org.apache.spark.shuffle.sort import java.util.concurrent.ConcurrentHashMap +import scala.collection.JavaConverters._ + import org.apache.spark._ import org.apache.spark.internal.{config, Logging} import org.apache.spark.shuffle._ import org.apache.spark.shuffle.api.{ShuffleDataIO, ShuffleExecutorComponents} import org.apache.spark.util.Utils +import org.apache.spark.util.collection.OpenHashSet /** * In sort-based shuffle, incoming records are sorted according to their target partition ids, then @@ -79,9 +82,9 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager } /** - * A mapping from shuffle ids to the number of mappers producing output for those shuffles. + * A mapping from shuffle ids to the task ids of mappers producing output for those shuffles. */ - private[this] val numMapsForShuffle = new ConcurrentHashMap[Int, Int]() + private[this] val taskIdMapsForShuffle = new ConcurrentHashMap[Int, OpenHashSet[Long]]() private lazy val shuffleExecutorComponents = loadShuffleExecutorComponents(conf) @@ -92,7 +95,6 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager */ override def registerShuffle[K, V, C]( shuffleId: Int, - numMaps: Int, dependency: ShuffleDependency[K, V, C]): ShuffleHandle = { if (SortShuffleWriter.shouldBypassMergeSort(conf, dependency)) { // If there are fewer than spark.shuffle.sort.bypassMergeThreshold partitions and we don't @@ -101,14 +103,14 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager // together the spilled files, which would happen with the normal code path. The downside is // having multiple files open at a time and thus more memory allocated to buffers. 
new BypassMergeSortShuffleHandle[K, V]( - shuffleId, numMaps, dependency.asInstanceOf[ShuffleDependency[K, V, V]]) + shuffleId, dependency.asInstanceOf[ShuffleDependency[K, V, V]]) } else if (SortShuffleManager.canUseSerializedShuffle(dependency)) { // Otherwise, try to buffer map outputs in a serialized form, since this is more efficient: new SerializedShuffleHandle[K, V]( - shuffleId, numMaps, dependency.asInstanceOf[ShuffleDependency[K, V, V]]) + shuffleId, dependency.asInstanceOf[ShuffleDependency[K, V, V]]) } else { // Otherwise, buffer map outputs in a deserialized form: - new BaseShuffleHandle(shuffleId, numMaps, dependency) + new BaseShuffleHandle(shuffleId, dependency) } } @@ -122,37 +124,54 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager endPartition: Int, context: TaskContext, metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = { + val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId( + handle.shuffleId, startPartition, endPartition) + new BlockStoreShuffleReader( + handle.asInstanceOf[BaseShuffleHandle[K, _, C]], blocksByAddress, context, metrics, + shouldBatchFetch = canUseBatchFetch(startPartition, endPartition, context)) + } + + override def getReaderForRange[K, C]( + handle: ShuffleHandle, + startMapIndex: Int, + endMapIndex: Int, + startPartition: Int, + endPartition: Int, + context: TaskContext, + metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = { + val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByRange( + handle.shuffleId, startMapIndex, endMapIndex, startPartition, endPartition) new BlockStoreShuffleReader( - handle.asInstanceOf[BaseShuffleHandle[K, _, C]], - startPartition, endPartition, context, metrics) + handle.asInstanceOf[BaseShuffleHandle[K, _, C]], blocksByAddress, context, metrics, + shouldBatchFetch = canUseBatchFetch(startPartition, endPartition, context)) } /** Get a writer for a given partition. Called on executors by map tasks. 
*/ override def getWriter[K, V]( handle: ShuffleHandle, - mapId: Int, + mapId: Long, context: TaskContext, metrics: ShuffleWriteMetricsReporter): ShuffleWriter[K, V] = { - numMapsForShuffle.putIfAbsent( - handle.shuffleId, handle.asInstanceOf[BaseShuffleHandle[_, _, _]].numMaps) + val mapTaskIds = taskIdMapsForShuffle.computeIfAbsent( + handle.shuffleId, _ => new OpenHashSet[Long](16)) + mapTaskIds.synchronized { mapTaskIds.add(context.taskAttemptId()) } val env = SparkEnv.get handle match { case unsafeShuffleHandle: SerializedShuffleHandle[K @unchecked, V @unchecked] => new UnsafeShuffleWriter( env.blockManager, - shuffleBlockResolver, context.taskMemoryManager(), unsafeShuffleHandle, mapId, context, env.conf, - metrics) + metrics, + shuffleExecutorComponents) case bypassMergeSortHandle: BypassMergeSortShuffleHandle[K @unchecked, V @unchecked] => new BypassMergeSortShuffleWriter( env.blockManager, bypassMergeSortHandle, mapId, - context.taskAttemptId(), env.conf, metrics, shuffleExecutorComponents) @@ -164,9 +183,9 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager /** Remove a shuffle's metadata from the ShuffleManager. */ override def unregisterShuffle(shuffleId: Int): Boolean = { - Option(numMapsForShuffle.remove(shuffleId)).foreach { numMaps => - (0 until numMaps).foreach { mapId => - shuffleBlockResolver.removeDataByMap(shuffleId, mapId) + Option(taskIdMapsForShuffle.remove(shuffleId)).foreach { mapTaskIds => + mapTaskIds.iterator.foreach { mapTaskId => + shuffleBlockResolver.removeDataByMap(shuffleId, mapTaskId) } } true @@ -185,10 +204,26 @@ private[spark] object SortShuffleManager extends Logging { * The maximum number of shuffle output partitions that SortShuffleManager supports when * buffering map outputs in a serialized form. This is an extreme defensive programming measure, * since it's extremely unlikely that a single shuffle produces over 16 million output partitions. 
- * */ + */ val MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE = PackedRecordPointer.MAXIMUM_PARTITION_ID + 1 + /** + * The local property key for continuous shuffle block fetching feature. + */ + val FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED_KEY = + "__fetch_continuous_blocks_in_batch_enabled" + + /** + * Helper method for determining whether a shuffle reader should fetch the continuous blocks + * in batch. + */ + def canUseBatchFetch(startPartition: Int, endPartition: Int, context: TaskContext): Boolean = { + val fetchMultiPartitions = endPartition - startPartition > 1 + fetchMultiPartitions && + context.getLocalProperty(FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED_KEY) == "true" + } + /** * Helper method for determining whether a shuffle should use an optimized serialized shuffle * path or whether it should fall back to the original path that operates on deserialized objects. @@ -215,12 +250,13 @@ private[spark] object SortShuffleManager extends Logging { } private def loadShuffleExecutorComponents(conf: SparkConf): ShuffleExecutorComponents = { - val configuredPluginClasses = conf.get(config.SHUFFLE_IO_PLUGIN_CLASS) - val maybeIO = Utils.loadExtensions( - classOf[ShuffleDataIO], Seq(configuredPluginClasses), conf) - require(maybeIO.size == 1, s"Failed to load plugins of type $configuredPluginClasses") - val executorComponents = maybeIO.head.executor() - executorComponents.initializeExecutor(conf.getAppId, SparkEnv.get.executorId) + val executorComponents = ShuffleDataIOUtils.loadShuffleDataIO(conf).executor() + val extraConfigs = conf.getAllWithPrefix(ShuffleDataIOUtils.SHUFFLE_SPARK_CONF_PREFIX) + .toMap + executorComponents.initializeExecutor( + conf.getAppId, + SparkEnv.get.executorId, + extraConfigs.asJava) executorComponents } } @@ -231,9 +267,8 @@ private[spark] object SortShuffleManager extends Logging { */ private[spark] class SerializedShuffleHandle[K, V]( shuffleId: Int, - numMaps: Int, dependency: ShuffleDependency[K, V, V]) - extends 
BaseShuffleHandle(shuffleId, numMaps, dependency) { + extends BaseShuffleHandle(shuffleId, dependency) { } /** @@ -242,7 +277,6 @@ private[spark] class SerializedShuffleHandle[K, V]( */ private[spark] class BypassMergeSortShuffleHandle[K, V]( shuffleId: Int, - numMaps: Int, dependency: ShuffleDependency[K, V, V]) - extends BaseShuffleHandle(shuffleId, numMaps, dependency) { + extends BaseShuffleHandle(shuffleId, dependency) { } diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index a781b16252432..a391bdf2db44e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -27,7 +27,7 @@ import org.apache.spark.util.collection.ExternalSorter private[spark] class SortShuffleWriter[K, V, C]( shuffleBlockResolver: IndexShuffleBlockResolver, handle: BaseShuffleHandle[K, V, C], - mapId: Int, + mapId: Long, context: TaskContext, shuffleExecutorComponents: ShuffleExecutorComponents) extends ShuffleWriter[K, V] with Logging { @@ -65,10 +65,10 @@ private[spark] class SortShuffleWriter[K, V, C]( // because it just opens a single file, so is typically too fast to measure accurately // (see SPARK-3570). 
val mapOutputWriter = shuffleExecutorComponents.createMapOutputWriter( - dep.shuffleId, mapId, context.taskAttemptId(), dep.partitioner.numPartitions) + dep.shuffleId, mapId, dep.partitioner.numPartitions) sorter.writePartitionedMapOutput(dep.shuffleId, mapId, mapOutputWriter) val partitionLengths = mapOutputWriter.commitAllPartitions() - mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths) + mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths, mapId) } /** Close this writer, passing along whether the map completed */ diff --git a/core/src/main/scala/org/apache/spark/status/AppHistoryServerPlugin.scala b/core/src/main/scala/org/apache/spark/status/AppHistoryServerPlugin.scala index d144a0e998fa1..2e9a31d5ac69c 100644 --- a/core/src/main/scala/org/apache/spark/status/AppHistoryServerPlugin.scala +++ b/core/src/main/scala/org/apache/spark/status/AppHistoryServerPlugin.scala @@ -35,4 +35,9 @@ private[spark] trait AppHistoryServerPlugin { * Sets up UI of this plugin to rebuild the history UI. */ def setupUI(ui: SparkUI): Unit + + /** + * The position of a plugin tab relative to the other plugin tabs in the history UI. + */ + def displayOrder: Int = Integer.MAX_VALUE } diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index c85b3caf8a5ef..c3f22f32993a8 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -71,7 +71,7 @@ private[spark] class AppStatusListener( // causing too many writes to the underlying store, and other expensive operations). 
private val liveStages = new ConcurrentHashMap[(Int, Int), LiveStage]() private val liveJobs = new HashMap[Int, LiveJob]() - private val liveExecutors = new HashMap[String, LiveExecutor]() + private[spark] val liveExecutors = new HashMap[String, LiveExecutor]() private val deadExecutors = new HashMap[String, LiveExecutor]() private val liveTasks = new HashMap[Long, LiveTask]() private val liveRDDs = new HashMap[Int, LiveRDD]() @@ -234,8 +234,8 @@ private[spark] class AppStatusListener( (partition.memoryUsed / partition.executors.length) * -1) rdd.diskUsed = addDeltaToValue(rdd.diskUsed, (partition.diskUsed / partition.executors.length) * -1) - partition.update(partition.executors - .filter(!_.equals(event.executorId)), rdd.storageLevel, + partition.update( + partition.executors.filter(!_.equals(event.executorId)), addDeltaToValue(partition.memoryUsed, (partition.memoryUsed / partition.executors.length) * -1), addDeltaToValue(partition.diskUsed, @@ -355,6 +355,8 @@ private[spark] class AppStatusListener( val lastStageInfo = event.stageInfos.sortBy(_.stageId).lastOption val jobName = lastStageInfo.map(_.name).getOrElse("") + val description = Option(event.properties) + .flatMap { p => Option(p.getProperty(SparkContext.SPARK_JOB_DESCRIPTION)) } val jobGroup = Option(event.properties) .flatMap { p => Option(p.getProperty(SparkContext.SPARK_JOB_GROUP_ID)) } val sqlExecutionId = Option(event.properties) @@ -363,6 +365,7 @@ private[spark] class AppStatusListener( val job = new LiveJob( event.jobId, jobName, + description, if (event.time > 0) Some(new Date(event.time)) else None, event.stageIds, jobGroup, @@ -495,7 +498,7 @@ private[spark] class AppStatusListener( event.stageInfo.rddInfos.foreach { info => if (info.storageLevel.isValid) { - liveUpdate(liveRDDs.getOrElseUpdate(info.id, new LiveRDD(info)), now) + liveUpdate(liveRDDs.getOrElseUpdate(info.id, new LiveRDD(info, info.storageLevel)), now) } } @@ -769,6 +772,11 @@ private[spark] class AppStatusListener( 
event.maxOnHeapMem.foreach { _ => exec.totalOnHeap = event.maxOnHeapMem.get exec.totalOffHeap = event.maxOffHeapMem.get + // SPARK-30594: whenever(first time or re-register) a BlockManager added, all blocks + // from this BlockManager will be reported to driver later. So, we should clean up + // used memory to avoid overlapped count. + exec.usedOnHeap = 0 + exec.usedOffHeap = 0 } exec.isActive = true exec.maxMemory = event.maxMem @@ -916,12 +924,6 @@ private[spark] class AppStatusListener( val diskDelta = event.blockUpdatedInfo.diskSize * (if (storageLevel.useDisk) 1 else -1) val memoryDelta = event.blockUpdatedInfo.memSize * (if (storageLevel.useMemory) 1 else -1) - val updatedStorageLevel = if (storageLevel.isValid) { - Some(storageLevel.description) - } else { - None - } - // We need information about the executor to update some memory accounting values in the // RDD info, so read that beforehand. val maybeExec = liveExecutors.get(executorId) @@ -936,13 +938,9 @@ private[spark] class AppStatusListener( // Update the block entry in the RDD info, keeping track of the deltas above so that we // can update the executor information too. liveRDDs.get(block.rddId).foreach { rdd => - if (updatedStorageLevel.isDefined) { - rdd.setStorageLevel(updatedStorageLevel.get) - } - val partition = rdd.partition(block.name) - val executors = if (updatedStorageLevel.isDefined) { + val executors = if (storageLevel.isValid) { val current = partition.executors if (current.contains(executorId)) { current @@ -957,7 +955,7 @@ private[spark] class AppStatusListener( // Only update the partition if it's still stored in some executor, otherwise get rid of it. 
if (executors.nonEmpty) { - partition.update(executors, rdd.storageLevel, + partition.update(executors, addDeltaToValue(partition.memoryUsed, memoryDelta), addDeltaToValue(partition.diskUsed, diskDelta)) } else { @@ -1049,7 +1047,7 @@ private[spark] class AppStatusListener( } } - private def updateExecutorMemoryDiskInfo( + private[spark] def updateExecutorMemoryDiskInfo( exec: LiveExecutor, storageLevel: StorageLevel, memoryDelta: Long, diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala index f6a21578ff499..20f171bd3c375 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala @@ -22,7 +22,7 @@ import AppStatusSource.getCounter import com.codahale.metrics.{Counter, Gauge, MetricRegistry} import org.apache.spark.SparkConf -import org.apache.spark.internal.config.Status.APP_STATUS_METRICS_ENABLED +import org.apache.spark.internal.config.Status.METRICS_APP_STATUS_SOURCE_ENABLED import org.apache.spark.metrics.source.Source private [spark] class JobDuration(val value: AtomicLong) extends Gauge[Long] { @@ -71,7 +71,7 @@ private[spark] object AppStatusSource { } def createSource(conf: SparkConf): Option[AppStatusSource] = { - Option(conf.get(APP_STATUS_METRICS_ENABLED)) + Option(conf.get(METRICS_APP_STATUS_SOURCE_ENABLED)) .filter(identity) .map { _ => new AppStatusSource() } } diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 964ab27a524c4..6b89812cc2bf0 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -136,12 +136,6 @@ private[spark] class AppStatusStore( store.read(classOf[StageDataWrapper], Array(stageId, stageAttemptId)).locality } - // SPARK-26119: we only want to consider 
successful tasks when calculating the metrics summary, - // but currently this is very expensive when using a disk store. So we only trigger the slower - // code path when we know we have all data in memory. The following method checks whether all - // the data will be in memory. - private def isInMemoryStore: Boolean = store.isInstanceOf[InMemoryStore] || listener.isDefined - /** * Calculates a summary of the task metrics for the given stage attempt, returning the * requested quantiles for the recorded metrics. @@ -162,21 +156,11 @@ private[spark] class AppStatusStore( // cheaper for disk stores (avoids deserialization). val count = { Utils.tryWithResource( - if (isInMemoryStore) { - // For Live UI, we should count the tasks with status "SUCCESS" only. - store.view(classOf[TaskDataWrapper]) - .parent(stageKey) - .index(TaskIndexNames.STATUS) - .first("SUCCESS") - .last("SUCCESS") - .closeableIterator() - } else { - store.view(classOf[TaskDataWrapper]) - .parent(stageKey) - .index(TaskIndexNames.EXEC_RUN_TIME) - .first(0L) - .closeableIterator() - } + store.view(classOf[TaskDataWrapper]) + .parent(stageKey) + .index(TaskIndexNames.EXEC_RUN_TIME) + .first(0L) + .closeableIterator() ) { it => var _count = 0L while (it.hasNext()) { @@ -245,50 +229,30 @@ private[spark] class AppStatusStore( // stabilize once the stage finishes. It's also slow, especially with disk stores. val indices = quantiles.map { q => math.min((q * count).toLong, count - 1) } - // TODO: Summary metrics needs to display all the successful tasks' metrics (SPARK-26119). - // For InMemory case, it is efficient to find using the following code. But for diskStore case - // we need an efficient solution to avoid deserialization time overhead. For that, we need to - // rework on the way indexing works, so that we can index by specific metrics for successful - // and failed tasks differently (would be tricky). Also would require changing the disk store - // version (to invalidate old stores). 
def scanTasks(index: String)(fn: TaskDataWrapper => Long): IndexedSeq[Double] = { - if (isInMemoryStore) { - val quantileTasks = store.view(classOf[TaskDataWrapper]) + Utils.tryWithResource( + store.view(classOf[TaskDataWrapper]) .parent(stageKey) .index(index) .first(0L) - .asScala - .filter { _.status == "SUCCESS"} // Filter "SUCCESS" tasks - .toIndexedSeq - - indices.map { index => - fn(quantileTasks(index.toInt)).toDouble - }.toIndexedSeq - } else { - Utils.tryWithResource( - store.view(classOf[TaskDataWrapper]) - .parent(stageKey) - .index(index) - .first(0L) - .closeableIterator() - ) { it => - var last = Double.NaN - var currentIdx = -1L - indices.map { idx => - if (idx == currentIdx) { + .closeableIterator() + ) { it => + var last = Double.NaN + var currentIdx = -1L + indices.map { idx => + if (idx == currentIdx) { + last + } else { + val diff = idx - currentIdx + currentIdx = idx + if (it.skip(diff - 1)) { + last = fn(it.next()).toDouble last } else { - val diff = idx - currentIdx - currentIdx = idx - if (it.skip(diff - 1)) { - last = fn(it.next()).toDouble - last - } else { - Double.NaN - } + Double.NaN } - }.toIndexedSeq - } + } + }.toIndexedSeq } } @@ -582,7 +546,7 @@ private[spark] class AppStatusStore( private[spark] object AppStatusStore { - val CURRENT_VERSION = 1L + val CURRENT_VERSION = 2L /** * Create an in-memory store for a live application. 
diff --git a/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala b/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala index 38cb030297c81..1b8dc9c8275ad 100644 --- a/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala +++ b/core/src/main/scala/org/apache/spark/status/ElementTrackingStore.scala @@ -18,14 +18,12 @@ package org.apache.spark.status import java.util.Collection -import java.util.concurrent.TimeUnit +import java.util.concurrent.{ExecutorService, TimeUnit} import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ import scala.collection.mutable.{HashMap, ListBuffer} -import com.google.common.util.concurrent.MoreExecutors - import org.apache.spark.SparkConf import org.apache.spark.internal.config.Status._ import org.apache.spark.status.ElementTrackingStore._ @@ -72,10 +70,10 @@ private[spark] class ElementTrackingStore(store: KVStore, conf: SparkConf) exten private val triggers = new HashMap[Class[_], LatchedTriggers]() private val flushTriggers = new ListBuffer[() => Unit]() - private val executor = if (conf.get(ASYNC_TRACKING_ENABLED)) { + private val executor: ExecutorService = if (conf.get(ASYNC_TRACKING_ENABLED)) { ThreadUtils.newDaemonSingleThreadExecutor("element-tracking-store-worker") } else { - MoreExecutors.sameThreadExecutor() + ThreadUtils.sameThreadExecutorService } @volatile private var stopped = false diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala index aa4a21c1bb818..2714f30de14f0 100644 --- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala +++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala @@ -20,6 +20,7 @@ package org.apache.spark.status import java.util.Date import java.util.concurrent.atomic.AtomicInteger +import scala.collection.JavaConverters._ import scala.collection.immutable.{HashSet, TreeSet} import 
scala.collection.mutable.HashMap @@ -30,7 +31,7 @@ import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.resource.ResourceInformation import org.apache.spark.scheduler.{AccumulableInfo, StageInfo, TaskInfo} import org.apache.spark.status.api.v1 -import org.apache.spark.storage.RDDInfo +import org.apache.spark.storage.{RDDInfo, StorageLevel} import org.apache.spark.ui.SparkUI import org.apache.spark.util.AccumulatorContext import org.apache.spark.util.collection.OpenHashSet @@ -62,6 +63,7 @@ private[spark] abstract class LiveEntity { private class LiveJob( val jobId: Int, name: String, + description: Option[String], val submissionTime: Option[Date], val stageIds: Seq[Int], jobGroup: Option[String], @@ -92,7 +94,7 @@ private class LiveJob( val info = new v1.JobData( jobId, name, - None, // description is always None? + description, submissionTime, completionTime, stageIds, @@ -183,6 +185,19 @@ private class LiveTask( info.timeRunning(lastUpdateTime.getOrElse(System.currentTimeMillis())) } + val hasMetrics = metrics.executorDeserializeTime >= 0 + + /** + * SPARK-26260: For non-successful tasks, store the metrics as negative to avoid + * the calculation in the task summary. `toApi` method in the `TaskDataWrapper` will make + * it the actual value.
+ */ + val taskMetrics: v1.TaskMetrics = if (hasMetrics && !info.successful) { + makeNegative(metrics) + } else { + metrics + } + new TaskDataWrapper( info.taskId, info.index, @@ -198,30 +213,31 @@ private class LiveTask( newAccumulatorInfos(info.accumulables), errorMessage, - metrics.executorDeserializeTime, - metrics.executorDeserializeCpuTime, - metrics.executorRunTime, - metrics.executorCpuTime, - metrics.resultSize, - metrics.jvmGcTime, - metrics.resultSerializationTime, - metrics.memoryBytesSpilled, - metrics.diskBytesSpilled, - metrics.peakExecutionMemory, - metrics.inputMetrics.bytesRead, - metrics.inputMetrics.recordsRead, - metrics.outputMetrics.bytesWritten, - metrics.outputMetrics.recordsWritten, - metrics.shuffleReadMetrics.remoteBlocksFetched, - metrics.shuffleReadMetrics.localBlocksFetched, - metrics.shuffleReadMetrics.fetchWaitTime, - metrics.shuffleReadMetrics.remoteBytesRead, - metrics.shuffleReadMetrics.remoteBytesReadToDisk, - metrics.shuffleReadMetrics.localBytesRead, - metrics.shuffleReadMetrics.recordsRead, - metrics.shuffleWriteMetrics.bytesWritten, - metrics.shuffleWriteMetrics.writeTime, - metrics.shuffleWriteMetrics.recordsWritten, + hasMetrics, + taskMetrics.executorDeserializeTime, + taskMetrics.executorDeserializeCpuTime, + taskMetrics.executorRunTime, + taskMetrics.executorCpuTime, + taskMetrics.resultSize, + taskMetrics.jvmGcTime, + taskMetrics.resultSerializationTime, + taskMetrics.memoryBytesSpilled, + taskMetrics.diskBytesSpilled, + taskMetrics.peakExecutionMemory, + taskMetrics.inputMetrics.bytesRead, + taskMetrics.inputMetrics.recordsRead, + taskMetrics.outputMetrics.bytesWritten, + taskMetrics.outputMetrics.recordsWritten, + taskMetrics.shuffleReadMetrics.remoteBlocksFetched, + taskMetrics.shuffleReadMetrics.localBlocksFetched, + taskMetrics.shuffleReadMetrics.fetchWaitTime, + taskMetrics.shuffleReadMetrics.remoteBytesRead, + taskMetrics.shuffleReadMetrics.remoteBytesReadToDisk, + taskMetrics.shuffleReadMetrics.localBytesRead, 
+ taskMetrics.shuffleReadMetrics.recordsRead, + taskMetrics.shuffleWriteMetrics.bytesWritten, + taskMetrics.shuffleWriteMetrics.writeTime, + taskMetrics.shuffleWriteMetrics.recordsWritten, stageId, stageAttemptId) @@ -229,7 +245,7 @@ private class LiveTask( } -private class LiveExecutor(val executorId: String, _addTime: Long) extends LiveEntity { +private[spark] class LiveExecutor(val executorId: String, _addTime: Long) extends LiveEntity { var hostPort: String = null var host: String = null @@ -458,7 +474,13 @@ private class LiveStage extends LiveEntity { } -private class LiveRDDPartition(val blockName: String) { +/** + * Data about a single partition of a cached RDD. The RDD storage level is used to compute the + * effective storage level of the partition, which takes into account the storage actually being + * used by the partition in the executors, and thus may differ from the storage level requested + * by the application. + */ +private class LiveRDDPartition(val blockName: String, rddLevel: StorageLevel) { import LiveEntityHelpers._ @@ -476,12 +498,13 @@ private class LiveRDDPartition(val blockName: String) { def update( executors: Seq[String], - storageLevel: String, memoryUsed: Long, diskUsed: Long): Unit = { + val level = StorageLevel(diskUsed > 0, memoryUsed > 0, rddLevel.useOffHeap, + if (memoryUsed > 0) rddLevel.deserialized else false, executors.size) value = new v1.RDDPartitionInfo( blockName, - weakIntern(storageLevel), + weakIntern(level.description), memoryUsed, diskUsed, executors) @@ -520,27 +543,31 @@ private class LiveRDDDistribution(exec: LiveExecutor) { } -private class LiveRDD(val info: RDDInfo) extends LiveEntity { +/** + * Tracker for data related to a persisted RDD. + * + * The RDD storage level is immutable, following the current behavior of `RDD.persist()`, even + * though it is mutable in the `RDDInfo` structure. 
Since the listener does not track unpersisted + * RDDs, this covers the case where an early stage is run on the unpersisted RDD, and a later stage + * it started after the RDD is marked for caching. + */ +private class LiveRDD(val info: RDDInfo, storageLevel: StorageLevel) extends LiveEntity { import LiveEntityHelpers._ - var storageLevel: String = weakIntern(info.storageLevel.description) var memoryUsed = 0L var diskUsed = 0L + private val levelDescription = weakIntern(storageLevel.description) private val partitions = new HashMap[String, LiveRDDPartition]() private val partitionSeq = new RDDPartitionSeq() private val distributions = new HashMap[String, LiveRDDDistribution]() - def setStorageLevel(level: String): Unit = { - this.storageLevel = weakIntern(level) - } - def partition(blockName: String): LiveRDDPartition = { partitions.getOrElseUpdate(blockName, { - val part = new LiveRDDPartition(blockName) - part.update(Nil, storageLevel, 0L, 0L) + val part = new LiveRDDPartition(blockName, storageLevel) + part.update(Nil, 0L, 0L) partitionSeq.addPartition(part) part }) @@ -578,7 +605,7 @@ private class LiveRDD(val info: RDDInfo) extends LiveEntity { info.name, info.numPartitions, partitions.size, - storageLevel, + levelDescription, memoryUsed, diskUsed, dists, @@ -599,10 +626,22 @@ private class SchedulerPool(name: String) extends LiveEntity { } -private object LiveEntityHelpers { +private[spark] object LiveEntityHelpers { private val stringInterner = Interners.newWeakInterner[String]() + private def accuValuetoString(value: Any): String = value match { + case list: java.util.List[_] => + // SPARK-30379: For collection accumulator, string representation might + // takes much more memory (e.g. long => string of it) and cause OOM. + // So we only show first few elements. + if (list.size() > 5) { + list.asScala.take(5).mkString("[", ",", "," + "... 
" + (list.size() - 5) + " more items]") + } else { + list.toString + } + case _ => value.toString + } def newAccumulatorInfos(accums: Iterable[AccumulableInfo]): Seq[v1.AccumulableInfo] = { accums @@ -615,8 +654,8 @@ private object LiveEntityHelpers { new v1.AccumulableInfo( acc.id, acc.name.map(weakIntern).orNull, - acc.update.map(_.toString()), - acc.value.map(_.toString()).orNull) + acc.update.map(accuValuetoString), + acc.value.map(accuValuetoString).orNull) } .toSeq } @@ -698,6 +737,46 @@ private object LiveEntityHelpers { addMetrics(m1, m2, -1) } + /** + * Convert all the metric values to negative as well as handle zero values. + * This method assumes that all the metric values are greater than or equal to zero + */ + def makeNegative(m: v1.TaskMetrics): v1.TaskMetrics = { + // To handle 0 metric value, add 1 and make the metric negative. + // To recover actual value do `math.abs(metric + 1)` + // Eg: if the metric values are (5, 3, 0, 1) => Updated metric values will be (-6, -4, -1, -2) + // To get actual metric value, do math.abs(metric + 1) => (5, 3, 0, 1) + def updateMetricValue(metric: Long): Long = { + metric * -1L - 1L + } + + createMetrics( + updateMetricValue(m.executorDeserializeTime), + updateMetricValue(m.executorDeserializeCpuTime), + updateMetricValue(m.executorRunTime), + updateMetricValue(m.executorCpuTime), + updateMetricValue(m.resultSize), + updateMetricValue(m.jvmGcTime), + updateMetricValue(m.resultSerializationTime), + updateMetricValue(m.memoryBytesSpilled), + updateMetricValue(m.diskBytesSpilled), + updateMetricValue(m.peakExecutionMemory), + updateMetricValue(m.inputMetrics.bytesRead), + updateMetricValue(m.inputMetrics.recordsRead), + updateMetricValue(m.outputMetrics.bytesWritten), + updateMetricValue(m.outputMetrics.recordsWritten), + updateMetricValue(m.shuffleReadMetrics.remoteBlocksFetched), + updateMetricValue(m.shuffleReadMetrics.localBlocksFetched), + updateMetricValue(m.shuffleReadMetrics.fetchWaitTime), + 
updateMetricValue(m.shuffleReadMetrics.remoteBytesRead), + updateMetricValue(m.shuffleReadMetrics.remoteBytesReadToDisk), + updateMetricValue(m.shuffleReadMetrics.localBytesRead), + updateMetricValue(m.shuffleReadMetrics.recordsRead), + updateMetricValue(m.shuffleWriteMetrics.bytesWritten), + updateMetricValue(m.shuffleWriteMetrics.writeTime), + updateMetricValue(m.shuffleWriteMetrics.recordsWritten)) + } + private def addMetrics(m1: v1.TaskMetrics, m2: v1.TaskMetrics, mult: Int): v1.TaskMetrics = { createMetrics( m1.executorDeserializeTime + m2.executorDeserializeTime * mult, diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala index 2ee9d3d0815a1..cf5c759bebdbb 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala @@ -25,6 +25,8 @@ import javax.ws.rs.core.{MediaType, Response, StreamingOutput} import scala.util.control.NonFatal import org.apache.spark.{JobExecutionStatus, SparkContext} +import org.apache.spark.status.api.v1 +import org.apache.spark.util.Utils @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class AbstractApplicationResource extends BaseAppResource { @@ -97,7 +99,15 @@ private[v1] class AbstractApplicationResource extends BaseAppResource { @GET @Path("environment") - def environmentInfo(): ApplicationEnvironmentInfo = withUI(_.store.environmentInfo()) + def environmentInfo(): ApplicationEnvironmentInfo = withUI { ui => + val envInfo = ui.store.environmentInfo() + new v1.ApplicationEnvironmentInfo( + envInfo.runtime, + Utils.redact(ui.conf, envInfo.sparkProperties), + Utils.redact(ui.conf, envInfo.hadoopProperties), + Utils.redact(ui.conf, envInfo.systemProperties), + envInfo.classpathEntries) + } @GET @Path("logs") diff --git 
a/core/src/main/scala/org/apache/spark/status/api/v1/PrometheusResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/PrometheusResource.scala new file mode 100644 index 0000000000000..f9fb78e65a3d9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/PrometheusResource.scala @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} +import org.glassfish.jersey.server.ServerProperties +import org.glassfish.jersey.servlet.ServletContainer + +import org.apache.spark.ui.SparkUI + +/** + * This aims to expose Executor metrics like REST API which is documented in + * + * https://spark.apache.org/docs/3.0.0/monitoring.html#executor-metrics + * + * Note that this is based on ExecutorSummary which is different from ExecutorSource. 
+ */ +@Path("/executors") +private[v1] class PrometheusResource extends ApiRequestContext { + @GET + @Path("prometheus") + @Produces(Array(MediaType.TEXT_PLAIN)) + def executors(): String = { + val sb = new StringBuilder + val store = uiRoot.asInstanceOf[SparkUI].store + store.executorList(true).foreach { executor => + val prefix = "metrics_executor_" + val labels = Seq( + "application_id" -> store.applicationInfo.id, + "application_name" -> store.applicationInfo.name, + "executor_id" -> executor.id + ).map { case (k, v) => s"""$k="$v"""" }.mkString("{", ", ", "}") + sb.append(s"${prefix}rddBlocks_Count$labels ${executor.rddBlocks}\n") + sb.append(s"${prefix}memoryUsed_Count$labels ${executor.memoryUsed}\n") + sb.append(s"${prefix}diskUsed_Count$labels ${executor.diskUsed}\n") + sb.append(s"${prefix}totalCores_Count$labels ${executor.totalCores}\n") + sb.append(s"${prefix}maxTasks_Count$labels ${executor.maxTasks}\n") + sb.append(s"${prefix}activeTasks_Count$labels ${executor.activeTasks}\n") + sb.append(s"${prefix}failedTasks_Count$labels ${executor.failedTasks}\n") + sb.append(s"${prefix}completedTasks_Count$labels ${executor.completedTasks}\n") + sb.append(s"${prefix}totalTasks_Count$labels ${executor.totalTasks}\n") + sb.append(s"${prefix}totalDuration_Value$labels ${executor.totalDuration}\n") + sb.append(s"${prefix}totalGCTime_Value$labels ${executor.totalGCTime}\n") + sb.append(s"${prefix}totalInputBytes_Count$labels ${executor.totalInputBytes}\n") + sb.append(s"${prefix}totalShuffleRead_Count$labels ${executor.totalShuffleRead}\n") + sb.append(s"${prefix}totalShuffleWrite_Count$labels ${executor.totalShuffleWrite}\n") + sb.append(s"${prefix}maxMemory_Count$labels ${executor.maxMemory}\n") + executor.executorLogs.foreach { case (k, v) => } + executor.memoryMetrics.foreach { m => + sb.append(s"${prefix}usedOnHeapStorageMemory_Count$labels ${m.usedOnHeapStorageMemory}\n") + sb.append(s"${prefix}usedOffHeapStorageMemory_Count$labels 
${m.usedOffHeapStorageMemory}\n") + sb.append(s"${prefix}totalOnHeapStorageMemory_Count$labels ${m.totalOnHeapStorageMemory}\n") + sb.append(s"${prefix}totalOffHeapStorageMemory_Count$labels " + + s"${m.totalOffHeapStorageMemory}\n") + } + executor.peakMemoryMetrics.foreach { m => + val names = Array( + "JVMHeapMemory", + "JVMOffHeapMemory", + "OnHeapExecutionMemory", + "OffHeapExecutionMemory", + "OnHeapStorageMemory", + "OffHeapStorageMemory", + "OnHeapUnifiedMemory", + "OffHeapUnifiedMemory", + "DirectPoolMemory", + "MappedPoolMemory", + "ProcessTreeJVMVMemory", + "ProcessTreeJVMRSSMemory", + "ProcessTreePythonVMemory", + "ProcessTreePythonRSSMemory", + "ProcessTreeOtherVMemory", + "ProcessTreeOtherRSSMemory", + "MinorGCCount", + "MinorGCTime", + "MajorGCCount", + "MajorGCTime" + ) + names.foreach { name => + sb.append(s"$prefix${name}_Count$labels ${m.getMetricValue(name)}\n") + } + } + } + sb.toString + } +} + +private[spark] object PrometheusResource { + def getServletHandler(uiRoot: UIRoot): ServletContextHandler = { + val jerseyContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS) + jerseyContext.setContextPath("/metrics") + val holder: ServletHolder = new ServletHolder(classOf[ServletContainer]) + holder.setInitParameter(ServerProperties.PROVIDER_PACKAGES, "org.apache.spark.status.api.v1") + UIRootFromServletContext.setUiRoot(jerseyContext, uiRoot) + jerseyContext.addServlet(holder, "/*") + jerseyContext + } +} diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala index 9da5bea8bf5c4..f0a94d84d8a04 100644 --- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala +++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala @@ -177,10 +177,13 @@ private[spark] class TaskDataWrapper( val accumulatorUpdates: Seq[AccumulableInfo], val errorMessage: Option[String], + val hasMetrics: Boolean, // The following is an exploded view of a TaskMetrics API 
object. This saves 5 objects - // (= 80 bytes of Java object overhead) per instance of this wrapper. If the first value - // (executorDeserializeTime) is -1L, it means the metrics for this task have not been - // recorded. + // (= 80 bytes of Java object overhead) per instance of this wrapper. Non successful + // tasks' metrics will have negative values in `TaskDataWrapper`. `TaskData` will have + // actual metric values. To recover the actual metric value from `TaskDataWrapper`, + // need use `getMetricValue` method. If `hasMetrics` is false, it means the metrics + // for this task have not been recorded. @KVIndexParam(value = TaskIndexNames.DESER_TIME, parent = TaskIndexNames.STAGE) val executorDeserializeTime: Long, @KVIndexParam(value = TaskIndexNames.DESER_CPU_TIME, parent = TaskIndexNames.STAGE) @@ -233,39 +236,46 @@ private[spark] class TaskDataWrapper( val stageId: Int, val stageAttemptId: Int) { - def hasMetrics: Boolean = executorDeserializeTime >= 0 + // SPARK-26260: To handle non successful tasks metrics (Running, Failed, Killed). 
+ private def getMetricValue(metric: Long): Long = { + if (status != "SUCCESS") { + math.abs(metric + 1) + } else { + metric + } + } def toApi: TaskData = { val metrics = if (hasMetrics) { Some(new TaskMetrics( - executorDeserializeTime, - executorDeserializeCpuTime, - executorRunTime, - executorCpuTime, - resultSize, - jvmGcTime, - resultSerializationTime, - memoryBytesSpilled, - diskBytesSpilled, - peakExecutionMemory, + getMetricValue(executorDeserializeTime), + getMetricValue(executorDeserializeCpuTime), + getMetricValue(executorRunTime), + getMetricValue(executorCpuTime), + getMetricValue(resultSize), + getMetricValue(jvmGcTime), + getMetricValue(resultSerializationTime), + getMetricValue(memoryBytesSpilled), + getMetricValue(diskBytesSpilled), + getMetricValue(peakExecutionMemory), new InputMetrics( - inputBytesRead, - inputRecordsRead), + getMetricValue(inputBytesRead), + getMetricValue(inputRecordsRead)), new OutputMetrics( - outputBytesWritten, - outputRecordsWritten), + getMetricValue(outputBytesWritten), + getMetricValue(outputRecordsWritten)), new ShuffleReadMetrics( - shuffleRemoteBlocksFetched, - shuffleLocalBlocksFetched, - shuffleFetchWaitTime, - shuffleRemoteBytesRead, - shuffleRemoteBytesReadToDisk, - shuffleLocalBytesRead, - shuffleRecordsRead), + getMetricValue(shuffleRemoteBlocksFetched), + getMetricValue(shuffleLocalBlocksFetched), + getMetricValue(shuffleFetchWaitTime), + getMetricValue(shuffleRemoteBytesRead), + getMetricValue(shuffleRemoteBytesReadToDisk), + getMetricValue(shuffleLocalBytesRead), + getMetricValue(shuffleRecordsRead)), new ShuffleWriteMetrics( - shuffleBytesWritten, - shuffleWriteTime, - shuffleRecordsWritten))) + getMetricValue(shuffleBytesWritten), + getMetricValue(shuffleWriteTime), + getMetricValue(shuffleRecordsWritten)))) } else { None } @@ -296,8 +306,10 @@ private[spark] class TaskDataWrapper( @JsonIgnore @KVIndex(value = TaskIndexNames.SCHEDULER_DELAY, parent = TaskIndexNames.STAGE) def schedulerDelay: Long = { if 
(hasMetrics) { - AppStatusUtils.schedulerDelay(launchTime, resultFetchStart, duration, executorDeserializeTime, - resultSerializationTime, executorRunTime) + AppStatusUtils.schedulerDelay(launchTime, resultFetchStart, duration, + getMetricValue(executorDeserializeTime), + getMetricValue(resultSerializationTime), + getMetricValue(executorRunTime)) } else { -1L } @@ -330,7 +342,7 @@ private[spark] class TaskDataWrapper( @JsonIgnore @KVIndex(value = TaskIndexNames.SHUFFLE_TOTAL_READS, parent = TaskIndexNames.STAGE) private def shuffleTotalReads: Long = { if (hasMetrics) { - shuffleLocalBytesRead + shuffleRemoteBytesRead + getMetricValue(shuffleLocalBytesRead) + getMetricValue(shuffleRemoteBytesRead) } else { -1L } @@ -339,7 +351,7 @@ private[spark] class TaskDataWrapper( @JsonIgnore @KVIndex(value = TaskIndexNames.SHUFFLE_TOTAL_BLOCKS, parent = TaskIndexNames.STAGE) private def shuffleTotalBlocks: Long = { if (hasMetrics) { - shuffleLocalBlocksFetched + shuffleRemoteBlocksFetched + getMetricValue(shuffleLocalBlocksFetched) + getMetricValue(shuffleRemoteBlocksFetched) } else { -1L } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index 7ac2c71c18eb3..68ed3aa5b062f 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -38,7 +38,7 @@ sealed abstract class BlockId { // convenience methods def asRDDId: Option[RDDBlockId] = if (isRDD) Some(asInstanceOf[RDDBlockId]) else None def isRDD: Boolean = isInstanceOf[RDDBlockId] - def isShuffle: Boolean = isInstanceOf[ShuffleBlockId] + def isShuffle: Boolean = isInstanceOf[ShuffleBlockId] || isInstanceOf[ShuffleBlockBatchId] def isBroadcast: Boolean = isInstanceOf[BroadcastBlockId] override def toString: String = name @@ -52,17 +52,29 @@ case class RDDBlockId(rddId: Int, splitIndex: Int) extends BlockId { // Format of the shuffle block ids (including data and 
index) should be kept in sync with // org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getBlockData(). @DeveloperApi -case class ShuffleBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId { +case class ShuffleBlockId(shuffleId: Int, mapId: Long, reduceId: Int) extends BlockId { override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId } +// The batch id of continuous shuffle blocks of same mapId in range [startReduceId, endReduceId). @DeveloperApi -case class ShuffleDataBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId { +case class ShuffleBlockBatchId( + shuffleId: Int, + mapId: Long, + startReduceId: Int, + endReduceId: Int) extends BlockId { + override def name: String = { + "shuffle_" + shuffleId + "_" + mapId + "_" + startReduceId + "_" + endReduceId + } +} + +@DeveloperApi +case class ShuffleDataBlockId(shuffleId: Int, mapId: Long, reduceId: Int) extends BlockId { override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId + ".data" } @DeveloperApi -case class ShuffleIndexBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId { +case class ShuffleIndexBlockId(shuffleId: Int, mapId: Long, reduceId: Int) extends BlockId { override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId + ".index" } @@ -104,6 +116,7 @@ class UnrecognizedBlockId(name: String) object BlockId { val RDD = "rdd_([0-9]+)_([0-9]+)".r val SHUFFLE = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)".r + val SHUFFLE_BATCH = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)".r val SHUFFLE_DATA = "shuffle_([0-9]+)_([0-9]+)_([0-9]+).data".r val SHUFFLE_INDEX = "shuffle_([0-9]+)_([0-9]+)_([0-9]+).index".r val BROADCAST = "broadcast_([0-9]+)([_A-Za-z0-9]*)".r @@ -117,11 +130,13 @@ object BlockId { case RDD(rddId, splitIndex) => RDDBlockId(rddId.toInt, splitIndex.toInt) case SHUFFLE(shuffleId, mapId, reduceId) => - ShuffleBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt) + 
ShuffleBlockId(shuffleId.toInt, mapId.toLong, reduceId.toInt) + case SHUFFLE_BATCH(shuffleId, mapId, startReduceId, endReduceId) => + ShuffleBlockBatchId(shuffleId.toInt, mapId.toLong, startReduceId.toInt, endReduceId.toInt) case SHUFFLE_DATA(shuffleId, mapId, reduceId) => - ShuffleDataBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt) + ShuffleDataBlockId(shuffleId.toInt, mapId.toLong, reduceId.toInt) case SHUFFLE_INDEX(shuffleId, mapId, reduceId) => - ShuffleIndexBlockId(shuffleId.toInt, mapId.toInt, reduceId.toInt) + ShuffleIndexBlockId(shuffleId.toInt, mapId.toLong, reduceId.toInt) case BROADCAST(broadcastId, field) => BroadcastBlockId(broadcastId.toLong, field.stripPrefix("_")) case TASKRESULT(taskId) => diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 4b71dc1fff345..c47901314f53a 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -22,17 +22,18 @@ import java.lang.ref.{ReferenceQueue => JReferenceQueue, WeakReference} import java.nio.ByteBuffer import java.nio.channels.Channels import java.util.Collections -import java.util.concurrent.{ConcurrentHashMap, TimeUnit} +import java.util.concurrent.{CompletableFuture, ConcurrentHashMap, TimeUnit} import scala.collection.mutable import scala.collection.mutable.HashMap import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration._ import scala.reflect.ClassTag -import scala.util.Random +import scala.util.{Failure, Random, Success, Try} import scala.util.control.NonFatal import com.codahale.metrics.{MetricRegistry, MetricSet} +import com.google.common.cache.CacheBuilder import org.apache.commons.io.IOUtils import org.apache.spark._ @@ -113,6 +114,47 @@ private[spark] class ByteBufferBlockData( } +private[spark] class HostLocalDirManager( + futureExecutionContext: ExecutionContext, + cacheSize: Int, 
+ externalBlockStoreClient: ExternalBlockStoreClient, + host: String, + externalShuffleServicePort: Int) extends Logging { + + private val executorIdToLocalDirsCache = + CacheBuilder + .newBuilder() + .maximumSize(cacheSize) + .build[String, Array[String]]() + + private[spark] def getCachedHostLocalDirs() + : scala.collection.Map[String, Array[String]] = executorIdToLocalDirsCache.synchronized { + import scala.collection.JavaConverters._ + return executorIdToLocalDirsCache.asMap().asScala + } + + private[spark] def getHostLocalDirs( + executorIds: Array[String])( + callback: Try[java.util.Map[String, Array[String]]] => Unit): Unit = { + val hostLocalDirsCompletable = new CompletableFuture[java.util.Map[String, Array[String]]] + externalBlockStoreClient.getHostLocalDirs( + host, + externalShuffleServicePort, + executorIds, + hostLocalDirsCompletable) + hostLocalDirsCompletable.whenComplete { (hostLocalDirs, throwable) => + if (hostLocalDirs != null) { + callback(Success(hostLocalDirs)) + executorIdToLocalDirsCache.synchronized { + executorIdToLocalDirsCache.putAll(hostLocalDirs) + } + } else { + callback(Failure(throwable)) + } + } + } +} + /** * Manager running on every node (driver and executors) which provides interfaces for putting and * retrieving blocks both locally and remotely into various stores (memory, disk, and off-heap). @@ -206,6 +248,8 @@ private[spark] class BlockManager( new BlockManager.RemoteBlockDownloadFileManager(this) private val maxRemoteBlockToMem = conf.get(config.MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM) + var hostLocalDirManager: Option[HostLocalDirManager] = None + /** * Abstraction for storing blocks from bytes, whether they start in memory or on disk. 
* @@ -433,6 +477,21 @@ private[spark] class BlockManager( registerWithExternalShuffleServer() } + hostLocalDirManager = + if (conf.get(config.SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED) && + !conf.get(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL)) { + externalBlockStoreClient.map { blockStoreClient => + new HostLocalDirManager( + futureExecutionContext, + conf.get(config.STORAGE_LOCAL_DISK_BY_EXECUTORS_CACHE_SIZE), + blockStoreClient, + blockManagerId.host, + externalShuffleServicePort) + } + } else { + None + } + logInfo(s"Initialized BlockManager: $blockManagerId") } @@ -446,7 +505,7 @@ private[spark] class BlockManager( } } - private def registerWithExternalShuffleServer() { + private def registerWithExternalShuffleServer(): Unit = { logInfo("Registering executor with local external shuffle service.") val shuffleConfig = new ExecutorShuffleInfo( diskBlockManager.localDirsString, @@ -542,13 +601,19 @@ private[spark] class BlockManager( } } + override def getHostLocalShuffleData( + blockId: BlockId, + dirs: Array[String]): ManagedBuffer = { + shuffleManager.shuffleBlockResolver.getBlockData(blockId, Some(dirs)) + } + /** * Interface to get local block data. Throws an exception if the block cannot be found or * cannot be read successfully. 
*/ - override def getBlockData(blockId: BlockId): ManagedBuffer = { + override def getLocalBlockData(blockId: BlockId): ManagedBuffer = { if (blockId.isShuffle) { - shuffleManager.shuffleBlockResolver.getBlockData(blockId.asInstanceOf[ShuffleBlockId]) + shuffleManager.shuffleBlockResolver.getBlockData(blockId) } else { getLocalBytes(blockId) match { case Some(blockData) => @@ -853,7 +918,6 @@ private[spark] class BlockManager( * @param bufferTransformer this transformer expected to open the file if the block is backed by a * file by this it is guaranteed the whole content can be loaded * @tparam T result type - * @return */ private[spark] def getRemoteBlock[T]( blockId: BlockId, @@ -1725,15 +1789,23 @@ private[spark] class BlockManager( * lock on the block. */ private def removeBlockInternal(blockId: BlockId, tellMaster: Boolean): Unit = { + val blockStatus = if (tellMaster) { + val blockInfo = blockInfoManager.assertBlockIsLockedForWriting(blockId) + Some(getCurrentBlockStatus(blockId, blockInfo)) + } else None + // Removals are idempotent in disk store and memory store. At worst, we get a warning. val removedFromMemory = memoryStore.remove(blockId) val removedFromDisk = diskStore.remove(blockId) if (!removedFromMemory && !removedFromDisk) { logWarning(s"Block $blockId could not be removed as it was not found on disk or in memory") } + blockInfoManager.removeBlock(blockId) if (tellMaster) { - reportBlockStatus(blockId, BlockStatus.empty) + // Only update storage level from the captured block status before deleting, so that + // memory size and disk size are being kept for calculating delta. 
+ reportBlockStatus(blockId, blockStatus.get.copy(storageLevel = StorageLevel.NONE)) } } @@ -1831,7 +1903,7 @@ private[spark] object BlockManager { private val POLL_TIMEOUT = 1000 @volatile private var stopped = false - private val cleaningThread = new Thread() { override def run() { keepCleaning() } } + private val cleaningThread = new Thread() { override def run(): Unit = { keepCleaning() } } cleaningThread.setDaemon(true) cleaningThread.setName("RemoteBlock-temp-file-clean-thread") cleaningThread.start() diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index d188bdd912e5e..49e32d04d450a 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -27,7 +27,7 @@ import org.apache.spark.util.Utils /** * :: DeveloperApi :: - * This class represent an unique identifier for a BlockManager. + * This class represent a unique identifier for a BlockManager. * * The first 2 constructors of this class are made private to ensure that BlockManagerId objects * can be created only using the apply method in the companion object. 
This allows de-duplication diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index 9d13fedfb0c58..e335eb6ddb761 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -17,8 +17,8 @@ package org.apache.spark.storage -import scala.collection.Iterable import scala.collection.generic.CanBuildFrom +import scala.collection.immutable.Iterable import scala.concurrent.Future import org.apache.spark.{SparkConf, SparkException} @@ -30,6 +30,7 @@ import org.apache.spark.util.{RpcUtils, ThreadUtils} private[spark] class BlockManagerMaster( var driverEndpoint: RpcEndpointRef, + var driverHeartbeatEndPoint: RpcEndpointRef, conf: SparkConf, isDriver: Boolean) extends Logging { @@ -37,7 +38,7 @@ class BlockManagerMaster( val timeout = RpcUtils.askRpcTimeout(conf) /** Remove a dead executor from the driver endpoint. This is only called on the driver side. */ - def removeExecutor(execId: String) { + def removeExecutor(execId: String): Unit = { tell(RemoveExecutor(execId)) logInfo("Removed " + execId + " successfully in removeExecutor") } @@ -45,7 +46,7 @@ class BlockManagerMaster( /** Request removal of a dead executor from the driver endpoint. * This is only called on the driver side. Non-blocking */ - def removeExecutorAsync(execId: String) { + def removeExecutorAsync(execId: String): Unit = { driverEndpoint.ask[Boolean](RemoveExecutor(execId)) logInfo("Removal of executor " + execId + " requested") } @@ -120,12 +121,12 @@ class BlockManagerMaster( * Remove a block from the slaves that have it. This can only be used to remove * blocks that the driver knows about. */ - def removeBlock(blockId: BlockId) { + def removeBlock(blockId: BlockId): Unit = { driverEndpoint.askSync[Boolean](RemoveBlock(blockId)) } /** Remove all blocks belonging to the given RDD. 
*/ - def removeRdd(rddId: Int, blocking: Boolean) { + def removeRdd(rddId: Int, blocking: Boolean): Unit = { val future = driverEndpoint.askSync[Future[Seq[Int]]](RemoveRdd(rddId)) future.failed.foreach(e => logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}", e) @@ -136,7 +137,7 @@ class BlockManagerMaster( } /** Remove all blocks belonging to the given shuffle. */ - def removeShuffle(shuffleId: Int, blocking: Boolean) { + def removeShuffle(shuffleId: Int, blocking: Boolean): Unit = { val future = driverEndpoint.askSync[Future[Seq[Boolean]]](RemoveShuffle(shuffleId)) future.failed.foreach(e => logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}", e) @@ -147,7 +148,7 @@ class BlockManagerMaster( } /** Remove all blocks belonging to the given broadcast. */ - def removeBroadcast(broadcastId: Long, removeFromMaster: Boolean, blocking: Boolean) { + def removeBroadcast(broadcastId: Long, removeFromMaster: Boolean, blocking: Boolean): Unit = { val future = driverEndpoint.askSync[Future[Seq[Int]]]( RemoveBroadcast(broadcastId, removeFromMaster)) future.failed.foreach(e => @@ -200,7 +201,7 @@ class BlockManagerMaster( Option[BlockStatus], Iterable[Option[BlockStatus]]]] val blockStatus = timeout.awaitResult( - Future.sequence[Option[BlockStatus], Iterable](futures)(cbf, ThreadUtils.sameThread)) + Future.sequence(futures)(cbf, ThreadUtils.sameThread)) if (blockStatus == null) { throw new SparkException("BlockManager returned null for BlockStatus query: " + blockId) } @@ -226,16 +227,21 @@ class BlockManagerMaster( } /** Stop the driver endpoint, called only on the Spark driver node */ - def stop() { + def stop(): Unit = { if (driverEndpoint != null && isDriver) { tell(StopBlockManagerMaster) driverEndpoint = null + if (driverHeartbeatEndPoint.askSync[Boolean](StopBlockManagerMaster)) { + driverHeartbeatEndPoint = null + } else { + logWarning("Failed to stop BlockManagerMasterHeartbeatEndpoint") + } logInfo("BlockManagerMaster stopped") } } /** Send a 
one-way message to the master endpoint, to which we expect it to reply with true. */ - private def tell(message: Any) { + private def tell(message: Any): Unit = { if (!driverEndpoint.askSync[Boolean](message)) { throw new SparkException("BlockManagerMasterEndpoint returned false, expected true.") } @@ -245,4 +251,5 @@ class BlockManagerMaster( private[spark] object BlockManagerMaster { val DRIVER_ENDPOINT_NAME = "BlockManagerMaster" + val DRIVER_HEARTBEAT_ENDPOINT_NAME = "BlockManagerMasterHeartbeat" } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 5e021b334fd2b..d7f7eedc7f33b 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -26,17 +26,19 @@ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import scala.util.Random +import com.google.common.cache.CacheBuilder + import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.{config, Logging} import org.apache.spark.network.shuffle.ExternalBlockStoreClient -import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} +import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler._ import org.apache.spark.storage.BlockManagerMessages._ import org.apache.spark.util.{RpcUtils, ThreadUtils, Utils} /** - * BlockManagerMasterEndpoint is an [[ThreadSafeRpcEndpoint]] on the master node to track statuses + * BlockManagerMasterEndpoint is an [[IsolatedRpcEndpoint]] on the master node to track statuses * of all slaves' block managers. 
*/ private[spark] @@ -45,11 +47,16 @@ class BlockManagerMasterEndpoint( val isLocal: Boolean, conf: SparkConf, listenerBus: LiveListenerBus, - externalBlockStoreClient: Option[ExternalBlockStoreClient]) - extends ThreadSafeRpcEndpoint with Logging { + externalBlockStoreClient: Option[ExternalBlockStoreClient], + blockManagerInfo: mutable.Map[BlockManagerId, BlockManagerInfo]) + extends IsolatedRpcEndpoint with Logging { - // Mapping from block manager id to the block manager's information. - private val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo] + // Mapping from executor id to the block manager's local disk directories. + private val executorIdToLocalDirs = + CacheBuilder + .newBuilder() + .maximumSize(conf.get(config.STORAGE_LOCAL_DISK_BY_EXECUTORS_CACHE_SIZE)) + .build[String, Array[String]]() // Mapping from external shuffle service block manager id to the block statuses. private val blockStatusByShuffleService = @@ -91,8 +98,13 @@ class BlockManagerMasterEndpoint( case _updateBlockInfo @ UpdateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size) => - context.reply(updateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size)) - listenerBus.post(SparkListenerBlockUpdated(BlockUpdatedInfo(_updateBlockInfo))) + val isSuccess = updateBlockInfo(blockManagerId, blockId, storageLevel, deserializedSize, size) + context.reply(isSuccess) + // SPARK-30594: we should not post `SparkListenerBlockUpdated` when updateBlockInfo + // returns false since the block info would be updated again later. 
+ if (isSuccess) { + listenerBus.post(SparkListenerBlockUpdated(BlockUpdatedInfo(_updateBlockInfo))) + } case GetLocations(blockId) => context.reply(getLocations(blockId)) @@ -144,9 +156,6 @@ class BlockManagerMasterEndpoint( case StopBlockManagerMaster => context.reply(true) stop() - - case BlockManagerHeartbeat(blockManagerId) => - context.reply(heartbeatReceived(blockManagerId)) } private def removeRdd(rddId: Int): Future[Seq[Int]] = { @@ -243,7 +252,7 @@ class BlockManagerMasterEndpoint( Future.sequence(futures) } - private def removeBlockManager(blockManagerId: BlockManagerId) { + private def removeBlockManager(blockManagerId: BlockManagerId): Unit = { val info = blockManagerInfo(blockManagerId) // Remove the block manager from blockManagerIdByExecutor. @@ -285,27 +294,14 @@ class BlockManagerMasterEndpoint( } - private def removeExecutor(execId: String) { + private def removeExecutor(execId: String): Unit = { logInfo("Trying to remove executor " + execId + " from BlockManagerMaster.") blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } - /** - * Return true if the driver knows about the given block manager. Otherwise, return false, - * indicating that the block manager should re-register. - */ - private def heartbeatReceived(blockManagerId: BlockManagerId): Boolean = { - if (!blockManagerInfo.contains(blockManagerId)) { - blockManagerId.isDriver && !isLocal - } else { - blockManagerInfo(blockManagerId).updateLastSeenMs() - true - } - } - // Remove a block from the slaves that have it. This can only be used to remove // blocks that the master knows about. 
- private def removeBlockFromWorkers(blockId: BlockId) { + private def removeBlockFromWorkers(blockId: BlockId): Unit = { val locations = blockLocations.get(blockId) if (locations != null) { locations.foreach { blockManagerId: BlockManagerId => @@ -411,6 +407,7 @@ class BlockManagerMasterEndpoint( topologyMapper.getTopologyForHost(idWithoutTopologyInfo.host)) val time = System.currentTimeMillis() + executorIdToLocalDirs.put(id.executorId, localDirs) if (!blockManagerInfo.contains(id)) { blockManagerIdByExecutor.get(id.executorId) match { case Some(oldId) => @@ -434,7 +431,7 @@ class BlockManagerMasterEndpoint( None } - blockManagerInfo(id) = new BlockManagerInfo(id, System.currentTimeMillis(), localDirs, + blockManagerInfo(id) = new BlockManagerInfo(id, System.currentTimeMillis(), maxOnHeapMemSize, maxOffHeapMemSize, slaveEndpoint, externalShuffleServiceBlockStatus) } listenerBus.post(SparkListenerBlockManagerAdded(time, id, maxOnHeapMemSize + maxOffHeapMemSize, @@ -514,15 +511,16 @@ class BlockManagerMasterEndpoint( if (locations.nonEmpty && status.isDefined) { val localDirs = locations.find { loc => - if (loc.port != externalShuffleServicePort && loc.host == requesterHost) { + // When the external shuffle service running on the same host is found among the block + // locations then the block must be persisted on the disk. In this case the executorId + // can be used to access this block even when the original executor is already stopped. 
+ loc.host == requesterHost && + (loc.port == externalShuffleServicePort || blockManagerInfo .get(loc) .flatMap(_.getStatus(blockId).map(_.storageLevel.useDisk)) - .getOrElse(false) - } else { - false - } - }.map(blockManagerInfo(_).localDirs) + .getOrElse(false)) + }.flatMap { bmId => Option(executorIdToLocalDirs.getIfPresent(bmId.executorId)) } Some(BlockLocationsAndStatus(locations, status.get, localDirs)) } else { None @@ -574,7 +572,6 @@ object BlockStatus { private[spark] class BlockManagerInfo( val blockManagerId: BlockManagerId, timeMs: Long, - val localDirs: Array[String], val maxOnHeapMem: Long, val maxOffHeapMem: Long, val slaveEndpoint: RpcEndpointRef, @@ -593,7 +590,7 @@ private[spark] class BlockManagerInfo( def getStatus(blockId: BlockId): Option[BlockStatus] = Option(_blocks.get(blockId)) - def updateLastSeenMs() { + def updateLastSeenMs(): Unit = { _lastSeenMs = System.currentTimeMillis() } @@ -601,7 +598,7 @@ private[spark] class BlockManagerInfo( blockId: BlockId, storageLevel: StorageLevel, memSize: Long, - diskSize: Long) { + diskSize: Long): Unit = { updateLastSeenMs() @@ -681,7 +678,7 @@ private[spark] class BlockManagerInfo( } } - def removeBlock(blockId: BlockId) { + def removeBlock(blockId: BlockId): Unit = { if (_blocks.containsKey(blockId)) { _remainingMem += _blocks.get(blockId).memSize _blocks.remove(blockId) @@ -699,7 +696,7 @@ private[spark] class BlockManagerInfo( override def toString: String = "BlockManagerInfo " + timeMs + " " + _remainingMem - def clear() { + def clear(): Unit = { _blocks.clear() } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterHeartbeatEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterHeartbeatEndpoint.scala new file mode 100644 index 0000000000000..b06002123d803 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterHeartbeatEndpoint.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.storage + +import scala.collection.mutable + +import org.apache.spark.internal.Logging +import org.apache.spark.rpc.{RpcCallContext, RpcEnv, ThreadSafeRpcEndpoint} +import org.apache.spark.storage.BlockManagerMessages.{BlockManagerHeartbeat, StopBlockManagerMaster} + +/** + * Separate heartbeat out of BlockManagerMasterEndpoint due to performance consideration. + */ +private[spark] class BlockManagerMasterHeartbeatEndpoint( + override val rpcEnv: RpcEnv, + isLocal: Boolean, + blockManagerInfo: mutable.Map[BlockManagerId, BlockManagerInfo]) + extends ThreadSafeRpcEndpoint with Logging { + + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case BlockManagerHeartbeat(blockManagerId) => + context.reply(heartbeatReceived(blockManagerId)) + + case StopBlockManagerMaster => + stop() + context.reply(true) + + case _ => // do nothing for unexpected events + } + + /** + * Return true if the driver knows about the given block manager. Otherwise, return false, + * indicating that the block manager should re-register. 
+ */ + private def heartbeatReceived(blockManagerId: BlockManagerId): Boolean = { + if (!blockManagerInfo.contains(blockManagerId)) { + blockManagerId.isDriver && !isLocal + } else { + blockManagerInfo(blockManagerId).updateLastSeenMs() + true + } + } +} diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala index 67544b20408a6..29e21142ce449 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala @@ -21,7 +21,7 @@ import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{MapOutputTracker, SparkEnv} import org.apache.spark.internal.Logging -import org.apache.spark.rpc.{RpcCallContext, RpcEnv, ThreadSafeRpcEndpoint} +import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEnv} import org.apache.spark.storage.BlockManagerMessages._ import org.apache.spark.util.{ThreadUtils, Utils} @@ -34,7 +34,7 @@ class BlockManagerSlaveEndpoint( override val rpcEnv: RpcEnv, blockManager: BlockManager, mapOutputTracker: MapOutputTracker) - extends ThreadSafeRpcEndpoint with Logging { + extends IsolatedRpcEndpoint with Logging { private val asyncThreadPool = ThreadUtils.newDaemonCachedThreadPool("block-manager-slave-async-thread-pool", 100) @@ -80,7 +80,7 @@ class BlockManagerSlaveEndpoint( } - private def doAsync[T](actionMessage: String, context: RpcCallContext)(body: => T) { + private def doAsync[T](actionMessage: String, context: RpcCallContext)(body: => T): Unit = { val future = Future { logDebug(actionMessage) body diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index c3990bf71e604..ee43b76e17010 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ 
b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -20,6 +20,8 @@ package org.apache.spark.storage import java.io.{File, IOException} import java.util.UUID +import scala.util.control.NonFatal + import org.apache.spark.SparkConf import org.apache.spark.executor.ExecutorExitCode import org.apache.spark.internal.{config, Logging} @@ -117,20 +119,38 @@ private[spark] class DiskBlockManager(conf: SparkConf, deleteFilesOnStop: Boolea /** Produces a unique block id and File suitable for storing local intermediate results. */ def createTempLocalBlock(): (TempLocalBlockId, File) = { - var blockId = new TempLocalBlockId(UUID.randomUUID()) - while (getFile(blockId).exists()) { - blockId = new TempLocalBlockId(UUID.randomUUID()) + var blockId = TempLocalBlockId(UUID.randomUUID()) + var tempLocalFile = getFile(blockId) + var count = 0 + while (!canCreateFile(tempLocalFile) && count < Utils.MAX_DIR_CREATION_ATTEMPTS) { + blockId = TempLocalBlockId(UUID.randomUUID()) + tempLocalFile = getFile(blockId) + count += 1 } - (blockId, getFile(blockId)) + (blockId, tempLocalFile) } /** Produces a unique block id and File suitable for storing shuffled intermediate results. 
*/ def createTempShuffleBlock(): (TempShuffleBlockId, File) = { - var blockId = new TempShuffleBlockId(UUID.randomUUID()) - while (getFile(blockId).exists()) { - blockId = new TempShuffleBlockId(UUID.randomUUID()) + var blockId = TempShuffleBlockId(UUID.randomUUID()) + var tempShuffleFile = getFile(blockId) + var count = 0 + while (!canCreateFile(tempShuffleFile) && count < Utils.MAX_DIR_CREATION_ATTEMPTS) { + blockId = TempShuffleBlockId(UUID.randomUUID()) + tempShuffleFile = getFile(blockId) + count += 1 + } + (blockId, tempShuffleFile) + } + + private def canCreateFile(file: File): Boolean = { + try { + file.createNewFile() + } catch { + case NonFatal(_) => + logError("Failed to create temporary block file: " + file.getAbsoluteFile) + false } - (blockId, getFile(blockId)) } /** @@ -161,7 +181,7 @@ private[spark] class DiskBlockManager(conf: SparkConf, deleteFilesOnStop: Boolea } /** Cleanup local dirs and stop shuffle sender. */ - private[spark] def stop() { + private[spark] def stop(): Unit = { // Remove the shutdown hook. It causes memory leaks if we leave it around. 
try { ShutdownHookManager.removeShutdownHook(shutdownHook) diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala index 758621c52495b..e55c09274cd9a 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala @@ -18,7 +18,7 @@ package org.apache.spark.storage import java.io.{BufferedOutputStream, File, FileOutputStream, OutputStream} -import java.nio.channels.FileChannel +import java.nio.channels.{ClosedByInterruptException, FileChannel} import org.apache.spark.internal.Logging import org.apache.spark.serializer.{SerializationStream, SerializerInstance, SerializerManager} @@ -150,7 +150,7 @@ private[spark] class DiskBlockObjectWriter( /** * Commits any remaining partial writes and closes resources. */ - override def close() { + override def close(): Unit = { if (initialized) { Utils.tryWithSafeFinally { commitAndGet() @@ -219,6 +219,12 @@ private[spark] class DiskBlockObjectWriter( truncateStream = new FileOutputStream(file, true) truncateStream.getChannel.truncate(committedPosition) } catch { + // ClosedByInterruptException is an excepted exception when kill task, + // don't log the exception stack trace to avoid confusing users. + // See: SPARK-28340 + case ce: ClosedByInterruptException => + logError("Exception occurred while reverting partial writes to file " + + file + ", " + ce.getMessage) case e: Exception => logError("Uncaught exception while reverting partial writes to file " + file, e) } finally { @@ -234,7 +240,7 @@ private[spark] class DiskBlockObjectWriter( /** * Writes a key-value pair. 
*/ - override def write(key: Any, value: Any) { + override def write(key: Any, value: Any): Unit = { if (!streamOpen) { open() } @@ -270,14 +276,14 @@ private[spark] class DiskBlockObjectWriter( * Report the number of bytes written in this writer's shuffle write metrics. * Note that this is only valid before the underlying streams are closed. */ - private def updateBytesWritten() { + private def updateBytesWritten(): Unit = { val pos = channel.position() writeMetrics.incBytesWritten(pos - reportedPosition) reportedPosition = pos } // For testing - private[spark] override def flush() { + private[spark] override def flush(): Unit = { objOut.flush() bs.flush() } diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index a5b7ee5762c49..cd4c86006af5a 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -18,11 +18,13 @@ package org.apache.spark.storage import java.io.{InputStream, IOException} +import java.nio.channels.ClosedByInterruptException import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} import javax.annotation.concurrent.GuardedBy import scala.collection.mutable -import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Queue} +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, LinkedHashMap, Queue} +import scala.util.{Failure, Success} import org.apache.commons.io.IOUtils @@ -48,9 +50,10 @@ import org.apache.spark.util.{CompletionIterator, TaskCompletionListener, Utils} * @param shuffleClient [[BlockStoreClient]] for fetching remote blocks * @param blockManager [[BlockManager]] for reading local blocks * @param blocksByAddress list of blocks to fetch grouped by the [[BlockManagerId]]. 
- * For each block we also require the size (in bytes as a long field) in - * order to throttle the memory usage. Note that zero-sized blocks are - * already excluded, which happened in + * For each block we also require two info: 1. the size (in bytes as a long + * field) in order to throttle the memory usage; 2. the mapIndex for this + * block, which indicate the index in the map stage. + * Note that zero-sized blocks are already excluded, which happened in * [[org.apache.spark.MapOutputTracker.convertMapStatuses]]. * @param streamWrapper A function to wrap the returned input stream. * @param maxBytesInFlight max size (in bytes) of remote blocks to fetch at any given point. @@ -60,13 +63,15 @@ import org.apache.spark.util.{CompletionIterator, TaskCompletionListener, Utils} * @param maxReqSizeShuffleToMem max size (in bytes) of a request that can be shuffled to memory. * @param detectCorrupt whether to detect any corruption in fetched blocks. * @param shuffleMetrics used to report shuffle metrics. + * @param doBatchFetch fetch continuous shuffle blocks from same executor in batch if the server + * side supports. 
*/ private[spark] final class ShuffleBlockFetcherIterator( context: TaskContext, shuffleClient: BlockStoreClient, blockManager: BlockManager, - blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long)])], + blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], streamWrapper: (BlockId, InputStream) => InputStream, maxBytesInFlight: Long, maxReqsInFlight: Int, @@ -74,16 +79,20 @@ final class ShuffleBlockFetcherIterator( maxReqSizeShuffleToMem: Long, detectCorrupt: Boolean, detectCorruptUseExtraMemory: Boolean, - shuffleMetrics: ShuffleReadMetricsReporter) + shuffleMetrics: ShuffleReadMetricsReporter, + doBatchFetch: Boolean) extends Iterator[(BlockId, InputStream)] with DownloadFileManager with Logging { import ShuffleBlockFetcherIterator._ + // Make remote requests at most maxBytesInFlight / 5 in length; the reason to keep them + // smaller than maxBytesInFlight is to allow multiple, parallel fetches from up to 5 + // nodes, rather than blocking on reading output from one node. + private val targetRemoteRequestSize = math.max(maxBytesInFlight / 5, 1L) + /** * Total number of blocks to fetch. This should be equal to the total number of blocks * in [[blocksByAddress]] because we already filter out zero-sized blocks in [[blocksByAddress]]. - * - * This should equal localBlocks.size + remoteBlocks.size. */ private[this] var numBlocksToFetch = 0 @@ -96,10 +105,14 @@ final class ShuffleBlockFetcherIterator( private[this] val startTimeNs = System.nanoTime() /** Local blocks to fetch, excluding zero-sized blocks. */ - private[this] val localBlocks = scala.collection.mutable.LinkedHashSet[BlockId]() + private[this] val localBlocks = scala.collection.mutable.LinkedHashSet[(BlockId, Int)]() + + /** Host local blockIds to fetch by executors, excluding zero-sized blocks. */ + private[this] val hostLocalBlocksByExecutor = + LinkedHashMap[BlockManagerId, Seq[(BlockId, Long, Int)]]() - /** Remote blocks to fetch, excluding zero-sized blocks. 
*/ - private[this] val remoteBlocks = new HashSet[BlockId]() + /** Host local blocks to fetch, excluding zero-sized blocks. */ + private[this] val hostLocalBlocks = scala.collection.mutable.LinkedHashSet[(BlockId, Int)]() /** * A queue to hold our results. This turns the asynchronous model provided by @@ -188,7 +201,7 @@ final class ShuffleBlockFetcherIterator( /** * Mark the iterator as zombie, and release all buffers that haven't been deserialized yet. */ - private[storage] def cleanup() { + private[storage] def cleanup(): Unit = { synchronized { isZombie = true } @@ -198,7 +211,7 @@ final class ShuffleBlockFetcherIterator( while (iter.hasNext) { val result = iter.next() result match { - case SuccessFetchResult(_, address, _, buf, _) => + case SuccessFetchResult(_, _, address, _, buf, _) => if (address != blockManager.blockManagerId) { shuffleMetrics.incRemoteBytesRead(buf.size) if (buf.isInstanceOf[FileSegmentManagedBuffer]) { @@ -217,16 +230,18 @@ final class ShuffleBlockFetcherIterator( } } - private[this] def sendRequest(req: FetchRequest) { + private[this] def sendRequest(req: FetchRequest): Unit = { logDebug("Sending request for %d blocks (%s) from %s".format( req.blocks.size, Utils.bytesToString(req.size), req.address.hostPort)) bytesInFlight += req.size reqsInFlight += 1 - // so we can look up the size of each blockID - val sizeMap = req.blocks.map { case (blockId, size) => (blockId.toString, size) }.toMap - val remainingBlocks = new HashSet[String]() ++= sizeMap.keys - val blockIds = req.blocks.map(_._1.toString) + // so we can look up the block info of each blockID + val infoMap = req.blocks.map { + case FetchBlockInfo(blockId, size, mapIndex) => (blockId.toString, (size, mapIndex)) + }.toMap + val remainingBlocks = new HashSet[String]() ++= infoMap.keys + val blockIds = req.blocks.map(_.blockId.toString) val address = req.address val blockFetchingListener = new BlockFetchingListener { @@ -239,8 +254,8 @@ final class ShuffleBlockFetcherIterator( // This 
needs to be released after use. buf.retain() remainingBlocks -= blockId - results.put(new SuccessFetchResult(BlockId(blockId), address, sizeMap(blockId), buf, - remainingBlocks.isEmpty)) + results.put(new SuccessFetchResult(BlockId(blockId), infoMap(blockId)._2, + address, infoMap(blockId)._1, buf, remainingBlocks.isEmpty)) logDebug("remainingBlocks: " + remainingBlocks) } } @@ -249,7 +264,7 @@ final class ShuffleBlockFetcherIterator( override def onBlockFetchFailure(blockId: String, e: Throwable): Unit = { logError(s"Failed to get block(s) from ${req.address.host}:${req.address.port}", e) - results.put(new FailureFetchResult(BlockId(blockId), address, e)) + results.put(new FailureFetchResult(BlockId(blockId), infoMap(blockId)._2, address, e)) } } @@ -265,70 +280,179 @@ final class ShuffleBlockFetcherIterator( } } - private[this] def splitLocalRemoteBlocks(): ArrayBuffer[FetchRequest] = { - // Make remote requests at most maxBytesInFlight / 5 in length; the reason to keep them - // smaller than maxBytesInFlight is to allow multiple, parallel fetches from up to 5 - // nodes, rather than blocking on reading output from one node. - val targetRequestSize = math.max(maxBytesInFlight / 5, 1L) - logDebug("maxBytesInFlight: " + maxBytesInFlight + ", targetRequestSize: " + targetRequestSize - + ", maxBlocksInFlightPerAddress: " + maxBlocksInFlightPerAddress) - - // Split local and remote blocks. Remote blocks are further split into FetchRequests of size - // at most maxBytesInFlight in order to limit the amount of data in flight. - val remoteRequests = new ArrayBuffer[FetchRequest] + private[this] def partitionBlocksByFetchMode(): ArrayBuffer[FetchRequest] = { + logDebug(s"maxBytesInFlight: $maxBytesInFlight, targetRemoteRequestSize: " + + s"$targetRemoteRequestSize, maxBlocksInFlightPerAddress: $maxBlocksInFlightPerAddress") + + // Partition to local, host-local and remote blocks. 
Remote blocks are further split into + // FetchRequests of size at most maxBytesInFlight in order to limit the amount of data in flight + val collectedRemoteRequests = new ArrayBuffer[FetchRequest] var localBlockBytes = 0L + var hostLocalBlockBytes = 0L var remoteBlockBytes = 0L + var numRemoteBlocks = 0 + + val hostLocalDirReadingEnabled = + blockManager.hostLocalDirManager != null && blockManager.hostLocalDirManager.isDefined for ((address, blockInfos) <- blocksByAddress) { if (address.executorId == blockManager.blockManagerId.executorId) { - blockInfos.find(_._2 <= 0) match { - case Some((blockId, size)) if size < 0 => - throw new BlockException(blockId, "Negative block size " + size) - case Some((blockId, size)) if size == 0 => - throw new BlockException(blockId, "Zero-sized blocks should be excluded.") - case None => // do nothing. - } - localBlocks ++= blockInfos.map(_._1) - localBlockBytes += blockInfos.map(_._2).sum - numBlocksToFetch += localBlocks.size + checkBlockSizes(blockInfos) + val mergedBlockInfos = mergeContinuousShuffleBlockIdsIfNeeded( + blockInfos.map(info => FetchBlockInfo(info._1, info._2, info._3)).to[ArrayBuffer]) + localBlocks ++= mergedBlockInfos.map(info => (info.blockId, info.mapIndex)) + localBlockBytes += mergedBlockInfos.map(_.size).sum + } else if (hostLocalDirReadingEnabled && address.host == blockManager.blockManagerId.host) { + checkBlockSizes(blockInfos) + val mergedBlockInfos = mergeContinuousShuffleBlockIdsIfNeeded( + blockInfos.map(info => FetchBlockInfo(info._1, info._2, info._3)).to[ArrayBuffer]) + val blocksForAddress = + mergedBlockInfos.map(info => (info.blockId, info.size, info.mapIndex)) + hostLocalBlocksByExecutor += address -> blocksForAddress + hostLocalBlocks ++= blocksForAddress.map(info => (info._1, info._3)) + hostLocalBlockBytes += mergedBlockInfos.map(_.size).sum } else { - val iterator = blockInfos.iterator - var curRequestSize = 0L - var curBlocks = new ArrayBuffer[(BlockId, Long)] - while (iterator.hasNext) 
{ - val (blockId, size) = iterator.next() - remoteBlockBytes += size - if (size < 0) { - throw new BlockException(blockId, "Negative block size " + size) - } else if (size == 0) { - throw new BlockException(blockId, "Zero-sized blocks should be excluded.") + numRemoteBlocks += blockInfos.size + remoteBlockBytes += blockInfos.map(_._2).sum + collectFetchRequests(address, blockInfos, collectedRemoteRequests) + } + } + val totalBytes = localBlockBytes + remoteBlockBytes + hostLocalBlockBytes + logInfo(s"Getting $numBlocksToFetch (${Utils.bytesToString(totalBytes)}) non-empty blocks " + + s"including ${localBlocks.size} (${Utils.bytesToString(localBlockBytes)}) local and " + + s"${hostLocalBlocks.size} (${Utils.bytesToString(hostLocalBlockBytes)}) " + + s"host-local and $numRemoteBlocks (${Utils.bytesToString(remoteBlockBytes)}) remote blocks") + collectedRemoteRequests + } + + private def collectFetchRequests( + address: BlockManagerId, + blockInfos: Seq[(BlockId, Long, Int)], + collectedRemoteRequests: ArrayBuffer[FetchRequest]): Unit = { + val iterator = blockInfos.iterator + var curRequestSize = 0L + var curBlocks = new ArrayBuffer[FetchBlockInfo] + + def createFetchRequest(blocks: Seq[FetchBlockInfo]): Unit = { + collectedRemoteRequests += FetchRequest(address, blocks) + logDebug(s"Creating fetch request of $curRequestSize at $address " + + s"with ${blocks.size} blocks") + } + + def createFetchRequests(): Unit = { + val mergedBlocks = mergeContinuousShuffleBlockIdsIfNeeded(curBlocks) + curBlocks = new ArrayBuffer[FetchBlockInfo] + if (mergedBlocks.length <= maxBlocksInFlightPerAddress) { + createFetchRequest(mergedBlocks) + } else { + mergedBlocks.grouped(maxBlocksInFlightPerAddress).foreach { blocks => + if (blocks.length == maxBlocksInFlightPerAddress) { + createFetchRequest(blocks) } else { - curBlocks += ((blockId, size)) - remoteBlocks += blockId - numBlocksToFetch += 1 - curRequestSize += size - } - if (curRequestSize >= targetRequestSize || - curBlocks.size 
>= maxBlocksInFlightPerAddress) { - // Add this FetchRequest - remoteRequests += new FetchRequest(address, curBlocks) - logDebug(s"Creating fetch request of $curRequestSize at $address " - + s"with ${curBlocks.size} blocks") - curBlocks = new ArrayBuffer[(BlockId, Long)] - curRequestSize = 0 + // The last group does not exceed `maxBlocksInFlightPerAddress`. Put it back + // to `curBlocks`. + curBlocks = blocks + numBlocksToFetch -= blocks.size } } - // Add in the final request - if (curBlocks.nonEmpty) { - remoteRequests += new FetchRequest(address, curBlocks) + } + curRequestSize = curBlocks.map(_.size).sum + } + + while (iterator.hasNext) { + val (blockId, size, mapIndex) = iterator.next() + assertPositiveBlockSize(blockId, size) + curBlocks += FetchBlockInfo(blockId, size, mapIndex) + curRequestSize += size + // For batch fetch, the actual block in flight should count for merged block. + val mayExceedsMaxBlocks = !doBatchFetch && curBlocks.size >= maxBlocksInFlightPerAddress + if (curRequestSize >= targetRemoteRequestSize || mayExceedsMaxBlocks) { + createFetchRequests() + } + } + // Add in the final request + if (curBlocks.nonEmpty) { + createFetchRequests() + } + } + + private def assertPositiveBlockSize(blockId: BlockId, blockSize: Long): Unit = { + if (blockSize < 0) { + throw BlockException(blockId, "Negative block size " + size) + } else if (blockSize == 0) { + throw BlockException(blockId, "Zero-sized blocks should be excluded.") + } + } + + private def checkBlockSizes(blockInfos: Seq[(BlockId, Long, Int)]): Unit = { + blockInfos.foreach { case (blockId, size, _) => assertPositiveBlockSize(blockId, size) } + } + + private[this] def mergeContinuousShuffleBlockIdsIfNeeded( + blocks: ArrayBuffer[FetchBlockInfo]): ArrayBuffer[FetchBlockInfo] = { + val result = if (doBatchFetch) { + var curBlocks = new ArrayBuffer[FetchBlockInfo] + val mergedBlockInfo = new ArrayBuffer[FetchBlockInfo] + + def mergeFetchBlockInfo(toBeMerged: ArrayBuffer[FetchBlockInfo]): 
FetchBlockInfo = { + val startBlockId = toBeMerged.head.blockId.asInstanceOf[ShuffleBlockId] + + // The last merged block may comes from the input, and we can merge more blocks + // into it, if the map id is the same. + def shouldMergeIntoPreviousBatchBlockId = + mergedBlockInfo.last.blockId.asInstanceOf[ShuffleBlockBatchId].mapId == startBlockId.mapId + + val startReduceId = if (mergedBlockInfo.nonEmpty && shouldMergeIntoPreviousBatchBlockId) { + // Remove the previous batch block id as we will add a new one to replace it. + mergedBlockInfo.remove(mergedBlockInfo.length - 1).blockId + .asInstanceOf[ShuffleBlockBatchId].startReduceId + } else { + startBlockId.reduceId + } + + FetchBlockInfo( + ShuffleBlockBatchId( + startBlockId.shuffleId, + startBlockId.mapId, + startReduceId, + toBeMerged.last.blockId.asInstanceOf[ShuffleBlockId].reduceId + 1), + toBeMerged.map(_.size).sum, + toBeMerged.head.mapIndex) + } + + val iter = blocks.iterator + while (iter.hasNext) { + val info = iter.next() + // It's possible that the input block id is already a batch ID. For example, we merge some + // blocks, and then make fetch requests with the merged blocks according to "max blocks per + // request". The last fetch request may be too small, and we give up and put the remaining + // merged blocks back to the input list. 
+ if (info.blockId.isInstanceOf[ShuffleBlockBatchId]) { + mergedBlockInfo += info + } else { + if (curBlocks.isEmpty) { + curBlocks += info + } else { + val curBlockId = info.blockId.asInstanceOf[ShuffleBlockId] + val currentMapId = curBlocks.head.blockId.asInstanceOf[ShuffleBlockId].mapId + if (curBlockId.mapId != currentMapId) { + mergedBlockInfo += mergeFetchBlockInfo(curBlocks) + curBlocks.clear() + } + curBlocks += info + } } } + if (curBlocks.nonEmpty) { + mergedBlockInfo += mergeFetchBlockInfo(curBlocks) + } + mergedBlockInfo + } else { + blocks } - val totalBytes = localBlockBytes + remoteBlockBytes - logInfo(s"Getting $numBlocksToFetch (${Utils.bytesToString(totalBytes)}) non-empty blocks " + - s"including ${localBlocks.size} (${Utils.bytesToString(localBlockBytes)}) local blocks and " + - s"${remoteBlocks.size} (${Utils.bytesToString(remoteBlockBytes)}) remote blocks") - remoteRequests + // update metrics + numBlocksToFetch += result.size + result } /** @@ -336,34 +460,118 @@ final class ShuffleBlockFetcherIterator( * `ManagedBuffer`'s memory is allocated lazily when we create the input stream, so all we * track in-memory are the ManagedBuffer references themselves. */ - private[this] def fetchLocalBlocks() { + private[this] def fetchLocalBlocks(): Unit = { logDebug(s"Start fetching local blocks: ${localBlocks.mkString(", ")}") val iter = localBlocks.iterator while (iter.hasNext) { - val blockId = iter.next() + val (blockId, mapIndex) = iter.next() try { - val buf = blockManager.getBlockData(blockId) + val buf = blockManager.getLocalBlockData(blockId) shuffleMetrics.incLocalBlocksFetched(1) shuffleMetrics.incLocalBytesRead(buf.size) buf.retain() - results.put(new SuccessFetchResult(blockId, blockManager.blockManagerId, + results.put(new SuccessFetchResult(blockId, mapIndex, blockManager.blockManagerId, buf.size(), buf, false)) } catch { + // If we see an exception, stop immediately. case e: Exception => - // If we see an exception, stop immediately. 
- logError(s"Error occurred while fetching local blocks", e) - results.put(new FailureFetchResult(blockId, blockManager.blockManagerId, e)) + e match { + // ClosedByInterruptException is an excepted exception when kill task, + // don't log the exception stack trace to avoid confusing users. + // See: SPARK-28340 + case ce: ClosedByInterruptException => + logError("Error occurred while fetching local blocks, " + ce.getMessage) + case ex: Exception => logError("Error occurred while fetching local blocks", ex) + } + results.put(new FailureFetchResult(blockId, mapIndex, blockManager.blockManagerId, e)) return } } } + private[this] def fetchHostLocalBlock( + blockId: BlockId, + mapIndex: Int, + localDirs: Array[String], + blockManagerId: BlockManagerId): Boolean = { + try { + val buf = blockManager.getHostLocalShuffleData(blockId, localDirs) + buf.retain() + results.put(SuccessFetchResult(blockId, mapIndex, blockManagerId, buf.size(), buf, + isNetworkReqDone = false)) + true + } catch { + case e: Exception => + // If we see an exception, stop immediately. + logError(s"Error occurred while fetching local blocks", e) + results.put(FailureFetchResult(blockId, mapIndex, blockManagerId, e)) + false + } + } + + /** + * Fetch the host-local blocks while we are fetching remote blocks. This is ok because + * `ManagedBuffer`'s memory is allocated lazily when we create the input stream, so all we + * track in-memory are the ManagedBuffer references themselves. 
+ */ + private[this] def fetchHostLocalBlocks(hostLocalDirManager: HostLocalDirManager): Unit = { + val cachedDirsByExec = hostLocalDirManager.getCachedHostLocalDirs() + val (hostLocalBlocksWithCachedDirs, hostLocalBlocksWithMissingDirs) = + hostLocalBlocksByExecutor + .map { case (hostLocalBmId, bmInfos) => + (hostLocalBmId, bmInfos, cachedDirsByExec.get(hostLocalBmId.executorId)) + }.partition(_._3.isDefined) + val bmId = blockManager.blockManagerId + val immutableHostLocalBlocksWithoutDirs = + hostLocalBlocksWithMissingDirs.map { case (hostLocalBmId, bmInfos, _) => + hostLocalBmId -> bmInfos + }.toMap + if (immutableHostLocalBlocksWithoutDirs.nonEmpty) { + logDebug(s"Asynchronous fetching host-local blocks without cached executors' dir: " + + s"${immutableHostLocalBlocksWithoutDirs.mkString(", ")}") + val execIdsWithoutDirs = immutableHostLocalBlocksWithoutDirs.keys.map(_.executorId).toArray + hostLocalDirManager.getHostLocalDirs(execIdsWithoutDirs) { + case Success(dirs) => + immutableHostLocalBlocksWithoutDirs.foreach { case (hostLocalBmId, blockInfos) => + blockInfos.takeWhile { case (blockId, _, mapIndex) => + fetchHostLocalBlock( + blockId, + mapIndex, + dirs.get(hostLocalBmId.executorId), + hostLocalBmId) + } + } + logDebug(s"Got host-local blocks (without cached executors' dir) in " + + s"${Utils.getUsedTimeNs(startTimeNs)}") + + case Failure(throwable) => + logError(s"Error occurred while fetching host local blocks", throwable) + val (hostLocalBmId, blockInfoSeq) = immutableHostLocalBlocksWithoutDirs.head + val (blockId, _, mapIndex) = blockInfoSeq.head + results.put(FailureFetchResult(blockId, mapIndex, hostLocalBmId, throwable)) + } + } + if (hostLocalBlocksWithCachedDirs.nonEmpty) { + logDebug(s"Synchronous fetching host-local blocks with cached executors' dir: " + + s"${hostLocalBlocksWithCachedDirs.mkString(", ")}") + hostLocalBlocksWithCachedDirs.foreach { case (_, blockInfos, localDirs) => + blockInfos.foreach { case (blockId, _, mapIndex) => + if 
(!fetchHostLocalBlock(blockId, mapIndex, localDirs.get, bmId)) { + return + } + } + } + logDebug(s"Got host-local blocks (with cached executors' dir) in " + + s"${Utils.getUsedTimeNs(startTimeNs)}") + } + } + private[this] def initialize(): Unit = { // Add a task completion callback (called in both success case and failure case) to cleanup. context.addTaskCompletionListener(onCompleteCallback) - // Split local and remote blocks. - val remoteRequests = splitLocalRemoteBlocks() + // Partition blocks by the different fetch modes: local, host-local and remote blocks. + val remoteRequests = partitionBlocksByFetchMode() // Add the remote requests into our queue in a random order fetchRequests ++= Utils.randomize(remoteRequests) assert ((0 == reqsInFlight) == (0 == bytesInFlight), @@ -379,6 +587,10 @@ final class ShuffleBlockFetcherIterator( // Get Local Blocks fetchLocalBlocks() logDebug(s"Got local blocks in ${Utils.getUsedTimeNs(startTimeNs)}") + + if (hostLocalBlocks.nonEmpty) { + blockManager.hostLocalDirManager.foreach(fetchHostLocalBlocks) + } } override def hasNext: Boolean = numBlocksProcessed < numBlocksToFetch @@ -412,17 +624,20 @@ final class ShuffleBlockFetcherIterator( shuffleMetrics.incFetchWaitTime(fetchWaitTime) result match { - case r @ SuccessFetchResult(blockId, address, size, buf, isNetworkReqDone) => + case r @ SuccessFetchResult(blockId, mapIndex, address, size, buf, isNetworkReqDone) => if (address != blockManager.blockManagerId) { - numBlocksInFlightPerAddress(address) = numBlocksInFlightPerAddress(address) - 1 - shuffleMetrics.incRemoteBytesRead(buf.size) - if (buf.isInstanceOf[FileSegmentManagedBuffer]) { - shuffleMetrics.incRemoteBytesReadToDisk(buf.size) + if (hostLocalBlocks.contains(blockId -> mapIndex)) { + shuffleMetrics.incLocalBlocksFetched(1) + shuffleMetrics.incLocalBytesRead(buf.size) + } else { + numBlocksInFlightPerAddress(address) = numBlocksInFlightPerAddress(address) - 1 + shuffleMetrics.incRemoteBytesRead(buf.size) + if 
(buf.isInstanceOf[FileSegmentManagedBuffer]) { + shuffleMetrics.incRemoteBytesReadToDisk(buf.size) + } + shuffleMetrics.incRemoteBlocksFetched(1) + bytesInFlight -= size } - shuffleMetrics.incRemoteBlocksFetched(1) - } - if (!localBlocks.contains(blockId)) { - bytesInFlight -= size } if (isNetworkReqDone) { reqsInFlight -= 1 @@ -445,7 +660,7 @@ final class ShuffleBlockFetcherIterator( // since the last call. val msg = s"Received a zero-size buffer for block $blockId from $address " + s"(expectedApproxSize = $size, isNetworkReqDone=$isNetworkReqDone)" - throwFetchFailedException(blockId, address, new IOException(msg)) + throwFetchFailedException(blockId, mapIndex, address, new IOException(msg)) } val in = try { @@ -454,9 +669,14 @@ final class ShuffleBlockFetcherIterator( // The exception could only be throwed by local shuffle block case e: IOException => assert(buf.isInstanceOf[FileSegmentManagedBuffer]) - logError("Failed to create input stream from local block", e) + e match { + case ce: ClosedByInterruptException => + logError("Failed to create input stream from local block, " + + ce.getMessage) + case e: IOException => logError("Failed to create input stream from local block", e) + } buf.release() - throwFetchFailedException(blockId, address, e) + throwFetchFailedException(blockId, mapIndex, address, e) } try { input = streamWrapper(blockId, in) @@ -474,11 +694,12 @@ final class ShuffleBlockFetcherIterator( buf.release() if (buf.isInstanceOf[FileSegmentManagedBuffer] || corruptedBlocks.contains(blockId)) { - throwFetchFailedException(blockId, address, e) + throwFetchFailedException(blockId, mapIndex, address, e) } else { logWarning(s"got an corrupted block $blockId from $address, fetch again", e) corruptedBlocks += blockId - fetchRequests += FetchRequest(address, Array((blockId, size))) + fetchRequests += FetchRequest( + address, Array(FetchBlockInfo(blockId, size, mapIndex))) result = null } } finally { @@ -490,8 +711,8 @@ final class 
ShuffleBlockFetcherIterator( } } - case FailureFetchResult(blockId, address, e) => - throwFetchFailedException(blockId, address, e) + case FailureFetchResult(blockId, mapIndex, address, e) => + throwFetchFailedException(blockId, mapIndex, address, e) } // Send fetch requests up to maxBytesInFlight @@ -504,6 +725,7 @@ final class ShuffleBlockFetcherIterator( input, this, currentResult.blockId, + currentResult.mapIndex, currentResult.address, detectCorrupt && streamCompressedOrEncrypted)) } @@ -570,11 +792,14 @@ final class ShuffleBlockFetcherIterator( private[storage] def throwFetchFailedException( blockId: BlockId, + mapIndex: Int, address: BlockManagerId, e: Throwable) = { blockId match { case ShuffleBlockId(shufId, mapId, reduceId) => - throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e) + throw new FetchFailedException(address, shufId, mapId, mapIndex, reduceId, e) + case ShuffleBlockBatchId(shuffleId, mapId, startReduceId, _) => + throw new FetchFailedException(address, shuffleId, mapId, mapIndex, startReduceId, e) case _ => throw new SparkException( "Failed to get block " + blockId + ", which is not a shuffle block", e) @@ -591,6 +816,7 @@ private class BufferReleasingInputStream( private[storage] val delegate: InputStream, private val iterator: ShuffleBlockFetcherIterator, private val blockId: BlockId, + private val mapIndex: Int, private val address: BlockManagerId, private val detectCorruption: Boolean) extends InputStream { @@ -602,7 +828,7 @@ private class BufferReleasingInputStream( } catch { case e: IOException if detectCorruption => IOUtils.closeQuietly(this) - iterator.throwFetchFailedException(blockId, address, e) + iterator.throwFetchFailedException(blockId, mapIndex, address, e) } } @@ -624,7 +850,7 @@ private class BufferReleasingInputStream( } catch { case e: IOException if detectCorruption => IOUtils.closeQuietly(this) - iterator.throwFetchFailedException(blockId, address, e) + 
iterator.throwFetchFailedException(blockId, mapIndex, address, e) } } @@ -636,7 +862,7 @@ private class BufferReleasingInputStream( } catch { case e: IOException if detectCorruption => IOUtils.closeQuietly(this) - iterator.throwFetchFailedException(blockId, address, e) + iterator.throwFetchFailedException(blockId, mapIndex, address, e) } } @@ -646,7 +872,7 @@ private class BufferReleasingInputStream( } catch { case e: IOException if detectCorruption => IOUtils.closeQuietly(this) - iterator.throwFetchFailedException(blockId, address, e) + iterator.throwFetchFailedException(blockId, mapIndex, address, e) } } @@ -677,14 +903,25 @@ private class ShuffleFetchCompletionListener(var data: ShuffleBlockFetcherIterat private[storage] object ShuffleBlockFetcherIterator { + /** + * The block information to fetch used in FetchRequest. + * @param blockId block id + * @param size estimated size of the block. Note that this is NOT the exact bytes. + * Size of remote block is used to calculate bytesInFlight. + * @param mapIndex the mapIndex for this block, which indicate the index in the map stage. + */ + private[storage] case class FetchBlockInfo( + blockId: BlockId, + size: Long, + mapIndex: Int) + /** * A request to fetch blocks from a remote BlockManager. * @param address remote BlockManager to fetch from. - * @param blocks Sequence of tuple, where the first element is the block id, - * and the second element is the estimated size, used to calculate bytesInFlight. + * @param blocks Sequence of the information for blocks to fetch from the same address. */ - case class FetchRequest(address: BlockManagerId, blocks: Seq[(BlockId, Long)]) { - val size = blocks.map(_._2).sum + case class FetchRequest(address: BlockManagerId, blocks: Seq[FetchBlockInfo]) { + val size = blocks.map(_.size).sum } /** @@ -698,6 +935,7 @@ object ShuffleBlockFetcherIterator { /** * Result of a fetch from a remote block successfully. 
* @param blockId block id + * @param mapIndex the mapIndex for this block, which indicate the index in the map stage. * @param address BlockManager that the block was fetched from. * @param size estimated size of the block. Note that this is NOT the exact bytes. * Size of remote block is used to calculate bytesInFlight. @@ -706,6 +944,7 @@ object ShuffleBlockFetcherIterator { */ private[storage] case class SuccessFetchResult( blockId: BlockId, + mapIndex: Int, address: BlockManagerId, size: Long, buf: ManagedBuffer, @@ -717,11 +956,13 @@ object ShuffleBlockFetcherIterator { /** * Result of a fetch from a remote block unsuccessfully. * @param blockId block id + * @param mapIndex the mapIndex for this block, which indicate the index in the map stage * @param address BlockManager that the block was attempted to be fetched from * @param e the failure exception */ private[storage] case class FailureFetchResult( blockId: BlockId, + mapIndex: Int, address: BlockManagerId, e: Throwable) extends FetchResult diff --git a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala index f36b31c65a63d..d3a061fae746f 100644 --- a/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala +++ b/core/src/main/scala/org/apache/spark/ui/ConsoleProgressBar.scala @@ -48,7 +48,7 @@ private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging { // Schedule a refresh thread to run periodically private val timer = new Timer("refresh progress", true) timer.schedule(new TimerTask{ - override def run() { + override def run(): Unit = { refresh() } }, firstDelayMSec, updatePeriodMSec) @@ -73,7 +73,7 @@ private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging { * after your last output, keeps overwriting itself to hold in one line. The logging will follow * the progress bar, then progress bar will be showed in next line without overwrite logs. 
*/ - private def show(now: Long, stages: Seq[StageData]) { + private def show(now: Long, stages: Seq[StageData]): Unit = { val width = TerminalWidth / stages.size val bar = stages.map { s => val total = s.numTasks @@ -94,7 +94,7 @@ private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging { // only refresh if it's changed OR after 1 minute (or the ssh connection will be closed // after idle some time) if (bar != lastProgressBar || now - lastUpdateTime > 60 * 1000L) { - System.err.print(CR + bar) + System.err.print(CR + bar + CR) lastUpdateTime = now } lastProgressBar = bar @@ -103,7 +103,7 @@ private[spark] class ConsoleProgressBar(sc: SparkContext) extends Logging { /** * Clear the progress bar if showed. */ - private def clear() { + private def clear(): Unit = { if (!lastProgressBar.isEmpty) { System.err.printf(CR + " " * TerminalWidth + CR) lastProgressBar = "" diff --git a/core/src/main/scala/org/apache/spark/ui/GraphUIData.scala b/core/src/main/scala/org/apache/spark/ui/GraphUIData.scala new file mode 100644 index 0000000000000..87ff677514461 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/GraphUIData.scala @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ui + +import java.{util => ju} +import java.lang.{Long => JLong} + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.xml.{Node, Unparsed} + +/** + * A helper class to generate JavaScript and HTML for both timeline and histogram graphs. + * + * @param timelineDivId the timeline `id` used in the html `div` tag + * @param histogramDivId the timeline `id` used in the html `div` tag + * @param data the data for the graph + * @param minX the min value of X axis + * @param maxX the max value of X axis + * @param minY the min value of Y axis + * @param maxY the max value of Y axis + * @param unitY the unit of Y axis + * @param batchInterval if `batchInterval` is not None, we will draw a line for `batchInterval` in + * the graph + */ +private[spark] class GraphUIData( + timelineDivId: String, + histogramDivId: String, + data: Seq[(Long, Double)], + minX: Long, + maxX: Long, + minY: Double, + maxY: Double, + unitY: String, + batchInterval: Option[Double] = None) { + + private var dataJavaScriptName: String = _ + + def generateDataJs(jsCollector: JsCollector): Unit = { + val jsForData = data.map { case (x, y) => + s"""{"x": $x, "y": $y}""" + }.mkString("[", ",", "]") + dataJavaScriptName = jsCollector.nextVariableName + jsCollector.addPreparedStatement(s"var $dataJavaScriptName = $jsForData;") + } + + def generateTimelineHtml(jsCollector: JsCollector): Seq[Node] = { + jsCollector.addPreparedStatement(s"registerTimeline($minY, $maxY);") + if (batchInterval.isDefined) { + jsCollector.addStatement( + "drawTimeline(" + + s"'#$timelineDivId', $dataJavaScriptName, $minX, $maxX, $minY, $maxY, '$unitY'," + + s" ${batchInterval.get}" + + ");") + } else { + jsCollector.addStatement( + s"drawTimeline('#$timelineDivId', $dataJavaScriptName, $minX, $maxX, $minY, $maxY," + + s" '$unitY');") + } +
    + } + + def generateHistogramHtml(jsCollector: JsCollector): Seq[Node] = { + val histogramData = s"$dataJavaScriptName.map(function(d) { return d.y; })" + jsCollector.addPreparedStatement(s"registerHistogram($histogramData, $minY, $maxY);") + if (batchInterval.isDefined) { + jsCollector.addStatement( + "drawHistogram(" + + s"'#$histogramDivId', $histogramData, $minY, $maxY, '$unitY', ${batchInterval.get}" + + ");") + } else { + jsCollector.addStatement( + s"drawHistogram('#$histogramDivId', $histogramData, $minY, $maxY, '$unitY');") + } +
    + } + + def generateAreaStackHtmlWithData( + jsCollector: JsCollector, + values: Array[(Long, ju.Map[String, JLong])]): Seq[Node] = { + val operationLabels = values.flatMap(_._2.keySet().asScala).toSet + val durationDataPadding = UIUtils.durationDataPadding(values) + val jsForData = durationDataPadding.map { case (x, y) => + val s = y.toSeq.sortBy(_._1).map(e => s""""${e._1}": "${e._2}"""").mkString(",") + s"""{x: "${UIUtils.formatBatchTime(x, 1, showYYYYMMSS = false)}", $s}""" + }.mkString("[", ",", "]") + val jsForLabels = operationLabels.toSeq.sorted.mkString("[\"", "\",\"", "\"]") + + val (maxX, minX, maxY, minY) = if (values != null && values.length > 0) { + val xValues = values.map(_._1.toLong) + val yValues = values.map(_._2.asScala.toSeq.map(_._2.toLong).sum) + (xValues.max, xValues.min, yValues.max, yValues.min) + } else { + (0L, 0L, 0L, 0L) + } + + dataJavaScriptName = jsCollector.nextVariableName + jsCollector.addPreparedStatement(s"var $dataJavaScriptName = $jsForData;") + val labels = jsCollector.nextVariableName + jsCollector.addPreparedStatement(s"var $labels = $jsForLabels;") + jsCollector.addStatement( + s"drawAreaStack('#$timelineDivId', $labels, $dataJavaScriptName, $minX, $maxX, $minY, $maxY)") +
    + } +} + +/** + * A helper class that allows the user to add JavaScript statements which will be executed when the + * DOM has finished loading. + */ +private[spark] class JsCollector { + + private var variableId = 0 + + /** + * Return the next unused JavaScript variable name + */ + def nextVariableName: String = { + variableId += 1 + "v" + variableId + } + + /** + * JavaScript statements that will execute before `statements` + */ + private val preparedStatements = ArrayBuffer[String]() + + /** + * JavaScript statements that will execute after `preparedStatements` + */ + private val statements = ArrayBuffer[String]() + + def addPreparedStatement(js: String): Unit = { + preparedStatements += js + } + + def addStatement(js: String): Unit = { + statements += js + } + + /** + * Generate a html snippet that will execute all scripts when the DOM has finished loading. + */ + def toHtml: Seq[Node] = { + val js = + s""" + |$$(document).ready(function() { + | ${preparedStatements.mkString("\n")} + | ${statements.mkString("\n")} + |});""".stripMargin + + + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index ff7baf4d9419b..94c99d48e773c 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.ui import java.net.{URI, URL} import java.util.EnumSet import javax.servlet.DispatcherType -import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} +import javax.servlet.http._ import scala.language.implicitConversions import scala.xml.Node @@ -73,7 +73,7 @@ private[spark] object JettyUtils extends Logging { servletParams: ServletParams[T], conf: SparkConf): HttpServlet = { new HttpServlet { - override def doGet(request: HttpServletRequest, response: HttpServletResponse) { + override def doGet(request: HttpServletRequest, response: HttpServletResponse): 
Unit = { try { response.setContentType("%s;charset=utf-8".format(servletParams.contentType)) response.setStatus(HttpServletResponse.SC_OK) @@ -259,7 +259,15 @@ private[spark] object JettyUtils extends Logging { server.addBean(errorHandler) val collection = new ContextHandlerCollection - server.setHandler(collection) + conf.get(PROXY_REDIRECT_URI) match { + case Some(proxyUri) => + val proxyHandler = new ProxyRedirectHandler(proxyUri) + proxyHandler.setHandler(collection) + server.setHandler(proxyHandler) + + case _ => + server.setHandler(collection) + } // Executor used to create daemon threads for the Jetty connectors. val serverExecutor = new ScheduledExecutorScheduler(s"$serverName-JettyScheduler", true) @@ -526,3 +534,51 @@ private[spark] case class ServerInfo( } } + +/** + * A Jetty handler to handle redirects to a proxy server. It intercepts redirects and rewrites the + * location to point to the proxy server. + * + * The handler needs to be set as the server's handler, because Jetty sometimes generates redirects + * before invoking any servlet handlers or filters. One of such cases is when asking for the root of + * a servlet context without the trailing slash (e.g. "/jobs") - Jetty will send a redirect to the + * same URL, but with a trailing slash. 
+ */ +private class ProxyRedirectHandler(_proxyUri: String) extends HandlerWrapper { + + private val proxyUri = _proxyUri.stripSuffix("/") + + override def handle( + target: String, + baseRequest: Request, + request: HttpServletRequest, + response: HttpServletResponse): Unit = { + super.handle(target, baseRequest, request, new ResponseWrapper(request, response)) + } + + private class ResponseWrapper( + req: HttpServletRequest, + res: HttpServletResponse) + extends HttpServletResponseWrapper(res) { + + override def sendRedirect(location: String): Unit = { + val newTarget = if (location != null) { + val target = new URI(location) + val path = if (target.getPath().startsWith("/")) { + target.getPath() + } else { + req.getRequestURI().stripSuffix("/") + "/" + target.getPath() + } + // The target path should already be encoded, so don't re-encode it, just the + // proxy address part. + val proxyBase = UIUtils.uiRoot(req) + val proxyPrefix = if (proxyBase.nonEmpty) s"$proxyUri$proxyBase" else proxyUri + s"${res.encodeURL(proxyPrefix)}${target.getPath()}" + } else { + null + } + super.sendRedirect(newTarget) + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 6fb8e458a789c..8ae9828c3fee1 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -66,6 +66,9 @@ private[spark] class SparkUI private ( addStaticHandler(SparkUI.STATIC_RESOURCE_DIR) attachHandler(createRedirectHandler("/", "/jobs/", basePath = basePath)) attachHandler(ApiRootResource.getServletHandler(this)) + if (sc.map(_.conf.get(UI_PROMETHEUS_ENABLED)).getOrElse(false)) { + attachHandler(PrometheusResource.getServletHandler(this)) + } // These should be POST only, but, the YARN AM proxy won't proxy POSTs attachHandler(createRedirectHandler( @@ -94,7 +97,7 @@ private[spark] class SparkUI private ( } /** Stop the server behind this web interface. 
Only valid after bind(). */ - override def stop() { + override def stop(): Unit = { super.stop() logInfo(s"Stopped Spark web UI at $webUrl") } diff --git a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala index 766cc65084f07..aefd001e573f9 100644 --- a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala +++ b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala @@ -31,9 +31,9 @@ private[spark] object ToolTips { val SHUFFLE_READ_BLOCKED_TIME = "Time that the task spent blocked waiting for shuffle data to be read from remote machines." - val INPUT = "Bytes and records read from Hadoop or from Spark storage." + val INPUT = "Bytes read from Hadoop or from Spark storage." - val OUTPUT = "Bytes and records written to Hadoop." + val OUTPUT = "Bytes written to Hadoop." val STORAGE_MEMORY = "Memory used / total available memory for storage of data " + @@ -99,4 +99,7 @@ private[spark] object ToolTips { dynamic allocation is enabled. The number of granted executors may exceed the limit ephemerally when executors are being killed. """ + + val DURATION = + "Elapsed time since the stage was submitted until execution completion of all its tasks." 
} diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 70e24bd0e7ecd..94c45215b5ff2 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -17,6 +17,8 @@ package org.apache.spark.ui +import java.{util => ju} +import java.lang.{Long => JLong} import java.net.URLDecoder import java.nio.charset.StandardCharsets.UTF_8 import java.text.SimpleDateFormat @@ -24,6 +26,7 @@ import java.util.{Date, Locale, TimeZone} import javax.servlet.http.HttpServletRequest import javax.ws.rs.core.{MediaType, Response} +import scala.collection.JavaConverters._ import scala.util.control.NonFatal import scala.xml._ import scala.xml.transform.{RewriteRule, RuleTransformer} @@ -119,6 +122,59 @@ private[spark] object UIUtils extends Logging { } } + // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. + private val batchTimeFormat = new ThreadLocal[SimpleDateFormat]() { + override def initialValue(): SimpleDateFormat = + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US) + } + + private val batchTimeFormatWithMilliseconds = new ThreadLocal[SimpleDateFormat]() { + override def initialValue(): SimpleDateFormat = + new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS", Locale.US) + } + + /** + * If `batchInterval` is less than 1 second, format `batchTime` with milliseconds. Otherwise, + * format `batchTime` without milliseconds. + * + * @param batchTime the batch time to be formatted + * @param batchInterval the batch interval + * @param showYYYYMMSS if showing the `yyyy/MM/dd` part. 
If it's false, the return value wll be + * only `HH:mm:ss` or `HH:mm:ss.SSS` depending on `batchInterval` + * @param timezone only for test + */ + def formatBatchTime( + batchTime: Long, + batchInterval: Long, + showYYYYMMSS: Boolean = true, + timezone: TimeZone = null): String = { + val oldTimezones = + (batchTimeFormat.get.getTimeZone, batchTimeFormatWithMilliseconds.get.getTimeZone) + if (timezone != null) { + batchTimeFormat.get.setTimeZone(timezone) + batchTimeFormatWithMilliseconds.get.setTimeZone(timezone) + } + try { + val formattedBatchTime = + if (batchInterval < 1000) { + batchTimeFormatWithMilliseconds.get.format(batchTime) + } else { + // If batchInterval >= 1 second, don't show milliseconds + batchTimeFormat.get.format(batchTime) + } + if (showYYYYMMSS) { + formattedBatchTime + } else { + formattedBatchTime.substring(formattedBatchTime.indexOf(' ') + 1) + } + } finally { + if (timezone != null) { + batchTimeFormat.get.setTimeZone(oldTimezones._1) + batchTimeFormatWithMilliseconds.get.setTimeZone(oldTimezones._2) + } + } + } + /** Generate a human-readable string representing a number (e.g. 100 K) */ def formatNumber(records: Double): String = { val trillion = 1e12 @@ -227,7 +283,7 @@ private[spark] object UIUtils extends Logging { {tab.name} } - val helpButton: Seq[Node] = helpText.map(tooltip(_, "bottom")).getOrElse(Seq.empty) + val helpButton: Seq[Node] = helpText.map(tooltip(_, "top")).getOrElse(Seq.empty) @@ -309,9 +365,13 @@ private[spark] object UIUtils extends Logging { data: Iterable[T], fixedWidth: Boolean = false, id: Option[String] = None, + // When headerClasses is not empty, it should have the same length as headers parameter headerClasses: Seq[String] = Seq.empty, stripeRowsWithCss: Boolean = true, - sortable: Boolean = true): Seq[Node] = { + sortable: Boolean = true, + // The tooltip information could be None, which indicates header does not have a tooltip. 
+ // When tooltipHeaders is not empty, it should have the same length as headers parameter + tooltipHeaders: Seq[Option[String]] = Seq.empty): Seq[Node] = { val listingTableClass = { val _tableClass = if (stripeRowsWithCss) TABLE_CLASS_STRIPED else TABLE_CLASS_NOT_STRIPED @@ -332,6 +392,14 @@ private[spark] object UIUtils extends Logging { } } + def getTooltip(index: Int): Option[String] = { + if (index < tooltipHeaders.size) { + tooltipHeaders(index) + } else { + None + } + } + val newlinesInHeader = headers.exists(_.contains("\n")) def getHeaderContent(header: String): Seq[Node] = { if (newlinesInHeader) { @@ -345,7 +413,15 @@ private[spark] object UIUtils extends Logging { val headerRow: Seq[Node] = { headers.view.zipWithIndex.map { x => -
    + getTooltip(x._2) match { + case Some(tooltip) => + + case None => + } } }
    - Executor ID - AddressStatus - RDD BlocksExecutor IDAddressStatusRDD Blocks @@ -90,13 +87,13 @@

    Executors

    Off Heap Storage Memory
    Disk UsedCoresResourcesActive TasksFailed TasksComplete TasksTotal TasksDisk UsedCoresResourcesActive TasksFailed TasksComplete TasksTotal Tasks @@ -110,14 +107,11 @@

    Executors

    title="Total shuffle bytes and records read (includes both data read locally and data read from remote executors)."> Shuffle Read
    - - Shuffle WriteLogsThread DumpLogsThread Dump
    {formatResourcesAddresses(driver.resources)} {driver.desc.command.arguments(2)}{UIUtils.formatDuration(System.currentTimeMillis() - driver.startTime)}
    {getHeaderContent(x._1)} + + {getHeaderContent(x._1)} + + {getHeaderContent(x._1)}
    @@ -408,7 +484,7 @@ private[spark] object UIUtils extends Logging { class="expand-dag-viz" onclick={s"toggleDagViz($forJob);"}> + data-placement="top"> DAG Visualization @@ -552,4 +628,39 @@ private[spark] object UIUtils extends Logging { def buildErrorResponse(status: Response.Status, msg: String): Response = { Response.status(status).entity(msg).`type`(MediaType.TEXT_PLAIN).build() } + + /** + * There may be different duration labels in each batch. So we need to + * mark those missing duration label as '0d' to avoid UI rending error. + */ + def durationDataPadding( + values: Array[(Long, ju.Map[String, JLong])]): Array[(Long, Map[String, Double])] = { + val operationLabels = values.flatMap(_._2.keySet().asScala).toSet + values.map { case (xValue, yValue) => + val dataPadding = operationLabels.map { opLabel => + if (yValue.containsKey(opLabel)) { + (opLabel, yValue.get(opLabel).toDouble) + } else { + (opLabel, 0d) + } + } + (xValue, dataPadding.toMap) + } + } + + def detailsUINode(isMultiline: Boolean, message: String): Seq[Node] = { + if (isMultiline) { + // scalastyle:off + + +details + ++ + + // scalastyle:on + } else { + Seq.empty[Node] + } + } } diff --git a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala index 8845dcf48a844..ca111a8d00a64 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala @@ -37,7 +37,7 @@ private[spark] object UIWorkloadGenerator { val NUM_PARTITIONS = 100 val INTER_JOB_WAIT_MS = 5000 - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 3) { // scalastyle:off println println( @@ -98,7 +98,7 @@ private[spark] object UIWorkloadGenerator { (1 to nJobSet).foreach { _ => for ((desc, job) <- jobs) { new Thread { - override def run() { + override def run(): Unit = { // scalastyle:off println try { setProperties(desc) 
diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index 1fe822a0e3b57..9faa3dcf2cdf2 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -184,7 +184,7 @@ private[spark] abstract class WebUITab(parent: WebUI, val prefix: String) { val name = prefix.capitalize /** Attach a page to this tab. This prepends the page's prefix with the tab's own prefix. */ - def attachPage(page: WebUIPage) { + def attachPage(page: WebUIPage): Unit = { page.prefix = (prefix + "/" + page.prefix).stripSuffix("/") pages += page } @@ -236,4 +236,8 @@ private[spark] class DelegatingServletContextHandler(handler: ServletContextHand def filterCount(): Int = { handler.getServletHandler.getFilters.length } + + def getContextPath(): String = { + handler.getContextPath + } } diff --git a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala index 76537afd81ce0..c6eb461ad601c 100644 --- a/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentPage.scala @@ -39,15 +39,20 @@ private[ui] class EnvironmentPage( "Scala Version" -> appEnv.runtime.scalaVersion) val runtimeInformationTable = UIUtils.listingTable( - propertyHeader, jvmRow, jvmInformation.toSeq.sorted, fixedWidth = true) + propertyHeader, jvmRow, jvmInformation.toSeq.sorted, fixedWidth = true, + headerClasses = headerClasses) val sparkPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, - Utils.redact(conf, appEnv.sparkProperties.sorted), fixedWidth = true) + Utils.redact(conf, appEnv.sparkProperties.sorted), fixedWidth = true, + headerClasses = headerClasses) val hadoopPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, - Utils.redact(conf, appEnv.hadoopProperties.sorted), fixedWidth = true) + Utils.redact(conf, 
appEnv.hadoopProperties.sorted), fixedWidth = true, + headerClasses = headerClasses) val systemPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, - Utils.redact(conf, appEnv.systemProperties.sorted), fixedWidth = true) + Utils.redact(conf, appEnv.systemProperties.sorted), fixedWidth = true, + headerClasses = headerClasses) val classpathEntriesTable = UIUtils.listingTable( - classPathHeaders, classPathRow, appEnv.classpathEntries.sorted, fixedWidth = true) + classPathHeader, classPathRow, appEnv.classpathEntries.sorted, fixedWidth = true, + headerClasses = headerClasses) val content = private def propertyRow(kv: (String, String)) = private def classPathRow(data: (String, String)) = diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala index a13037b5e24db..77564f48015f1 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala @@ -89,7 +89,12 @@ private[ui] class ExecutorThreadDumpPage( - + {dumpRows}
    {kv._1}{kv._2}
    {kv._1}{kv._2}
    {data._1}{data._2}
    Thread ID Thread Name Thread StateThread Locks + + Thread Locks + +
    diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index 11fcbf1c29c05..f53e67ff5cc98 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -71,7 +71,10 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We val jobId = job.jobId val status = job.status val (_, lastStageDescription) = lastStageNameAndDescription(store, job) - val jobDescription = UIUtils.makeDescription(lastStageDescription, "", plainText = true).text + val jobDescription = UIUtils.makeDescription( + job.description.getOrElse(lastStageDescription), + "", + plainText = true).text val submissionTime = job.submissionTime.get.getTime() val completionTime = job.completionTime.map(_.getTime()).getOrElse(System.currentTimeMillis()) @@ -123,7 +126,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We | 'group': 'executors', | 'start': new Date(${e.addTime.getTime()}), | 'content': '
    Executor ${e.id} added
    ' @@ -139,7 +142,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We | 'group': 'executors', | 'start': new Date(${removeTime.getTime()}), | 'content': '
    - + Event Timeline ++ @@ -449,7 +452,11 @@ private[ui] class JobDataSource( val formattedSubmissionTime = submissionTime.map(UIUtils.formatDate).getOrElse("Unknown") val (lastStageName, lastStageDescription) = lastStageNameAndDescription(store, jobData) - val jobDescription = UIUtils.makeDescription(lastStageDescription, basePath, plainText = false) + val jobDescription = + UIUtils.makeDescription( + jobData.description.getOrElse(lastStageDescription), + basePath, + plainText = false) val detailUrl = "%s/jobs/job/?id=%s".format(basePath, jobData.jobId) @@ -541,12 +548,15 @@ private[ui] class JobPagedTable( override def headers: Seq[Node] = { // Information for each header: title, cssClass, and sortable - val jobHeadersAndCssClasses: Seq[(String, String, Boolean)] = + val jobHeadersAndCssClasses: Seq[(String, String, Boolean, Option[String])] = Seq( - (jobIdTitle, "", true), - ("Description", "", true), ("Submitted", "", true), ("Duration", "", true), - ("Stages: Succeeded/Total", "", false), - ("Tasks (for all stages): Succeeded/Total", "", false) + (jobIdTitle, "", true, None), + ("Description", "", true, None), + ("Submitted", "", true, None), + ("Duration", "", true, Some("Elapsed time since the job was submitted " + + "until execution completion of all its stages.")), + ("Stages: Succeeded/Total", "", false, None), + ("Tasks (for all stages): Succeeded/Total", "", false, None) ) if (!jobHeadersAndCssClasses.filter(_._3).map(_._1).contains(sortColumn)) { @@ -554,7 +564,7 @@ private[ui] class JobPagedTable( } val headerRow: Seq[Node] = { - jobHeadersAndCssClasses.map { case (header, cssClass, sortable) => + jobHeadersAndCssClasses.map { case (header, cssClass, sortable, tooltip) => if (header == sortColumn) { val headerLink = Unparsed( parameterPath + @@ -566,9 +576,17 @@ private[ui] class JobPagedTable( - {header} -  {Unparsed(arrow)} - + { + if (tooltip.nonEmpty) { + + {header} {Unparsed(arrow)} + + } else { + + {header} {Unparsed(arrow)} + + } + } } else { 
@@ -581,12 +599,32 @@ private[ui] class JobPagedTable( - {header} - + { + if (tooltip.nonEmpty) { + + {header} + + } else { + + {header} + + } + } + } else { - {header} + { + if (tooltip.nonEmpty) { + + {header} + + } else { + + {header} + + } + } } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala index f672ce0ec6a68..d8a93adbbe90a 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala @@ -30,7 +30,6 @@ import org.apache.spark.ui.{UIUtils, WebUIPage} private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { private val sc = parent.sc private val subPath = "stages" - private def isFairScheduler = parent.isFairScheduler def render(request: HttpServletRequest): Seq[Node] = { // For now, pool information is only accessible in live UIs @@ -57,7 +56,7 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") {
    - val poolsDescription = if (sc.isDefined && isFairScheduler) { + val poolsDescription = if (parent.isFairScheduler) {

    diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index f7aca507d6f93..12f1aa25e8d2a 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -104,7 +104,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP | 'group': 'executors', | 'start': new Date(${e.addTime.getTime()}), | 'content': '
    Executor ${e.id} added
    ' @@ -120,7 +120,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP | 'group': 'executors', | 'start': new Date(${removeTime.getTime()}), | 'content': '
    - + Event Timeline ++ diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala index c2644a8eea157..dc3106400dd2b 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala @@ -32,7 +32,9 @@ private[ui] class JobsTab(parent: SparkUI, store: AppStatusStore) val sc = parent.sc val killEnabled = parent.killEnabled + // Show pool information for only live UI. def isFairScheduler: Boolean = { + sc.isDefined && store .environmentInfo() .sparkProperties diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala index 683cfa582877d..7b90baad6d8d3 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala @@ -34,11 +34,17 @@ private[ui] class PoolTable(pools: Map[Schedulable, PoolData], parent: StagesTab - - + + - + {pools.map { case (s, p) => poolRow(request, s, p) }} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index fce05e8a42fda..ccaa70b9daae0 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -288,10 +288,10 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We val executorOverhead = serializationTime + deserializationTime val executorRunTime = if (taskInfo.duration.isDefined) { - totalExecutionTime - executorOverhead - gettingResultTime + math.max(totalExecutionTime - executorOverhead - gettingResultTime - schedulerDelay, 0) } else { metricsOpt.map(_.executorRunTime).getOrElse( - totalExecutionTime - executorOverhead - gettingResultTime) + math.max(totalExecutionTime - executorOverhead - gettingResultTime - schedulerDelay, 0)) } val 
executorComputingTime = executorRunTime - shuffleReadTime - shuffleWriteTime val executorComputingTimeProportion = @@ -721,19 +721,7 @@ private[ui] class TaskPagedTable( } else { error }) - val details = if (isMultiline) { - // scalastyle:off - - +details - ++ - - // scalastyle:on - } else { - "" - } + val details = UIUtils.detailsUINode(isMultiline, error) } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala index e24b2f2ec36db..a7d38e9b04b70 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -179,18 +179,20 @@ private[ui] class StagePagedTable( // stageHeadersAndCssClasses has three parts: header title, tooltip information, and sortable. // The tooltip information could be None, which indicates it does not have a tooltip. // Otherwise, it has two parts: tooltip text, and position (true for left, false for default). 
- val stageHeadersAndCssClasses: Seq[(String, Option[(String, Boolean)], Boolean)] = - Seq(("Stage Id", None, true)) ++ - {if (isFairScheduler) {Seq(("Pool Name", None, true))} else Seq.empty} ++ + val stageHeadersAndCssClasses: Seq[(String, String, Boolean)] = + Seq(("Stage Id", null, true)) ++ + {if (isFairScheduler) {Seq(("Pool Name", null, true))} else Seq.empty} ++ Seq( - ("Description", None, true), ("Submitted", None, true), ("Duration", None, true), - ("Tasks: Succeeded/Total", None, false), - ("Input", Some((ToolTips.INPUT, false)), true), - ("Output", Some((ToolTips.OUTPUT, false)), true), - ("Shuffle Read", Some((ToolTips.SHUFFLE_READ, false)), true), - ("Shuffle Write", Some((ToolTips.SHUFFLE_WRITE, true)), true) + ("Description", null, true), + ("Submitted", null, true), + ("Duration", ToolTips.DURATION, true), + ("Tasks: Succeeded/Total", null, false), + ("Input", ToolTips.INPUT, true), + ("Output", ToolTips.OUTPUT, true), + ("Shuffle Read", ToolTips.SHUFFLE_READ, true), + ("Shuffle Write", ToolTips.SHUFFLE_WRITE, true) ) ++ - {if (isFailedStage) {Seq(("Failure Reason", None, false))} else Seq.empty} + {if (isFailedStage) {Seq(("Failure Reason", null, false))} else Seq.empty} if (!stageHeadersAndCssClasses.filter(_._3).map(_._1).contains(sortColumn)) { throw new IllegalArgumentException(s"Unknown column: $sortColumn") @@ -198,22 +200,13 @@ private[ui] class StagePagedTable( val headerRow: Seq[Node] = { stageHeadersAndCssClasses.map { case (header, tooltip, sortable) => - val headerSpan = tooltip.map { case (title, left) => - if (left) { - /* Place the shuffle write tooltip on the left (rather than the default position - of on top) because the shuffle write column is the last column on the right side and - the tooltip is wider than the column, so it doesn't fit on top. 
*/ - + val headerSpan = if (null != tooltip && !tooltip.isEmpty) { + {header} - } else { - - {header} - - } - }.getOrElse( + } else { {header} - ) + } if (header == sortColumn) { val headerLink = Unparsed( @@ -316,19 +309,7 @@ private[ui] class StagePagedTable( } else { failureReason }) - val details = if (isMultiline) { - // scalastyle:off - - +details - ++ - - // scalastyle:on - } else { - "" - } + val details = UIUtils.detailsUINode(isMultiline, failureReason) } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala index 2d222b842be55..b59dd333da19e 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala @@ -37,7 +37,9 @@ private[ui] class StagesTab(val parent: SparkUI, val store: AppStatusStore) attachPage(new StagePage(this, store)) attachPage(new PoolPage(this)) + // Show pool information for only live UI. def isFairScheduler: Boolean = { + sc.isDefined && store .environmentInfo() .sparkProperties diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala index 2488197814ffd..fb43af357f7b8 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala @@ -25,6 +25,7 @@ import scala.xml.Node import org.apache.spark.status.{AppStatusStore, StreamBlockData} import org.apache.spark.status.api.v1 import org.apache.spark.ui._ +import org.apache.spark.ui.storage.ToolTips._ import org.apache.spark.util.Utils /** Page showing list of RDD's currently stored in the cluster */ @@ -56,7 +57,8 @@ private[ui] class StoragePage(parent: SparkUITab, store: AppStatusStore) extends rddHeader, rddRow(request, _: v1.RDDStorageInfo), rdds, - id = Some("storage-by-rdd-table"))} + id = Some("storage-by-rdd-table"), + tooltipHeaders = tooltips)} } 
@@ -72,6 +74,16 @@ private[ui] class StoragePage(parent: SparkUITab, store: AppStatusStore) extends "Size in Memory", "Size on Disk") + /** Tooltips for header fields of the RDD table */ + val tooltips = Seq( + None, + Some(RDD_NAME), + Some(STORAGE_LEVEL), + Some(CACHED_PARTITIONS), + Some(FRACTION_CACHED), + Some(SIZE_IN_MEMORY), + Some(SIZE_ON_DISK)) + /** Render an HTML row representing an RDD */ private def rddRow(request: HttpServletRequest, rdd: v1.RDDStorageInfo): Seq[Node] = { // scalastyle:off diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/HiveTestUtils.scala b/core/src/main/scala/org/apache/spark/ui/storage/ToolTips.scala similarity index 58% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/test/HiveTestUtils.scala rename to core/src/main/scala/org/apache/spark/ui/storage/ToolTips.scala index 7631efedf46af..4677eba63c830 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/HiveTestUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/ToolTips.scala @@ -15,18 +15,28 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.hive.test +package org.apache.spark.ui.storage -import java.io.File +private[ui] object ToolTips { -import org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax -import org.apache.hive.hcatalog.data.JsonSerDe + val RDD_NAME = + "Name of the persisted RDD" -object HiveTestUtils { + val STORAGE_LEVEL = + "StorageLevel displays where the persisted RDD is stored, " + + "format of the persisted RDD (serialized or de-serialized) and" + + "replication factor of the persisted RDD" - val getHiveContribJar: File = - new File(classOf[UDAFExampleMax].getProtectionDomain.getCodeSource.getLocation.getPath) + val CACHED_PARTITIONS = + "Number of partitions cached" - val getHiveHcatalogCoreJar: File = - new File(classOf[JsonSerDe].getProtectionDomain.getCodeSource.getLocation.getPath) + val FRACTION_CACHED = + "Fraction of total partitions cached" + + val SIZE_IN_MEMORY = + "Total size of partitions in memory" + + val SIZE_ON_DISK = + "Total size of partitions on the disk" } + diff --git a/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala index a5ee0ff16b5df..1383e1835028c 100644 --- a/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala @@ -67,7 +67,7 @@ class ByteBufferInputStream(private var buffer: ByteBuffer) /** * Clean up the buffer, and potentially dispose of it using StorageUtils.dispose(). 
*/ - private def cleanUp() { + private def cleanUp(): Unit = { if (buffer != null) { buffer = null } diff --git a/core/src/main/scala/org/apache/spark/util/Clock.scala b/core/src/main/scala/org/apache/spark/util/Clock.scala index e92ed11bd165b..226f15d3d38c2 100644 --- a/core/src/main/scala/org/apache/spark/util/Clock.scala +++ b/core/src/main/scala/org/apache/spark/util/Clock.scala @@ -21,7 +21,37 @@ package org.apache.spark.util * An interface to represent clocks, so that they can be mocked out in unit tests. */ private[spark] trait Clock { + /** @return Current system time, in ms. */ def getTimeMillis(): Long + + // scalastyle:off line.size.limit + /** + * Current value of high resolution time source, in ns. + * + * This method abstracts the call to the JRE's `System.nanoTime()` call. As with that method, the + * value here is not guaranteed to be monotonically increasing, but rather a higher resolution + * time source for use in the calculation of time intervals. The characteristics of the values + * returned may very from JVM to JVM (or even the same JVM running on different OSes or CPUs), but + * in general it should be preferred over [[getTimeMillis()]] when calculating time differences. + * + * Specifically for Linux on x64 architecture, the following links provide useful information + * about the characteristics of the value returned: + * + * http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/ + * https://stackoverflow.com/questions/10921210/cpu-tsc-fetch-operation-especially-in-multicore-multi-processor-environment + * + * TL;DR: on modern (2.6.32+) Linux kernels with modern (AMD K8+) CPUs, the values returned by + * `System.nanoTime()` are consistent across CPU cores *and* packages, and provide always + * increasing values (although it may not be completely monotonic when the system clock is + * adjusted by NTP daemons using time slew). 
+ */ + // scalastyle:on line.size.limit + def nanoTime(): Long + + /** + * Wait until the wall clock reaches at least the given time. Note this may not actually wait for + * the actual difference between the current and target times, since the wall clock may drift. + */ def waitTillTime(targetTime: Long): Long } @@ -36,15 +66,19 @@ private[spark] class SystemClock extends Clock { * @return the same time (milliseconds since the epoch) * as is reported by `System.currentTimeMillis()` */ - def getTimeMillis(): Long = System.currentTimeMillis() + override def getTimeMillis(): Long = System.currentTimeMillis() + + /** + * @return value reported by `System.nanoTime()`. + */ + override def nanoTime(): Long = System.nanoTime() /** * @param targetTime block until the current time is at least this value * @return current system time when wait has completed */ - def waitTillTime(targetTime: Long): Long = { - var currentTime = 0L - currentTime = System.currentTimeMillis() + override def waitTillTime(targetTime: Long): Long = { + var currentTime = System.currentTimeMillis() var waitTime = targetTime - currentTime if (waitTime <= 0) { diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 6d6ef5a744204..d2ad14f2a1a96 100644 --- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -387,7 +387,7 @@ private[spark] object ClosureCleaner extends Logging { } } - private def ensureSerializable(func: AnyRef) { + private def ensureSerializable(func: AnyRef): Unit = { try { if (SparkEnv.get != null) { SparkEnv.get.closureSerializer.newInstance().serialize(func) @@ -433,7 +433,7 @@ private class ReturnStatementFinder(targetMethodName: Option[String] = None) name == targetMethodName.get || name == targetMethodName.get.stripSuffix("$adapted") new MethodVisitor(ASM7) { - override def visitTypeInsn(op: Int, tp: String) { 
+ override def visitTypeInsn(op: Int, tp: String): Unit = { if (op == NEW && tp.contains("scala/runtime/NonLocalReturnControl") && isTargetMethod) { throw new ReturnStatementInClosureException } @@ -480,7 +480,7 @@ private[util] class FieldAccessFinder( } new MethodVisitor(ASM7) { - override def visitFieldInsn(op: Int, owner: String, name: String, desc: String) { + override def visitFieldInsn(op: Int, owner: String, name: String, desc: String): Unit = { if (op == GETFIELD) { for (cl <- fields.keys if cl.getName == owner.replace('/', '.')) { fields(cl) += name @@ -489,7 +489,7 @@ private[util] class FieldAccessFinder( } override def visitMethodInsn( - op: Int, owner: String, name: String, desc: String, itf: Boolean) { + op: Int, owner: String, name: String, desc: String, itf: Boolean): Unit = { for (cl <- fields.keys if cl.getName == owner.replace('/', '.')) { // Check for calls a getter method for a variable in an interpreter wrapper object. // This means that the corresponding field will be accessed, so we should save it. @@ -528,7 +528,7 @@ private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM // The second closure technically has two inner closures, but this finder only finds one override def visit(version: Int, access: Int, name: String, sig: String, - superName: String, interfaces: Array[String]) { + superName: String, interfaces: Array[String]): Unit = { myName = name } @@ -536,7 +536,7 @@ private class InnerClosureFinder(output: Set[Class[_]]) extends ClassVisitor(ASM sig: String, exceptions: Array[String]): MethodVisitor = { new MethodVisitor(ASM7) { override def visitMethodInsn( - op: Int, owner: String, name: String, desc: String, itf: Boolean) { + op: Int, owner: String, name: String, desc: String, itf: Boolean): Unit = { val argTypes = Type.getArgumentTypes(desc) if (op == INVOKESPECIAL && name == "" && argTypes.length > 0 && argTypes(0).toString.startsWith("L") // is it an object? 
diff --git a/core/src/main/scala/org/apache/spark/util/Distribution.scala b/core/src/main/scala/org/apache/spark/util/Distribution.scala index 240dcfbab60ac..550884c873297 100644 --- a/core/src/main/scala/org/apache/spark/util/Distribution.scala +++ b/core/src/main/scala/org/apache/spark/util/Distribution.scala @@ -65,7 +65,7 @@ private[spark] class Distribution(val data: Array[Double], val startIdx: Int, va * print a summary of this distribution to the given PrintStream. * @param out */ - def summary(out: PrintStream = System.out) { + def summary(out: PrintStream = System.out): Unit = { // scalastyle:off println out.println(statCounter) showQuantiles(out) @@ -83,7 +83,7 @@ private[spark] object Distribution { } } - def showQuantiles(out: PrintStream = System.out, quantiles: Iterable[Double]) { + def showQuantiles(out: PrintStream = System.out, quantiles: Iterable[Double]): Unit = { // scalastyle:off println out.println("min\t25%\t50%\t75%\tmax") quantiles.foreach{q => out.print(q + "\t")} diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index 73ef80980e73f..53824735d2fc5 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -33,7 +33,7 @@ import org.apache.spark._ import org.apache.spark.executor._ import org.apache.spark.metrics.ExecutorMetricType import org.apache.spark.rdd.RDDOperationScope -import org.apache.spark.resource.ResourceInformation +import org.apache.spark.resource.{ResourceInformation, ResourceProfile} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.storage._ @@ -391,6 +391,7 @@ private[spark] object JsonProtocol { ("Executor Deserialize CPU Time" -> taskMetrics.executorDeserializeCpuTime) ~ ("Executor Run Time" -> taskMetrics.executorRunTime) ~ ("Executor CPU Time" -> taskMetrics.executorCpuTime) ~ + ("Peak 
Execution Memory" -> taskMetrics.peakExecutionMemory) ~ ("Result Size" -> taskMetrics.resultSize) ~ ("JVM GC Time" -> taskMetrics.jvmGCTime) ~ ("Result Serialization Time" -> taskMetrics.resultSerializationTime) ~ @@ -420,6 +421,7 @@ private[spark] object JsonProtocol { ("Block Manager Address" -> blockManagerAddress) ~ ("Shuffle ID" -> fetchFailed.shuffleId) ~ ("Map ID" -> fetchFailed.mapId) ~ + ("Map Index" -> fetchFailed.mapIndex) ~ ("Reduce ID" -> fetchFailed.reduceId) ~ ("Message" -> fetchFailed.message) case exceptionFailure: ExceptionFailure => @@ -660,7 +662,8 @@ private[spark] object JsonProtocol { val stageInfos = jsonOption(json \ "Stage Infos") .map(_.extract[Seq[JValue]].map(stageInfoFromJson)).getOrElse { stageIds.map { id => - new StageInfo(id, 0, "unknown", 0, Seq.empty, Seq.empty, "unknown") + new StageInfo(id, 0, "unknown", 0, Seq.empty, Seq.empty, "unknown", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) } } SparkListenerJobStart(jobId, submissionTime, stageInfos, properties) @@ -801,7 +804,8 @@ private[spark] object JsonProtocol { } val stageInfo = new StageInfo( - stageId, attemptId, stageName, numTasks, rddInfos, parentIds, details) + stageId, attemptId, stageName, numTasks, rddInfos, parentIds, details, + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) stageInfo.submissionTime = submissionTime stageInfo.completionTime = completionTime stageInfo.failureReason = failureReason @@ -893,6 +897,10 @@ private[spark] object JsonProtocol { case JNothing => 0 case x => x.extract[Long] }) + metrics.setPeakExecutionMemory((json \ "Peak Execution Memory") match { + case JNothing => 0 + case x => x.extract[Long] + }) metrics.setResultSize((json \ "Result Size").extract[Long]) metrics.setJvmGCTime((json \ "JVM GC Time").extract[Long]) metrics.setResultSerializationTime((json \ "Result Serialization Time").extract[Long]) @@ -974,10 +982,11 @@ private[spark] object JsonProtocol { case `fetchFailed` => val 
blockManagerAddress = blockManagerIdFromJson(json \ "Block Manager Address") val shuffleId = (json \ "Shuffle ID").extract[Int] - val mapId = (json \ "Map ID").extract[Int] + val mapId = (json \ "Map ID").extract[Long] + val mapIndex = (json \ "Map Index").extract[Int] val reduceId = (json \ "Reduce ID").extract[Int] val message = jsonOption(json \ "Message").map(_.extract[String]) - new FetchFailed(blockManagerAddress, shuffleId, mapId, reduceId, + new FetchFailed(blockManagerAddress, shuffleId, mapId, mapIndex, reduceId, message.getOrElse("Unknown reason")) case `exceptionFailure` => val className = (json \ "Class Name").extract[String] diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala index 2e517707ff774..51cd7d1284ff3 100644 --- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala @@ -25,7 +25,8 @@ import scala.util.control.NonFatal import com.codahale.metrics.Timer -import org.apache.spark.internal.Logging +import org.apache.spark.SparkEnv +import org.apache.spark.internal.{config, Logging} /** * An event bus which posts events to its listeners. @@ -37,6 +38,20 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { // Marked `private[spark]` for access in tests. private[spark] def listeners = listenersPlusTimers.asScala.map(_._1).asJava + private lazy val env = SparkEnv.get + + private lazy val logSlowEventEnabled = if (env != null) { + env.conf.get(config.LISTENER_BUS_LOG_SLOW_EVENT_ENABLED) + } else { + false + } + + private lazy val logSlowEventThreshold = if (env != null) { + env.conf.get(config.LISTENER_BUS_LOG_SLOW_EVENT_TIME_THRESHOLD) + } else { + Long.MaxValue + } + /** * Returns a CodaHale metrics Timer for measuring the listener's event processing time. * This method is intended to be overridden by subclasses. 
@@ -95,6 +110,7 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { } else { null } + lazy val listenerName = Utils.getFormattedClassName(listener) try { doPostEvent(listener, event) if (Thread.interrupted()) { @@ -104,14 +120,17 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { } } catch { case ie: InterruptedException => - logError(s"Interrupted while posting to ${Utils.getFormattedClassName(listener)}. " + - s"Removing that listener.", ie) + logError(s"Interrupted while posting to ${listenerName}. Removing that listener.", ie) removeListenerOnError(listener) case NonFatal(e) if !isIgnorableException(e) => - logError(s"Listener ${Utils.getFormattedClassName(listener)} threw an exception", e) + logError(s"Listener ${listenerName} threw an exception", e) } finally { if (maybeTimerContext != null) { - maybeTimerContext.stop() + val elapsed = maybeTimerContext.stop() + if (logSlowEventEnabled && elapsed > logSlowEventThreshold) { + logInfo(s"Process of event ${event} by listener ${listenerName} took " + + s"${elapsed / 1000000000d}s.") + } } } } diff --git a/core/src/main/scala/org/apache/spark/util/ManualClock.scala b/core/src/main/scala/org/apache/spark/util/ManualClock.scala index e7a65d74a440e..36d6820eba239 100644 --- a/core/src/main/scala/org/apache/spark/util/ManualClock.scala +++ b/core/src/main/scala/org/apache/spark/util/ManualClock.scala @@ -17,11 +17,16 @@ package org.apache.spark.util +import java.util.concurrent.TimeUnit + /** * A `Clock` whose time can be manually set and modified. Its reported time does not change * as time elapses, but only as its time is modified by callers. This is mainly useful for * testing. * + * For this implementation, `getTimeMillis()` and `nanoTime()` always return the same value + * (adjusted for the correct unit). 
+ * * @param time initial time (in milliseconds since the epoch) */ private[spark] class ManualClock(private var time: Long) extends Clock { @@ -31,10 +36,11 @@ private[spark] class ManualClock(private var time: Long) extends Clock { */ def this() = this(0L) - def getTimeMillis(): Long = - synchronized { - time - } + override def getTimeMillis(): Long = synchronized { + time + } + + override def nanoTime(): Long = TimeUnit.MILLISECONDS.toNanos(getTimeMillis()) /** * @param timeToSet new time (in milliseconds) that the clock should represent @@ -56,7 +62,7 @@ private[spark] class ManualClock(private var time: Long) extends Clock { * @param targetTime block until the clock time is set or advanced to at least this time * @return current time reported by the clock when waiting finishes */ - def waitTillTime(targetTime: Long): Long = synchronized { + override def waitTillTime(targetTime: Long): Long = synchronized { while (time < targetTime) { wait(10) } diff --git a/core/src/main/scala/org/apache/spark/util/NextIterator.scala b/core/src/main/scala/org/apache/spark/util/NextIterator.scala index 0b505a576768c..0e289025da110 100644 --- a/core/src/main/scala/org/apache/spark/util/NextIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/NextIterator.scala @@ -50,7 +50,7 @@ private[spark] abstract class NextIterator[U] extends Iterator[U] { * Ideally you should have another try/catch, as in HadoopRDD, that * ensures any resources are closed should iteration fail. */ - protected def close() + protected def close(): Unit /** * Calls the subclass-defined close method, but only once. @@ -58,7 +58,7 @@ private[spark] abstract class NextIterator[U] extends Iterator[U] { * Usually calling `close` multiple times should be fine, but historically * there have been issues with some InputFormats throwing exceptions. 
*/ - def closeIfNeeded() { + def closeIfNeeded(): Unit = { if (!closed) { // Note: it's important that we set closed = true before calling close(), since setting it // afterwards would permit us to call close() multiple times if close() threw an exception. diff --git a/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala b/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala index c105f3229af09..f01645d82303e 100644 --- a/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala +++ b/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala @@ -24,7 +24,6 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkContext import org.apache.spark.internal.Logging -import org.apache.spark.storage.StorageLevel /** diff --git a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala index 3354a923273ff..42d7f71404594 100644 --- a/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala +++ b/core/src/main/scala/org/apache/spark/util/SerializableConfiguration.scala @@ -20,7 +20,14 @@ import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.conf.Configuration -private[spark] +import org.apache.spark.annotation.{DeveloperApi, Unstable} + +/** + * Hadoop configuration but serializable. Use `value` to access the Hadoop configuration. 
+ * + * @param value Hadoop configuration + */ +@DeveloperApi @Unstable class SerializableConfiguration(@transient var value: Configuration) extends Serializable { private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.defaultWriteObject() diff --git a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala index b702838fa257f..4f1311224bb95 100644 --- a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala +++ b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala @@ -70,7 +70,7 @@ private[spark] object ShutdownHookManager extends Logging { } // Register the path to be deleted via shutdown hook - def registerShutdownDeleteDir(file: File) { + def registerShutdownDeleteDir(file: File): Unit = { val absolutePath = file.getAbsolutePath() shutdownDeletePaths.synchronized { shutdownDeletePaths += absolutePath @@ -78,7 +78,7 @@ private[spark] object ShutdownHookManager extends Logging { } // Remove the path to be deleted via shutdown hook - def removeShutdownDeleteDir(file: File) { + def removeShutdownDeleteDir(file: File): Unit = { val absolutePath = file.getAbsolutePath() shutdownDeletePaths.synchronized { shutdownDeletePaths.remove(absolutePath) @@ -120,7 +120,7 @@ private[spark] object ShutdownHookManager extends Logging { def inShutdown(): Boolean = { try { val hook = new Thread { - override def run() {} + override def run(): Unit = {} } // scalastyle:off runtimeaddshutdownhook Runtime.getRuntime.addShutdownHook(hook) diff --git a/core/src/main/scala/org/apache/spark/util/SignalUtils.scala b/core/src/main/scala/org/apache/spark/util/SignalUtils.scala index 5a24965170cef..230195da2a121 100644 --- a/core/src/main/scala/org/apache/spark/util/SignalUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/SignalUtils.scala @@ -60,7 +60,7 @@ private[spark] object SignalUtils extends Logging { if (SystemUtils.IS_OS_UNIX) { try { 
val handler = handlers.getOrElseUpdate(signal, { - logInfo("Registered signal handler for " + signal) + logInfo("Registering signal handler for " + signal) new ActionHandler(new Signal(signal)) }) handler.register(action) diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 09c69f5c68b03..85e1119569ce2 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -107,7 +107,7 @@ object SizeEstimator extends Logging { // Sets object size, pointer size based on architecture and CompressedOops settings // from the JVM. - private def initialize() { + private def initialize(): Unit = { val arch = System.getProperty("os.arch") is64bit = arch.contains("64") || arch.contains("s390x") isCompressedOops = getIsCompressedOops @@ -171,7 +171,7 @@ object SizeEstimator extends Logging { val stack = new ArrayBuffer[AnyRef] var size = 0L - def enqueue(obj: AnyRef) { + def enqueue(obj: AnyRef): Unit = { if (obj != null && !visited.containsKey(obj)) { visited.put(obj, null) stack += obj @@ -205,7 +205,7 @@ object SizeEstimator extends Logging { state.size } - private def visitSingleObject(obj: AnyRef, state: SearchState) { + private def visitSingleObject(obj: AnyRef, state: SearchState): Unit = { val cls = obj.getClass if (cls.isArray) { visitArray(obj, cls, state) @@ -234,7 +234,7 @@ object SizeEstimator extends Logging { private val ARRAY_SIZE_FOR_SAMPLING = 400 private val ARRAY_SAMPLE_SIZE = 100 // should be lower than ARRAY_SIZE_FOR_SAMPLING - private def visitArray(array: AnyRef, arrayClass: Class[_], state: SearchState) { + private def visitArray(array: AnyRef, arrayClass: Class[_], state: SearchState): Unit = { val length = ScalaRunTime.array_length(array) val elementClass = arrayClass.getComponentType() @@ -326,7 +326,7 @@ object SizeEstimator extends Logging { val parent = 
getClassInfo(cls.getSuperclass) var shellSize = parent.shellSize var pointerFields = parent.pointerFields - val sizeCount = Array.fill(fieldSizes.max + 1)(0) + val sizeCount = Array.ofDim[Int](fieldSizes.max + 1) // iterate through the fields of this class and gather information. for (field <- cls.getDeclaredFields) { diff --git a/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala b/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala index 1b34fbde38cd6..e77128755363d 100644 --- a/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala +++ b/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala @@ -28,7 +28,7 @@ import org.apache.spark.internal.Logging private[spark] class SparkUncaughtExceptionHandler(val exitOnUncaughtException: Boolean = true) extends Thread.UncaughtExceptionHandler with Logging { - override def uncaughtException(thread: Thread, exception: Throwable) { + override def uncaughtException(thread: Thread, exception: Throwable): Unit = { try { // Make it explicit that uncaught exceptions are thrown when container is shutting down. 
// It will help users when they analyze the executor logs @@ -48,15 +48,30 @@ private[spark] class SparkUncaughtExceptionHandler(val exitOnUncaughtException: System.exit(SparkExitCode.OOM) case _ if exitOnUncaughtException => System.exit(SparkExitCode.UNCAUGHT_EXCEPTION) + case _ => + // SPARK-30310: Don't System.exit() when exitOnUncaughtException is false } } } catch { - case oom: OutOfMemoryError => Runtime.getRuntime.halt(SparkExitCode.OOM) - case t: Throwable => Runtime.getRuntime.halt(SparkExitCode.UNCAUGHT_EXCEPTION_TWICE) + case oom: OutOfMemoryError => + try { + logError(s"Uncaught OutOfMemoryError in thread $thread, process halted.", oom) + } catch { + // absorb any exception/error since we're halting the process + case _: Throwable => + } + Runtime.getRuntime.halt(SparkExitCode.OOM) + case t: Throwable => + try { + logError(s"Another uncaught exception in thread $thread, process halted.", t) + } catch { + case _: Throwable => + } + Runtime.getRuntime.halt(SparkExitCode.UNCAUGHT_EXCEPTION_TWICE) } } - def uncaughtException(exception: Throwable) { + def uncaughtException(exception: Throwable): Unit = { uncaughtException(Thread.currentThread(), exception) } } diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index 8df331251c749..de39e4b410f25 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -18,23 +18,97 @@ package org.apache.spark.util import java.util.concurrent._ +import java.util.concurrent.locks.ReentrantLock -import scala.collection.TraversableLike -import scala.collection.generic.CanBuildFrom -import scala.language.higherKinds - -import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} import scala.concurrent.{Awaitable, ExecutionContext, ExecutionContextExecutor, Future} import scala.concurrent.duration.{Duration, FiniteDuration} +import 
scala.language.higherKinds import scala.util.control.NonFatal +import com.google.common.util.concurrent.ThreadFactoryBuilder + import org.apache.spark.SparkException import org.apache.spark.rpc.RpcAbortException private[spark] object ThreadUtils { private val sameThreadExecutionContext = - ExecutionContext.fromExecutorService(MoreExecutors.sameThreadExecutor()) + ExecutionContext.fromExecutorService(sameThreadExecutorService()) + + // Inspired by Guava MoreExecutors.sameThreadExecutor; inlined and converted + // to Scala here to avoid Guava version issues + def sameThreadExecutorService(): ExecutorService = new AbstractExecutorService { + private val lock = new ReentrantLock() + private val termination = lock.newCondition() + private var runningTasks = 0 + private var serviceIsShutdown = false + + override def shutdown(): Unit = { + lock.lock() + try { + serviceIsShutdown = true + } finally { + lock.unlock() + } + } + + override def shutdownNow(): java.util.List[Runnable] = { + shutdown() + java.util.Collections.emptyList() + } + + override def isShutdown: Boolean = { + lock.lock() + try { + serviceIsShutdown + } finally { + lock.unlock() + } + } + + override def isTerminated: Boolean = synchronized { + lock.lock() + try { + serviceIsShutdown && runningTasks == 0 + } finally { + lock.unlock() + } + } + + override def awaitTermination(timeout: Long, unit: TimeUnit): Boolean = { + var nanos = unit.toNanos(timeout) + lock.lock() + try { + while (nanos > 0 && !isTerminated()) { + nanos = termination.awaitNanos(nanos) + } + isTerminated() + } finally { + lock.unlock() + } + } + + override def execute(command: Runnable): Unit = { + lock.lock() + try { + if (isShutdown()) throw new RejectedExecutionException("Executor already shutdown") + runningTasks += 1 + } finally { + lock.unlock() + } + try { + command.run() + } finally { + lock.lock() + try { + runningTasks -= 1 + if (isTerminated()) termination.signalAll() + } finally { + lock.unlock() + } + } + } + } /** * An 
`ExecutionContextExecutor` that runs each task in the thread that invokes `execute/submit`. @@ -275,13 +349,7 @@ private[spark] object ThreadUtils { * @return new collection in which each element was given from the input collection `in` by * applying the lambda function `f`. */ - def parmap[I, O, Col[X] <: TraversableLike[X, Col[X]]] - (in: Col[I], prefix: String, maxThreads: Int) - (f: I => O) - (implicit - cbf: CanBuildFrom[Col[I], Future[O], Col[Future[O]]], // For in.map - cbf2: CanBuildFrom[Col[Future[O]], O, Col[O]] // for Future.sequence - ): Col[O] = { + def parmap[I, O](in: Seq[I], prefix: String, maxThreads: Int)(f: I => O): Seq[O] = { val pool = newForkJoinPool(prefix, maxThreads) try { implicit val ec = ExecutionContext.fromExecutor(pool) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 9c1f21fa236ba..297cc5e4cb100 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -28,7 +28,7 @@ import java.nio.channels.{Channels, FileChannel, WritableByteChannel} import java.nio.charset.StandardCharsets import java.nio.file.Files import java.security.SecureRandom -import java.util.{Locale, Properties, Random, UUID} +import java.util.{Arrays, Locale, Properties, Random, UUID} import java.util.concurrent._ import java.util.concurrent.TimeUnit.NANOSECONDS import java.util.zip.GZIPInputStream @@ -45,9 +45,9 @@ import scala.util.matching.Regex import _root_.io.netty.channel.unix.Errors.NativeIoException import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} -import com.google.common.hash.HashCodes import com.google.common.io.{ByteStreams, Files => GFiles} import com.google.common.net.InetAddresses +import org.apache.commons.codec.binary.Hex import org.apache.commons.lang3.SystemUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} @@ -95,7 +95,7 
@@ private[spark] object Utils extends Logging { */ val DEFAULT_DRIVER_MEM_MB = JavaUtils.DEFAULT_DRIVER_MEM_MB.toInt - private val MAX_DIR_CREATION_ATTEMPTS: Int = 10 + val MAX_DIR_CREATION_ATTEMPTS: Int = 10 @volatile private var localRootDirs: Array[String] = null /** Scheme used for files that are locally available on worker nodes in the cluster. */ @@ -731,7 +731,7 @@ private[spark] object Utils extends Logging { case "file" => // In the case of a local file, copy the local file to the target directory. // Note the difference between uri vs url. - val sourceFile = if (uri.isAbsolute) new File(uri) else new File(url) + val sourceFile = if (uri.isAbsolute) new File(uri) else new File(uri.getPath) copyFile(url, sourceFile, targetFile, fileOverwrite) case _ => val fs = getHadoopFileSystem(uri, hadoopConf) @@ -999,7 +999,7 @@ private[spark] object Utils extends Logging { * Allow setting a custom host name because when we run on Mesos we need to use the same * hostname it reports to the master. 
*/ - def setCustomHostname(hostname: String) { + def setCustomHostname(hostname: String): Unit = { // DEBUG code Utils.checkHost(hostname) customHostname = Some(hostname) @@ -1026,11 +1026,11 @@ private[spark] object Utils extends Logging { customHostname.getOrElse(InetAddresses.toUriString(localIpAddress)) } - def checkHost(host: String) { + def checkHost(host: String): Unit = { assert(host != null && host.indexOf(':') == -1, s"Expected hostname (not IP) but got $host") } - def checkHostPort(hostPort: String) { + def checkHostPort(hostPort: String): Unit = { assert(hostPort != null && hostPort.indexOf(':') != -1, s"Expected host and port but got $hostPort") } @@ -1280,7 +1280,7 @@ private[spark] object Utils extends Logging { inputStream: InputStream, processLine: String => Unit): Thread = { val t = new Thread(threadName) { - override def run() { + override def run(): Unit = { for (line <- Source.fromInputStream(inputStream).getLines()) { processLine(line) } @@ -1297,7 +1297,7 @@ private[spark] object Utils extends Logging { * * NOTE: This method is to be called by the spark-started JVM process. */ - def tryOrExit(block: => Unit) { + def tryOrExit(block: => Unit): Unit = { try { block } catch { @@ -1314,7 +1314,7 @@ private[spark] object Utils extends Logging { * user-started JVM process completely; in contrast, tryOrExit is to be called in the * spark-started JVM process . */ - def tryOrStopSparkContext(sc: SparkContext)(block: => Unit) { + def tryOrStopSparkContext(sc: SparkContext)(block: => Unit): Unit = { try { block } catch { @@ -1352,7 +1352,7 @@ private[spark] object Utils extends Logging { } /** Executes the given block. 
Log non-fatal errors if any, and only throw fatal errors */ - def tryLogNonFatalError(block: => Unit) { + def tryLogNonFatalError(block: => Unit): Unit = { try { block } catch { @@ -1671,7 +1671,7 @@ private[spark] object Utils extends Logging { var inSingleQuote = false var inDoubleQuote = false val curWord = new StringBuilder - def endWord() { + def endWord(): Unit = { buf += curWord.toString curWord.clear() } @@ -1744,34 +1744,6 @@ private[spark] object Utils extends Logging { hashAbs } - /** - * NaN-safe version of `java.lang.Double.compare()` which allows NaN values to be compared - * according to semantics where NaN == NaN and NaN is greater than any non-NaN double. - */ - def nanSafeCompareDoubles(x: Double, y: Double): Int = { - val xIsNan: Boolean = java.lang.Double.isNaN(x) - val yIsNan: Boolean = java.lang.Double.isNaN(y) - if ((xIsNan && yIsNan) || (x == y)) 0 - else if (xIsNan) 1 - else if (yIsNan) -1 - else if (x > y) 1 - else -1 - } - - /** - * NaN-safe version of `java.lang.Float.compare()` which allows NaN values to be compared - * according to semantics where NaN == NaN and NaN is greater than any non-NaN float. - */ - def nanSafeCompareFloats(x: Float, y: Float): Int = { - val xIsNan: Boolean = java.lang.Float.isNaN(x) - val yIsNan: Boolean = java.lang.Float.isNaN(y) - if ((xIsNan && yIsNan) || (x == y)) 0 - else if (xIsNan) 1 - else if (yIsNan) -1 - else if (x > y) 1 - else -1 - } - /** * Returns the system properties map that is thread-safe to iterator over. 
It gets the * properties which have been set explicitly, as well as those for which only a default value @@ -1840,14 +1812,14 @@ private[spark] object Utils extends Logging { * Generate a zipWithIndex iterator, avoid index value overflowing problem * in scala's zipWithIndex */ - def getIteratorZipWithIndex[T](iterator: Iterator[T], startIndex: Long): Iterator[(T, Long)] = { + def getIteratorZipWithIndex[T](iter: Iterator[T], startIndex: Long): Iterator[(T, Long)] = { new Iterator[(T, Long)] { require(startIndex >= 0, "startIndex should be >= 0.") var index: Long = startIndex - 1L - def hasNext: Boolean = iterator.hasNext + def hasNext: Boolean = iter.hasNext def next(): (T, Long) = { index += 1L - (iterator.next(), index) + (iter.next(), index) } } } @@ -2342,7 +2314,7 @@ private[spark] object Utils extends Logging { /** * configure a new log4j level */ - def setLogLevel(l: org.apache.log4j.Level) { + def setLogLevel(l: org.apache.log4j.Level): Unit = { val rootLogger = org.apache.log4j.Logger.getRootLogger() rootLogger.setLevel(l) // Setting threshold to null as rootLevel will define log level for spark-shell @@ -2838,7 +2810,7 @@ private[spark] object Utils extends Logging { val rnd = new SecureRandom() val secretBytes = new Array[Byte](bits / JByte.SIZE) rnd.nextBytes(secretBytes) - HashCodes.fromBytes(secretBytes).toString() + Hex.encodeHexString(secretBytes) } /** @@ -2950,6 +2922,13 @@ private[spark] object Utils extends Logging { val codec = codecFactory.getCodec(path) codec == null || codec.isInstanceOf[SplittableCompressionCodec] } + + /** Create a new properties object with the same values as `props` */ + def cloneProperties(props: Properties): Properties = { + val resultProps = new Properties() + props.forEach((k, v) => resultProps.put(k, v)) + resultProps + } } private[util] object CallerContext extends Logging { @@ -3033,7 +3012,8 @@ private[spark] class CallerContext( if (CallerContext.callerContextSupported) { try { val callerContext = 
Utils.classForName("org.apache.hadoop.ipc.CallerContext") - val builder = Utils.classForName("org.apache.hadoop.ipc.CallerContext$Builder") + val builder: Class[AnyRef] = + Utils.classForName("org.apache.hadoop.ipc.CallerContext$Builder") val builderInst = builder.getConstructor(classOf[String]).newInstance(context) val hdfsContext = builder.getMethod("build").invoke(builderInst) callerContext.getMethod("setCurrent", callerContext).invoke(null, hdfsContext) @@ -3056,7 +3036,7 @@ private[spark] class RedirectThread( extends Thread(name) { setDaemon(true) - override def run() { + override def run(): Unit = { scala.util.control.Exception.ignoring(classOf[IOException]) { // FIXME: We copy the stream on the level of bytes to avoid encoding problems. Utils.tryWithSafeFinally { diff --git a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala index bcb95b416dd25..46e311d8b0476 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/AppendOnlyMap.scala @@ -198,7 +198,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) override def size: Int = curSize /** Increase table size by 1, rehashing if necessary */ - private def incrementSize() { + private def incrementSize(): Unit = { curSize += 1 if (curSize > growThreshold) { growTable() @@ -211,7 +211,7 @@ class AppendOnlyMap[K, V](initialCapacity: Int = 64) private def rehash(h: Int): Int = Hashing.murmur3_32().hashInt(h).asInt() /** Double the table's size and re-hash everything */ - protected def growTable() { + protected def growTable(): Unit = { // capacity < MAXIMUM_CAPACITY (2 ^ 29) so capacity * 2 won't overflow val newCapacity = capacity * 2 require(newCapacity <= MAXIMUM_CAPACITY, s"Can't contain more than ${growThreshold} elements") diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala 
b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala index e63e0e3e1f68f..098f389829ec5 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala @@ -150,12 +150,12 @@ class BitSet(numBits: Int) extends Serializable { * Sets the bit at the specified index to true. * @param index the bit index */ - def set(index: Int) { + def set(index: Int): Unit = { val bitmask = 1L << (index & 0x3f) // mod 64 and shift words(index >> 6) |= bitmask // div by 64 and mask } - def unset(index: Int) { + def unset(index: Int): Unit = { val bitmask = 1L << (index & 0x3f) // mod 64 and shift words(index >> 6) &= ~bitmask // div by 64 and mask } diff --git a/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala index 5d3693190cc1f..9d5f1aac3391b 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/CompactBuffer.scala @@ -112,8 +112,6 @@ private[spark] class CompactBuffer[T: ClassTag] extends Seq[T] with Serializable override def length: Int = curSize - override def size: Int = curSize - override def iterator: Iterator[T] = new Iterator[T] { private var pos = 0 override def hasNext: Boolean = pos < curSize diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index 1ba3b7875f8dc..7f40b469a95e9 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -367,7 +367,7 @@ class ExternalAppendOnlyMap[K, V, C]( private def removeFromBuffer[T](buffer: ArrayBuffer[T], index: Int): T = { val elem = buffer(index) buffer(index) = buffer(buffer.size - 1) // This also 
works if index == buffer.size - 1 - buffer.reduceToSize(buffer.size - 1) + buffer.trimEnd(1) elem } @@ -549,7 +549,7 @@ class ExternalAppendOnlyMap[K, V, C]( item } - private def cleanup() { + private def cleanup(): Unit = { batchIndex = batchOffsets.length // Prevent reading any other batch if (deserializeStream != null) { deserializeStream.close() diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala index 7a822e137e556..cc97bbfa7201f 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala @@ -23,7 +23,7 @@ import java.util.Comparator import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import com.google.common.io.{ByteStreams, Closeables} +import com.google.common.io.ByteStreams import org.apache.spark._ import org.apache.spark.executor.ShuffleWriteMetrics @@ -534,7 +534,7 @@ private[spark] class ExternalSorter[K, V, C]( * Update partitionId if we have reached the end of our current partition, possibly skipping * empty partitions on the way. 
*/ - private def skipToNextPartition() { + private def skipToNextPartition(): Unit = { while (partitionId < numPartitions && indexInPartition == spill.elementsPerPartition(partitionId)) { partitionId += 1 @@ -605,7 +605,7 @@ private[spark] class ExternalSorter[K, V, C]( } // Clean up our open streams and put us in a state where we can't read any more data - def cleanup() { + def cleanup(): Unit = { batchId = batchOffsets.length // Prevent reading any other batch val ds = deserializeStream deserializeStream = null @@ -727,7 +727,7 @@ private[spark] class ExternalSorter[K, V, C]( */ def writePartitionedMapOutput( shuffleId: Int, - mapId: Int, + mapId: Long, mapOutputWriter: ShuffleMapOutputWriter): Unit = { var nextPartitionId = 0 if (spills.isEmpty) { diff --git a/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala b/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala index 10ab0b3f89964..1200ac001cce7 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/OpenHashMap.scala @@ -76,7 +76,7 @@ class OpenHashMap[K : ClassTag, @specialized(Long, Int, Double) V: ClassTag]( } /** Set the value for a key */ - def update(k: K, v: V) { + def update(k: K, v: V): Unit = { if (k == null) { haveNullValue = true nullValue = v diff --git a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala index 8883e17bf3164..6815e47a198d9 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/OpenHashSet.scala @@ -113,7 +113,7 @@ class OpenHashSet[@specialized(Long, Int, Double, Float) T: ClassTag]( * Add an element to the set. If the set is over capacity after the insertion, grow the set * and rehash all elements. 
*/ - def add(k: T) { + def add(k: T): Unit = { addWithoutResize(k) rehashIfNeeded(k, grow, move) } @@ -166,7 +166,7 @@ class OpenHashSet[@specialized(Long, Int, Double, Float) T: ClassTag]( * @param moveFunc Callback invoked when we move the key from one position (in the old data array) * to a new position (in the new data array). */ - def rehashIfNeeded(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit) { + def rehashIfNeeded(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit): Unit = { if (_size > _growThreshold) { rehash(k, allocateFunc, moveFunc) } @@ -227,7 +227,7 @@ class OpenHashSet[@specialized(Long, Int, Double, Float) T: ClassTag]( * @param moveFunc Callback invoked when we move the key from one position (in the old data array) * to a new position (in the new data array). */ - private def rehash(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit) { + private def rehash(k: T, allocateFunc: (Int) => Unit, moveFunc: (Int, Int) => Unit): Unit = { val newCapacity = _capacity * 2 require(newCapacity > 0 && newCapacity <= OpenHashSet.MAX_CAPACITY, s"Can't contain more than ${(loadFactor * OpenHashSet.MAX_CAPACITY).toInt} elements") @@ -320,8 +320,8 @@ object OpenHashSet { override def hash(o: Float): Int = java.lang.Float.floatToIntBits(o) } - private def grow1(newSize: Int) {} - private def move1(oldPos: Int, newPos: Int) { } + private def grow1(newSize: Int): Unit = {} + private def move1(oldPos: Int, newPos: Int): Unit = { } private val grow = grow1 _ private val move = move1 _ diff --git a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala index b4ec4ea521253..7a50d851941ee 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMap.scala @@ -66,7 +66,7 @@ class 
PrimitiveKeyOpenHashMap[@specialized(Long, Int) K: ClassTag, } /** Set the value for a key */ - def update(k: K, v: V) { + def update(k: K, v: V): Unit = { val pos = _keySet.addWithoutResize(k) & OpenHashSet.POSITION_MASK _values(pos) = v _keySet.rehashIfNeeded(k, grow, move) diff --git a/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala b/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala index 9a7a5a4e74868..582bd124b5116 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala @@ -87,7 +87,7 @@ class KVArraySortDataFormat[K, T <: AnyRef : ClassTag] extends SortDataFormat[K, override def getKey(data: Array[T], pos: Int): K = data(2 * pos).asInstanceOf[K] - override def swap(data: Array[T], pos0: Int, pos1: Int) { + override def swap(data: Array[T], pos0: Int, pos1: Int): Unit = { val tmpKey = data(2 * pos0) val tmpVal = data(2 * pos0 + 1) data(2 * pos0) = data(2 * pos1) @@ -96,12 +96,13 @@ class KVArraySortDataFormat[K, T <: AnyRef : ClassTag] extends SortDataFormat[K, data(2 * pos1 + 1) = tmpVal } - override def copyElement(src: Array[T], srcPos: Int, dst: Array[T], dstPos: Int) { + override def copyElement(src: Array[T], srcPos: Int, dst: Array[T], dstPos: Int): Unit = { dst(2 * dstPos) = src(2 * srcPos) dst(2 * dstPos + 1) = src(2 * srcPos + 1) } - override def copyRange(src: Array[T], srcPos: Int, dst: Array[T], dstPos: Int, length: Int) { + override def copyRange(src: Array[T], srcPos: Int, + dst: Array[T], dstPos: Int, length: Int): Unit = { System.arraycopy(src, 2 * srcPos, dst, 2 * dstPos, 2 * length) } diff --git a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala index bfc0face5d8e5..1983b0002853d 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala +++ 
b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala @@ -141,7 +141,7 @@ private[spark] abstract class Spillable[C](taskMemoryManager: TaskMemoryManager) * * @param size number of bytes spilled */ - @inline private def logSpillage(size: Long) { + @inline private def logSpillage(size: Long): Unit = { val threadId = Thread.currentThread().getId logInfo("Thread %d spilling in-memory map of %s to disk (%d time%s so far)" .format(threadId, org.apache.spark.util.Utils.bytesToString(size), diff --git a/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala b/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala index da8d58d05b6b9..9624b02cb407c 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/WritablePartitionedPairCollection.scala @@ -19,7 +19,6 @@ package org.apache.spark.util.collection import java.util.Comparator -import org.apache.spark.storage.DiskBlockObjectWriter /** * A common interface for size-tracking collections of key-value pairs that diff --git a/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala b/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala index c4540433bce97..4c1b49762ace3 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala @@ -18,15 +18,18 @@ package org.apache.spark.util.logging import java.io._ +import java.util.EnumSet import java.util.concurrent.{ScheduledExecutorService, TimeUnit} import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream import org.apache.log4j.{FileAppender => Log4jFileAppender, 
_} import org.apache.spark.SparkConf +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.network.util.JavaUtils @@ -111,7 +114,8 @@ private[spark] class DriverLogger(conf: SparkConf) extends Logging { + DriverLogger.DRIVER_LOG_FILE_SUFFIX).getAbsolutePath() try { inStream = new BufferedInputStream(new FileInputStream(localLogFile)) - outputStream = fileSystem.create(new Path(dfsLogFile), true) + outputStream = SparkHadoopUtil.createFile(fileSystem, new Path(dfsLogFile), + conf.get(DRIVER_LOG_ALLOW_EC)) fileSystem.setPermission(new Path(dfsLogFile), LOG_FILE_PERMISSIONS) } catch { case e: Exception => @@ -131,12 +135,20 @@ private[spark] class DriverLogger(conf: SparkConf) extends Logging { } try { var remaining = inStream.available() + val hadData = remaining > 0 while (remaining > 0) { val read = inStream.read(tmpBuffer, 0, math.min(remaining, UPLOAD_CHUNK_SIZE)) outputStream.write(tmpBuffer, 0, read) remaining -= read } - outputStream.hflush() + if (hadData) { + outputStream match { + case hdfsStream: HdfsDataOutputStream => + hdfsStream.hsync(EnumSet.allOf(classOf[HdfsDataOutputStream.SyncFlag])) + case other => + other.hflush() + } + } } catch { case e: Exception => logError("Failed writing driver logs to dfs", e) } diff --git a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala index 3188e0bd2b70d..7107be25eb505 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala @@ -34,7 +34,7 @@ private[spark] class FileAppender(inputStream: InputStream, file: File, bufferSi // Thread that reads the input stream and writes to file private val writingThread = new Thread("File appending thread for " + file) { setDaemon(true) - override def run() { + override def run(): Unit = { 
Utils.logUncaughtExceptions { appendStreamToFile() } @@ -46,17 +46,17 @@ private[spark] class FileAppender(inputStream: InputStream, file: File, bufferSi * Wait for the appender to stop appending, either because input stream is closed * or because of any error in appending */ - def awaitTermination() { + def awaitTermination(): Unit = { writingThread.join() } /** Stop the appender */ - def stop() { + def stop(): Unit = { markedForStop = true } /** Continuously read chunks from the input stream and append to the file */ - protected def appendStreamToFile() { + protected def appendStreamToFile(): Unit = { try { logDebug("Started appending thread") Utils.tryWithSafeFinally { @@ -85,7 +85,7 @@ private[spark] class FileAppender(inputStream: InputStream, file: File, bufferSi } /** Append bytes to the file output stream */ - protected def appendToFile(bytes: Array[Byte], len: Int) { + protected def appendToFile(bytes: Array[Byte], len: Int): Unit = { if (outputStream == null) { openFile() } @@ -93,13 +93,13 @@ private[spark] class FileAppender(inputStream: InputStream, file: File, bufferSi } /** Open the file output stream */ - protected def openFile() { + protected def openFile(): Unit = { outputStream = new FileOutputStream(file, true) logDebug(s"Opened file $file") } /** Close the file output stream */ - protected def closeFile() { + protected def closeFile(): Unit = { outputStream.flush() outputStream.close() logDebug(s"Closed file $file") diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala index 59439b68792e5..b73f422649312 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala @@ -49,12 +49,12 @@ private[spark] class RollingFileAppender( private val enableCompression = conf.get(config.EXECUTOR_LOGS_ROLLING_ENABLE_COMPRESSION) /** Stop the 
appender */ - override def stop() { + override def stop(): Unit = { super.stop() } /** Append bytes to file after rolling over is necessary */ - override protected def appendToFile(bytes: Array[Byte], len: Int) { + override protected def appendToFile(bytes: Array[Byte], len: Int): Unit = { if (rollingPolicy.shouldRollover(len)) { rollover() rollingPolicy.rolledOver() @@ -64,7 +64,7 @@ private[spark] class RollingFileAppender( } /** Rollover the file, by closing the output stream and moving it over */ - private def rollover() { + private def rollover(): Unit = { try { closeFile() moveFile() @@ -106,7 +106,7 @@ private[spark] class RollingFileAppender( } /** Move the active log file to a new rollover file */ - private def moveFile() { + private def moveFile(): Unit = { val rolloverSuffix = rollingPolicy.generateRolledOverFileSuffix() val rolloverFile = new File( activeFile.getParentFile, activeFile.getName + rolloverSuffix).getAbsoluteFile @@ -138,7 +138,7 @@ private[spark] class RollingFileAppender( } /** Retain only last few files */ - private[util] def deleteOldFiles() { + private[util] def deleteOldFiles(): Unit = { try { val rolledoverFiles = activeFile.getParentFile.listFiles(new FileFilter { def accept(f: File): Boolean = { diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala index 1f263df57c857..5327ecd3e56a9 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala @@ -67,12 +67,12 @@ private[spark] class TimeBasedRollingPolicy( } /** Rollover has occurred, so find the next time to rollover */ - def rolledOver() { + def rolledOver(): Unit = { nextRolloverTime = calculateNextRolloverTime() logDebug(s"Current time: ${System.currentTimeMillis}, next rollover time: " + nextRolloverTime) } - def bytesWritten(bytes: Long) { } // nothing to do + def 
bytesWritten(bytes: Long): Unit = { } // nothing to do private def calculateNextRolloverTime(): Long = { val now = System.currentTimeMillis() @@ -118,12 +118,12 @@ private[spark] class SizeBasedRollingPolicy( } /** Rollover has occurred, so reset the counter */ - def rolledOver() { + def rolledOver(): Unit = { bytesWrittenSinceRollover = 0 } /** Increment the bytes that have been written in the current file */ - def bytesWritten(bytes: Long) { + def bytesWritten(bytes: Long): Unit = { bytesWrittenSinceRollover += bytes } diff --git a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala index 70554f1d03067..6dd2beebbb3dc 100644 --- a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala +++ b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala @@ -201,7 +201,7 @@ class PoissonSampler[T]( private val rng = new PoissonDistribution(if (fraction > 0.0) fraction else 1.0) private val rngGap = RandomSampler.newDefaultRNG - override def setSeed(seed: Long) { + override def setSeed(seed: Long): Unit = { rng.reseedRandomGenerator(seed) rngGap.setSeed(seed) } diff --git a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala index af09e50a157ae..313569a81646d 100644 --- a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala +++ b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala @@ -49,7 +49,7 @@ private[spark] class XORShiftRandom(init: Long) extends JavaRandom(init) { (nextSeed & ((1L << bits) -1)).asInstanceOf[Int] } - override def setSeed(s: Long) { + override def setSeed(s: Long): Unit = { seed = XORShiftRandom.hashSeed(s) } } @@ -60,7 +60,7 @@ private[spark] object XORShiftRandom { /** Hash seeds to have 0/1 bits throughout. 
*/ private[random] def hashSeed(seed: Long): Long = { val bytes = ByteBuffer.allocate(java.lang.Long.BYTES).putLong(seed).array() - val lowBits = MurmurHash3.bytesHash(bytes) + val lowBits = MurmurHash3.bytesHash(bytes, MurmurHash3.arraySeed) val highBits = MurmurHash3.bytesHash(bytes, lowBits) (highBits.toLong << 32) | (lowBits.toLong & 0xFFFFFFFFL) } diff --git a/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java b/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java deleted file mode 100644 index 80cd70282a51d..0000000000000 --- a/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark; - -import org.apache.spark.api.java.JavaSparkContext; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class ExecutorPluginSuite { - private static final String EXECUTOR_PLUGIN_CONF_NAME = "spark.executor.plugins"; - private static final String testBadPluginName = TestBadShutdownPlugin.class.getName(); - private static final String testPluginName = TestExecutorPlugin.class.getName(); - private static final String testSecondPluginName = TestSecondPlugin.class.getName(); - - // Static value modified by testing plugins to ensure plugins loaded correctly. - public static int numSuccessfulPlugins = 0; - - // Static value modified by testing plugins to verify plugins shut down properly. - public static int numSuccessfulTerminations = 0; - - private JavaSparkContext sc; - - @Before - public void setUp() { - sc = null; - numSuccessfulPlugins = 0; - numSuccessfulTerminations = 0; - } - - @After - public void tearDown() { - if (sc != null) { - sc.stop(); - sc = null; - } - } - - private SparkConf initializeSparkConf(String pluginNames) { - return new SparkConf() - .setMaster("local") - .setAppName("test") - .set(EXECUTOR_PLUGIN_CONF_NAME, pluginNames); - } - - @Test - public void testPluginClassDoesNotExist() { - SparkConf conf = initializeSparkConf("nonexistent.plugin"); - try { - sc = new JavaSparkContext(conf); - fail("No exception thrown for nonexistent plugin"); - } catch (Exception e) { - // We cannot catch ClassNotFoundException directly because Java doesn't think it'll be thrown - assertTrue(e.toString().startsWith("java.lang.ClassNotFoundException")); - } - } - - @Test - public void testAddPlugin() throws InterruptedException { - // Load the sample TestExecutorPlugin, which will change the value of numSuccessfulPlugins - SparkConf conf = initializeSparkConf(testPluginName); - sc = new JavaSparkContext(conf); - assertEquals(1, numSuccessfulPlugins); - 
sc.stop(); - sc = null; - assertEquals(1, numSuccessfulTerminations); - } - - @Test - public void testAddMultiplePlugins() throws InterruptedException { - // Load two plugins and verify they both execute. - SparkConf conf = initializeSparkConf(testPluginName + "," + testSecondPluginName); - sc = new JavaSparkContext(conf); - assertEquals(2, numSuccessfulPlugins); - sc.stop(); - sc = null; - assertEquals(2, numSuccessfulTerminations); - } - - @Test - public void testPluginShutdownWithException() { - // Verify an exception in one plugin shutdown does not affect the others - String pluginNames = testPluginName + "," + testBadPluginName + "," + testPluginName; - SparkConf conf = initializeSparkConf(pluginNames); - sc = new JavaSparkContext(conf); - assertEquals(3, numSuccessfulPlugins); - sc.stop(); - sc = null; - assertEquals(2, numSuccessfulTerminations); - } - - public static class TestExecutorPlugin implements ExecutorPlugin { - public void init() { - ExecutorPluginSuite.numSuccessfulPlugins++; - } - - public void shutdown() { - ExecutorPluginSuite.numSuccessfulTerminations++; - } - } - - public static class TestSecondPlugin implements ExecutorPlugin { - public void init() { - ExecutorPluginSuite.numSuccessfulPlugins++; - } - - public void shutdown() { - ExecutorPluginSuite.numSuccessfulTerminations++; - } - } - - public static class TestBadShutdownPlugin implements ExecutorPlugin { - public void init() { - ExecutorPluginSuite.numSuccessfulPlugins++; - } - - public void shutdown() { - throw new RuntimeException("This plugin will fail to cleanly shut down"); - } - } -} diff --git a/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java b/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java index 22db3592ecc96..8ff787975eaae 100644 --- a/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java +++ b/core/src/test/java/org/apache/spark/io/GenericFileInputStreamSuite.java @@ -48,8 +48,12 @@ public void setUp() throws 
IOException { } @After - public void tearDown() { + public void tearDown() throws IOException { inputFile.delete(); + + for (InputStream is : inputStreams) { + is.close(); + } } @Test @@ -141,4 +145,15 @@ public void testBytesSkippedAfterEOF() throws IOException { assertEquals(-1, inputStream.read()); } } + + @Test + public void testReadPastEOF() throws IOException { + InputStream is = inputStreams[0]; + byte[] buf = new byte[1024]; + int read; + while ((read = is.read(buf, 0, buf.length)) != -1); + + int readAfterEOF = is.read(buf, 0, buf.length); + assertEquals(-1, readAfterEOF); + } } diff --git a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java index 773c390175b6d..fb8523856da6f 100644 --- a/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java +++ b/core/src/test/java/org/apache/spark/launcher/SparkLauncherSuite.java @@ -323,7 +323,7 @@ public static class InProcessTestApp { public static void main(String[] args) throws Exception { assertNotEquals(0, args.length); - assertEquals(args[0], "hello"); + assertEquals("hello", args[0]); new SparkContext().stop(); synchronized (LOCK) { @@ -340,7 +340,7 @@ public static class ErrorInProcessTestApp { public static void main(String[] args) { assertNotEquals(0, args.length); - assertEquals(args[0], "hello"); + assertEquals("hello", args[0]); throw DUMMY_EXCEPTION; } } diff --git a/core/src/test/java/org/apache/spark/resource/JavaResourceProfileSuite.java b/core/src/test/java/org/apache/spark/resource/JavaResourceProfileSuite.java new file mode 100644 index 0000000000000..bb413c00fb972 --- /dev/null +++ b/core/src/test/java/org/apache/spark/resource/JavaResourceProfileSuite.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource; + +import java.util.Map; + +import static org.junit.Assert.*; +import org.junit.Test; + +// Test the ResourceProfile and Request api's from Java +public class JavaResourceProfileSuite { + + String GpuResource = "resource.gpu"; + String FPGAResource = "resource.fpga"; + + @Test + public void testResourceProfileAccessFromJava() throws Exception { + ExecutorResourceRequests execReqGpu = + new ExecutorResourceRequests().resource(GpuResource, 2,"myscript", ""); + ExecutorResourceRequests execReqFpga = + new ExecutorResourceRequests().resource(FPGAResource, 3, "myfpgascript", "nvidia"); + + ResourceProfileBuilder rprof = new ResourceProfileBuilder(); + rprof.require(execReqGpu); + rprof.require(execReqFpga); + TaskResourceRequests taskReq1 = new TaskResourceRequests().resource(GpuResource, 1); + rprof.require(taskReq1); + + assertEquals(rprof.executorResources().size(), 2); + Map eresources = rprof.executorResourcesJMap(); + assert(eresources.containsKey(GpuResource)); + ExecutorResourceRequest gpuReq = eresources.get(GpuResource); + assertEquals(gpuReq.amount(), 2); + assertEquals(gpuReq.discoveryScript(), "myscript"); + assertEquals(gpuReq.vendor(), ""); + + assert(eresources.containsKey(FPGAResource)); + ExecutorResourceRequest fpgaReq = eresources.get(FPGAResource); + assertEquals(fpgaReq.amount(), 3); + assertEquals(fpgaReq.discoveryScript(), 
"myfpgascript"); + assertEquals(fpgaReq.vendor(), "nvidia"); + + assertEquals(rprof.taskResources().size(), 1); + Map tresources = rprof.taskResourcesJMap(); + assert(tresources.containsKey(GpuResource)); + TaskResourceRequest taskReq = tresources.get(GpuResource); + assertEquals(taskReq.amount(), 1.0, 0); + assertEquals(taskReq.resourceName(), GpuResource); + } +} + diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index 6b83a984f037c..ee8e38c24b47f 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -19,8 +19,10 @@ import java.io.*; import java.nio.ByteBuffer; +import java.nio.file.Files; import java.util.*; +import org.mockito.stubbing.Answer; import scala.Option; import scala.Product2; import scala.Tuple2; @@ -28,7 +30,6 @@ import scala.collection.Iterator; import com.google.common.collect.HashMultiset; -import com.google.common.collect.Iterators; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -53,6 +54,7 @@ import org.apache.spark.security.CryptoStreamUtils; import org.apache.spark.serializer.*; import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.sort.io.LocalDiskShuffleExecutorComponents; import org.apache.spark.storage.*; import org.apache.spark.util.Utils; @@ -65,6 +67,7 @@ public class UnsafeShuffleWriterSuite { + static final int DEFAULT_INITIAL_SORT_BUFFER_SIZE = 4096; static final int NUM_PARTITITONS = 4; TestMemoryManager memoryManager; TaskMemoryManager taskMemoryManager; @@ -131,15 +134,29 @@ public void setUp() throws IOException { ); }); - when(shuffleBlockResolver.getDataFile(anyInt(), anyInt())).thenReturn(mergedOutputFile); - doAnswer(invocationOnMock -> { + when(shuffleBlockResolver.getDataFile(anyInt(), 
anyLong())).thenReturn(mergedOutputFile); + + Answer renameTempAnswer = invocationOnMock -> { partitionSizesInMergedFile = (long[]) invocationOnMock.getArguments()[2]; File tmp = (File) invocationOnMock.getArguments()[3]; - mergedOutputFile.delete(); - tmp.renameTo(mergedOutputFile); + if (!mergedOutputFile.delete()) { + throw new RuntimeException("Failed to delete old merged output file."); + } + if (tmp != null) { + Files.move(tmp.toPath(), mergedOutputFile.toPath()); + } else if (!mergedOutputFile.createNewFile()) { + throw new RuntimeException("Failed to create empty merged output file."); + } return null; - }).when(shuffleBlockResolver) - .writeIndexFileAndCommit(anyInt(), anyInt(), any(long[].class), any(File.class)); + }; + + doAnswer(renameTempAnswer) + .when(shuffleBlockResolver) + .writeIndexFileAndCommit(anyInt(), anyLong(), any(long[].class), any(File.class)); + + doAnswer(renameTempAnswer) + .when(shuffleBlockResolver) + .writeIndexFileAndCommit(anyInt(), anyLong(), any(long[].class), eq(null)); when(diskBlockManager.createTempShuffleBlock()).thenAnswer(invocationOnMock -> { TempShuffleBlockId blockId = new TempShuffleBlockId(UUID.randomUUID()); @@ -151,21 +168,20 @@ public void setUp() throws IOException { when(taskContext.taskMetrics()).thenReturn(taskMetrics); when(shuffleDep.serializer()).thenReturn(serializer); when(shuffleDep.partitioner()).thenReturn(hashPartitioner); + when(taskContext.taskMemoryManager()).thenReturn(taskMemoryManager); } - private UnsafeShuffleWriter createWriter( - boolean transferToEnabled) throws IOException { + private UnsafeShuffleWriter createWriter(boolean transferToEnabled) { conf.set("spark.file.transferTo", String.valueOf(transferToEnabled)); return new UnsafeShuffleWriter<>( blockManager, - shuffleBlockResolver, taskMemoryManager, - new SerializedShuffleHandle<>(0, 1, shuffleDep), - 0, // map id + new SerializedShuffleHandle<>(0, shuffleDep), + 0L, // map id taskContext, conf, - 
taskContext.taskMetrics().shuffleWriteMetrics() - ); + taskContext.taskMetrics().shuffleWriteMetrics(), + new LocalDiskShuffleExecutorComponents(conf, blockManager, shuffleBlockResolver)); } private void assertSpillFilesWereCleanedUp() { @@ -232,7 +248,7 @@ class BadRecords extends scala.collection.AbstractIterator writer = createWriter(true); - writer.write(Iterators.emptyIterator()); + writer.write(new ArrayList>().iterator()); final Option mapStatus = writer.stop(true); assertTrue(mapStatus.isDefined()); assertTrue(mergedOutputFile.exists()); @@ -391,7 +407,7 @@ public void mergeSpillsWithFileStreamAndCompressionAndEncryption() throws Except @Test public void mergeSpillsWithCompressionAndEncryptionSlowPath() throws Exception { - conf.set(package$.MODULE$.SHUFFLE_UNDAFE_FAST_MERGE_ENABLE(), false); + conf.set(package$.MODULE$.SHUFFLE_UNSAFE_FAST_MERGE_ENABLE(), false); testMergingSpills(false, LZ4CompressionCodec.class.getName(), true); } @@ -444,10 +460,10 @@ public void writeEnoughRecordsToTriggerSortBufferExpansionAndSpillRadixOn() thro } private void writeEnoughRecordsToTriggerSortBufferExpansionAndSpill() throws Exception { - memoryManager.limit(UnsafeShuffleWriter.DEFAULT_INITIAL_SORT_BUFFER_SIZE * 16); + memoryManager.limit(DEFAULT_INITIAL_SORT_BUFFER_SIZE * 16); final UnsafeShuffleWriter writer = createWriter(false); final ArrayList> dataToWrite = new ArrayList<>(); - for (int i = 0; i < UnsafeShuffleWriter.DEFAULT_INITIAL_SORT_BUFFER_SIZE + 1; i++) { + for (int i = 0; i < DEFAULT_INITIAL_SORT_BUFFER_SIZE + 1; i++) { dataToWrite.add(new Tuple2<>(i, i)); } writer.write(dataToWrite.iterator()); @@ -516,16 +532,15 @@ public void testPeakMemoryUsed() throws Exception { final long numRecordsPerPage = pageSizeBytes / recordLengthBytes; taskMemoryManager = spy(taskMemoryManager); when(taskMemoryManager.pageSizeBytes()).thenReturn(pageSizeBytes); - final UnsafeShuffleWriter writer = - new UnsafeShuffleWriter<>( + final UnsafeShuffleWriter writer = new 
UnsafeShuffleWriter<>( blockManager, - shuffleBlockResolver, taskMemoryManager, - new SerializedShuffleHandle<>(0, 1, shuffleDep), - 0, // map id + new SerializedShuffleHandle<>(0, shuffleDep), + 0L, // map id taskContext, conf, - taskContext.taskMetrics().shuffleWriteMetrics()); + taskContext.taskMetrics().shuffleWriteMetrics(), + new LocalDiskShuffleExecutorComponents(conf, blockManager, shuffleBlockResolver)); // Peak memory should be monotonically increasing. More specifically, every time // we allocate a new page it should increase by exactly the size of the page. diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index 8d03c6778e18b..6e995a3929a75 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -34,6 +34,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.executor.ShuffleWriteMetrics; import org.apache.spark.memory.MemoryMode; +import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TestMemoryConsumer; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.memory.TestMemoryManager; @@ -691,13 +692,11 @@ public void avoidDeadlock() throws InterruptedException { Thread thread = new Thread(() -> { int i = 0; - long used = 0; while (i < 10) { c1.use(10000000); - used += 10000000; i++; } - c1.free(used); + c1.free(c1.getUsed()); }); try { @@ -726,4 +725,22 @@ public void avoidDeadlock() throws InterruptedException { } } + @Test + public void freeAfterFailedReset() { + // SPARK-29244: BytesToBytesMap.free after a OOM reset operation should not cause failure. + memoryManager.limit(5000); + BytesToBytesMap map = + new BytesToBytesMap(taskMemoryManager, blockManager, serializerManager, 256, 0.5, 4000); + // Force OOM on next memory allocation. 
+ memoryManager.markExecutionAsOutOfMemoryOnce(); + try { + map.reset(); + Assert.fail("Expected SparkOutOfMemoryError to be thrown"); + } catch (SparkOutOfMemoryError e) { + // Expected exception; do nothing. + } finally { + map.free(); + } + } + } diff --git a/core/src/test/java/org/apache/spark/util/SerializableConfigurationSuite.java b/core/src/test/java/org/apache/spark/util/SerializableConfigurationSuite.java new file mode 100644 index 0000000000000..28d038a524c88 --- /dev/null +++ b/core/src/test/java/org/apache/spark/util/SerializableConfigurationSuite.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.util; + +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +import static org.junit.Assert.assertEquals; + + +public class SerializableConfigurationSuite { + private transient JavaSparkContext sc; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "SerializableConfigurationSuite"); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + } + + @Test + public void testSerializableConfiguration() { + JavaRDD rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2); + Configuration hadoopConfiguration = new Configuration(false); + hadoopConfiguration.set("test.property", "value"); + SerializableConfiguration scs = new SerializableConfiguration(hadoopConfiguration); + SerializableConfiguration actual = rdd.map(val -> scs).collect().get(0); + assertEquals("value", actual.value().get("test.property")); + } +} diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index c6aa623560d57..43977717f6c97 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -222,7 +222,7 @@ public void testSortingEmptyArrays() throws Exception { public void testSortTimeMetric() throws Exception { final UnsafeExternalSorter sorter = newSorter(); long prevSortTime = sorter.getSortTimeNanos(); - assertEquals(prevSortTime, 0); + assertEquals(0, prevSortTime); sorter.insertRecord(null, 0, 0, 0, false); sorter.spill(); @@ -230,11 +230,14 @@ public void testSortTimeMetric() throws Exception { prevSortTime = sorter.getSortTimeNanos(); 
sorter.spill(); // no sort needed - assertEquals(sorter.getSortTimeNanos(), prevSortTime); + assertEquals(prevSortTime, sorter.getSortTimeNanos()); sorter.insertRecord(null, 0, 0, 0, false); UnsafeSorterIterator iter = sorter.getSortedIterator(); assertThat(sorter.getSortTimeNanos(), greaterThan(prevSortTime)); + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); } @Test @@ -510,6 +513,8 @@ public void testGetIterator() throws Exception { verifyIntIterator(sorter.getIterator(79), 79, 300); verifyIntIterator(sorter.getIterator(139), 139, 300); verifyIntIterator(sorter.getIterator(279), 279, 300); + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); } @Test diff --git a/core/src/test/java/test/org/apache/spark/JavaTaskContextCompileCheck.java b/core/src/test/java/test/org/apache/spark/JavaTaskContextCompileCheck.java index 62a0b85915efc..5ce7937c03de2 100644 --- a/core/src/test/java/test/org/apache/spark/JavaTaskContextCompileCheck.java +++ b/core/src/test/java/test/org/apache/spark/JavaTaskContextCompileCheck.java @@ -17,7 +17,10 @@ package test.org.apache.spark; +import java.util.Map; + import org.apache.spark.TaskContext; +import org.apache.spark.resource.ResourceInformation; import org.apache.spark.util.TaskCompletionListener; import org.apache.spark.util.TaskFailureListener; @@ -40,7 +43,9 @@ public static void test() { tc.stageId(); tc.stageAttemptNumber(); tc.taskAttemptId(); + // this returns a scala Map, so make sure the JMap version give a java type back tc.resources(); + Map resources = tc.resourcesJMap(); tc.taskMetrics(); tc.taskMemoryManager(); tc.getLocalProperties(); diff --git a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala index 435665d8a1ce2..a75cf3f0381df 100644 --- a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala @@ -126,7 +126,7 @@ private[spark] object AccumulatorSuite 
{ sc.addSparkListener(listener) testBody // wait until all events have been processed before proceeding to assert things - sc.listenerBus.waitUntilEmpty(10 * 1000) + sc.listenerBus.waitUntilEmpty() val accums = listener.getCompletedStageInfos.flatMap(_.accumulables.values) val isSet = accums.exists { a => a.name == Some(PEAK_EXECUTION_MEMORY) && a.value.exists(_.asInstanceOf[Long] > 0L) diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index 3a43f1a033da1..6a108a55045ee 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -24,6 +24,7 @@ import scala.reflect.ClassTag import com.google.common.io.ByteStreams import org.apache.hadoop.fs.Path +import org.apache.spark.internal.config.CACHE_CHECKPOINT_PREFERRED_LOCS_EXPIRE_TIME import org.apache.spark.internal.config.UI._ import org.apache.spark.io.CompressionCodec import org.apache.spark.rdd._ @@ -584,7 +585,7 @@ object CheckpointSuite { } } -class CheckpointCompressionSuite extends SparkFunSuite with LocalSparkContext { +class CheckpointStorageSuite extends SparkFunSuite with LocalSparkContext { test("checkpoint compression") { withTempDir { checkpointDir => @@ -618,4 +619,27 @@ class CheckpointCompressionSuite extends SparkFunSuite with LocalSparkContext { assert(rdd.collect().toSeq === (1 to 20)) } } + + test("cache checkpoint preferred location") { + withTempDir { checkpointDir => + val conf = new SparkConf() + .set(CACHE_CHECKPOINT_PREFERRED_LOCS_EXPIRE_TIME.key, "10") + .set(UI_ENABLED.key, "false") + sc = new SparkContext("local", "test", conf) + sc.setCheckpointDir(checkpointDir.toString) + val rdd = sc.makeRDD(1 to 20, numSlices = 1) + rdd.checkpoint() + assert(rdd.collect().toSeq === (1 to 20)) + + // Verify that RDD is checkpointed + assert(rdd.firstParent.isInstanceOf[ReliableCheckpointRDD[_]]) + val checkpointedRDD = 
rdd.firstParent.asInstanceOf[ReliableCheckpointRDD[_]] + val partiton = checkpointedRDD.partitions(0) + assert(!checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partiton)) + + val preferredLoc = checkpointedRDD.preferredLocations(partiton) + assert(checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partiton)) + assert(preferredLoc == checkpointedRDD.cachedPreferredLocations.get(partiton)) + } + } } diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index 6a30a1d32f8c6..92ed24408384f 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -97,7 +97,7 @@ abstract class ContextCleanerSuiteBase(val shuffleManager: Class[_] = classOf[So } /** Run GC and make sure it actually has run */ - protected def runGC() { + protected def runGC(): Unit = { val weakRef = new WeakReference(new Object()) val startTimeNs = System.nanoTime() System.gc() // Make a best effort to run the garbage collection. It *usually* runs GC. @@ -406,7 +406,7 @@ class CleanerTester( sc.cleaner.get.attachListener(cleanerListener) /** Assert that all the stuff has been cleaned up */ - def assertCleanup()(implicit waitTimeout: PatienceConfiguration.Timeout) { + def assertCleanup()(implicit waitTimeout: PatienceConfiguration.Timeout): Unit = { try { eventually(waitTimeout, interval(100.milliseconds)) { assert(isAllCleanedUp, @@ -419,7 +419,7 @@ class CleanerTester( } /** Verify that RDDs, shuffles, etc. occupy resources */ - private def preCleanupValidate() { + private def preCleanupValidate(): Unit = { assert(rddIds.nonEmpty || shuffleIds.nonEmpty || broadcastIds.nonEmpty || checkpointIds.nonEmpty, "Nothing to cleanup") @@ -465,7 +465,7 @@ class CleanerTester( * Verify that RDDs, shuffles, etc. do not occupy resources. 
Tests multiple times as there is * as there is not guarantee on how long it will take clean up the resources. */ - private def postCleanupValidate() { + private def postCleanupValidate(): Unit = { // Verify the RDDs have been persisted and blocks are present rddIds.foreach { rddId => assert( diff --git a/core/src/test/scala/org/apache/spark/DebugFilesystem.scala b/core/src/test/scala/org/apache/spark/DebugFilesystem.scala index a5bdc95790722..1d3e28b39548f 100644 --- a/core/src/test/scala/org/apache/spark/DebugFilesystem.scala +++ b/core/src/test/scala/org/apache/spark/DebugFilesystem.scala @@ -21,7 +21,6 @@ import java.io.{FileDescriptor, InputStream} import java.lang import java.nio.ByteBuffer -import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.hadoop.fs._ diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala b/core/src/test/scala/org/apache/spark/DistributedSuite.scala index aad20545bafbe..3f309819065be 100644 --- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark +import org.scalatest.Assertions._ import org.scalatest.Matchers import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.time.{Millis, Span} @@ -77,7 +78,7 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex test("simple groupByKey") { sc = new SparkContext(clusterUrl, "test") - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1)), 5) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1)), 5) val groups = pairs.groupByKey(5).collect() assert(groups.size === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 @@ -339,6 +340,21 @@ class DistributedSuite extends SparkFunSuite with Matchers with LocalSparkContex } } + test("reference partitions inside a task") { + // Run a simple job which just makes sure there is no failure if we touch 
rdd.partitions + // inside a task. This requires the stateLock to be serializable. This is very convoluted + // use case, it's just a check for backwards-compatibility after the fix for SPARK-28917. + sc = new SparkContext("local-cluster[1,1,1024]", "test") + val rdd1 = sc.parallelize(1 to 10, 1) + val rdd2 = rdd1.map { x => x + 1} + // ensure we can force computation of rdd2.dependencies inside a task. Just touching + // it will force computation and touching the stateLock. The check for null is to just + // to make sure that we've setup our test correctly, and haven't precomputed dependencies + // in the driver + val dependencyComputeCount = rdd1.map { x => if (rdd2.dependencies == null) 1 else 0}.sum() + assert(dependencyComputeCount > 0) + } + } object DistributedSuite { diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala index 182f28c5cce54..f58777584d0ae 100644 --- a/core/src/test/scala/org/apache/spark/DriverSuite.scala +++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala @@ -50,7 +50,7 @@ class DriverSuite extends SparkFunSuite with TimeLimits { * sys.exit() after finishing. 
*/ object DriverWithoutCleanup { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") val conf = new SparkConf val sc = new SparkContext(args(0), "DriverWithoutCleanup", conf) diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 07fb323cfc355..8fa33f4915ea4 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark +import java.util.concurrent.TimeUnit + import scala.collection.mutable import org.mockito.ArgumentMatchers.{any, eq => meq} @@ -27,6 +29,8 @@ import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.internal.config import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, ResourceProfileBuilder, ResourceProfileManager, TaskResourceRequests} +import org.apache.spark.resource.ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.{Clock, ManualClock, SystemClock} @@ -42,6 +46,9 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { private val managers = new mutable.ListBuffer[ExecutorAllocationManager]() private var listenerBus: LiveListenerBus = _ private var client: ExecutorAllocationClient = _ + private val clock = new SystemClock() + private var rpManager: ResourceProfileManager = _ + override def beforeEach(): Unit = { super.beforeEach() @@ -64,7 +71,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { private def post(event: SparkListenerEvent): Unit = { listenerBus.post(event) - listenerBus.waitUntilEmpty(1000) + listenerBus.waitUntilEmpty() } 
test("initialize dynamic allocation in SparkContext") { @@ -105,65 +112,257 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { test("starting state") { val manager = createManager(createConf()) - assert(numExecutorsTarget(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) assert(executorsPendingToRemove(manager).isEmpty) assert(addTime(manager) === ExecutorAllocationManager.NOT_SET) } - test("add executors") { + test("add executors default profile") { val manager = createManager(createConf(1, 10, 1)) post(SparkListenerStageSubmitted(createStageInfo(0, 1000))) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + + // Keep adding until the limit is reached + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) + assert(numExecutorsToAddForDefaultProfile(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 4) + assert(numExecutorsToAddForDefaultProfile(manager) === 4) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 4) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 8) + assert(numExecutorsToAddForDefaultProfile(manager) === 8) + // reached the limit of 10 + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) 
+ assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + + // Register previously requested executors + onExecutorAddedDefaultProfile(manager, "first") + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + onExecutorAddedDefaultProfile(manager, "second") + onExecutorAddedDefaultProfile(manager, "third") + onExecutorAddedDefaultProfile(manager, "fourth") + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + onExecutorAddedDefaultProfile(manager, "first") // duplicates should not count + onExecutorAddedDefaultProfile(manager, "second") + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + + // Try adding again + // This should still fail because the number pending + running is still at the limit + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + } + + test("add executors multiple profiles") { + val manager = createManager(createConf(1, 10, 1)) + post(SparkListenerStageSubmitted(createStageInfo(0, 1000, rp = defaultProfile))) + val rp1 = new ResourceProfileBuilder() + val execReqs = new ExecutorResourceRequests().cores(4).resource("gpu", 4) + val taskReqs = new TaskResourceRequests().cpus(1).resource("gpu", 1) + rp1.require(execReqs).require(taskReqs) + val rprof1 = rp1.build + rpManager.addResourceProfile(rprof1) + 
post(SparkListenerStageSubmitted(createStageInfo(1, 1000, rp = rprof1))) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + // Keep adding until the limit is reached - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 1) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 2) - assert(numExecutorsToAdd(manager) === 2) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 4) - assert(numExecutorsToAdd(manager) === 4) - assert(addExecutors(manager) === 4) - assert(numExecutorsTarget(manager) === 8) - assert(numExecutorsToAdd(manager) === 8) - assert(addExecutors(manager) === 2) // reached the limit of 10 - assert(numExecutorsTarget(manager) === 10) - assert(numExecutorsToAdd(manager) === 1) - assert(addExecutors(manager) === 0) - assert(numExecutorsTarget(manager) === 10) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + assert(numExecutorsToAdd(manager, rprof1) === 1) + assert(numExecutorsTarget(manager, rprof1.id) === 1) + assert(addExecutorsToTarget(manager, updatesNeeded, rprof1) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) + assert(numExecutorsToAddForDefaultProfile(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + assert(numExecutorsToAdd(manager, rprof1) === 2) + assert(numExecutorsTarget(manager, rprof1.id) === 2) + assert(addExecutorsToTarget(manager, updatesNeeded, rprof1) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 4) + assert(numExecutorsToAddForDefaultProfile(manager) === 4) + 
assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 4) + assert(numExecutorsToAdd(manager, rprof1) === 4) + assert(numExecutorsTarget(manager, rprof1.id) === 4) + assert(addExecutorsToTarget(manager, updatesNeeded, rprof1) === 4) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 8) + assert(numExecutorsToAddForDefaultProfile(manager) === 8) + // reached the limit of 10 + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + assert(numExecutorsToAdd(manager, rprof1) === 8) + assert(numExecutorsTarget(manager, rprof1.id) === 8) + assert(addExecutorsToTarget(manager, updatesNeeded, rprof1) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + assert(numExecutorsToAdd(manager, rprof1) === 1) + assert(numExecutorsTarget(manager, rprof1.id) === 10) + assert(addExecutorsToTarget(manager, updatesNeeded, rprof1) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(numExecutorsToAdd(manager, rprof1) === 1) + assert(numExecutorsTarget(manager, rprof1.id) === 10) // Register previously requested executors - onExecutorAdded(manager, "first") - assert(numExecutorsTarget(manager) === 10) - onExecutorAdded(manager, "second") - onExecutorAdded(manager, "third") - onExecutorAdded(manager, "fourth") - assert(numExecutorsTarget(manager) === 10) - onExecutorAdded(manager, "first") // duplicates should not count - onExecutorAdded(manager, "second") - assert(numExecutorsTarget(manager) === 10) + onExecutorAddedDefaultProfile(manager, "first") + onExecutorAdded(manager, "firstrp1", 
rprof1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsTarget(manager, rprof1.id) === 10) + onExecutorAddedDefaultProfile(manager, "second") + onExecutorAddedDefaultProfile(manager, "third") + onExecutorAddedDefaultProfile(manager, "fourth") + onExecutorAdded(manager, "secondrp1", rprof1) + onExecutorAdded(manager, "thirdrp1", rprof1) + onExecutorAdded(manager, "fourthrp1", rprof1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsTarget(manager, rprof1.id) === 10) + onExecutorAddedDefaultProfile(manager, "first") // duplicates should not count + onExecutorAddedDefaultProfile(manager, "second") + onExecutorAdded(manager, "firstrp1", rprof1) + onExecutorAdded(manager, "secondrp1", rprof1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsTarget(manager, rprof1.id) === 10) // Try adding again // This should still fail because the number pending + running is still at the limit - assert(addExecutors(manager) === 0) - assert(numExecutorsTarget(manager) === 10) - assert(numExecutorsToAdd(manager) === 1) - assert(addExecutors(manager) === 0) - assert(numExecutorsTarget(manager) === 10) - assert(numExecutorsToAdd(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + assert(addExecutorsToTarget(manager, updatesNeeded, rprof1) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(numExecutorsToAdd(manager, rprof1) === 1) + assert(numExecutorsTarget(manager, rprof1.id) === 10) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + assert(addExecutorsToTarget(manager, updatesNeeded, rprof1) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + 
assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(numExecutorsToAdd(manager, rprof1) === 1) + assert(numExecutorsTarget(manager, rprof1.id) === 10) + } + + test("remove executors multiple profiles") { + val manager = createManager(createConf(5, 10, 5)) + val rp1 = new ResourceProfileBuilder() + val execReqs = new ExecutorResourceRequests().cores(4).resource("gpu", 4) + val taskReqs = new TaskResourceRequests().cpus(1).resource("gpu", 1) + rp1.require(execReqs).require(taskReqs) + val rprof1 = rp1.build + val rp2 = new ResourceProfileBuilder() + val execReqs2 = new ExecutorResourceRequests().cores(1) + val taskReqs2 = new TaskResourceRequests().cpus(1) + rp2.require(execReqs2).require(taskReqs2) + val rprof2 = rp2.build + rpManager.addResourceProfile(rprof1) + rpManager.addResourceProfile(rprof2) + post(SparkListenerStageSubmitted(createStageInfo(1, 10, rp = rprof1))) + post(SparkListenerStageSubmitted(createStageInfo(2, 10, rp = rprof2))) + + (1 to 10).map(_.toString).foreach { id => onExecutorAdded(manager, id, rprof1) } + (11 to 20).map(_.toString).foreach { id => onExecutorAdded(manager, id, rprof2) } + (21 to 30).map(_.toString).foreach { id => onExecutorAdded(manager, id, defaultProfile) } + + // Keep removing until the limit is reached + assert(executorsPendingToRemove(manager).isEmpty) + assert(removeExecutor(manager, "1", rprof1.id)) + assert(executorsPendingToRemove(manager).size === 1) + assert(executorsPendingToRemove(manager).contains("1")) + assert(removeExecutor(manager, "11", rprof2.id)) + assert(removeExecutor(manager, "2", rprof1.id)) + assert(executorsPendingToRemove(manager).size === 3) + assert(executorsPendingToRemove(manager).contains("2")) + assert(executorsPendingToRemove(manager).contains("11")) + assert(removeExecutor(manager, "21", defaultProfile.id)) + assert(removeExecutor(manager, "3", rprof1.id)) + assert(removeExecutor(manager, "4", rprof1.id)) + assert(executorsPendingToRemove(manager).size === 6) + 
assert(executorsPendingToRemove(manager).contains("21")) + assert(executorsPendingToRemove(manager).contains("3")) + assert(executorsPendingToRemove(manager).contains("4")) + assert(removeExecutor(manager, "5", rprof1.id)) + assert(!removeExecutor(manager, "6", rprof1.id)) // reached the limit of 5 + assert(executorsPendingToRemove(manager).size === 7) + assert(executorsPendingToRemove(manager).contains("5")) + assert(!executorsPendingToRemove(manager).contains("6")) + + // Kill executors previously requested to remove + onExecutorRemoved(manager, "1") + assert(executorsPendingToRemove(manager).size === 6) + assert(!executorsPendingToRemove(manager).contains("1")) + onExecutorRemoved(manager, "2") + onExecutorRemoved(manager, "3") + assert(executorsPendingToRemove(manager).size === 4) + assert(!executorsPendingToRemove(manager).contains("2")) + assert(!executorsPendingToRemove(manager).contains("3")) + onExecutorRemoved(manager, "2") // duplicates should not count + onExecutorRemoved(manager, "3") + assert(executorsPendingToRemove(manager).size === 4) + onExecutorRemoved(manager, "4") + onExecutorRemoved(manager, "5") + assert(executorsPendingToRemove(manager).size === 2) + assert(executorsPendingToRemove(manager).contains("11")) + assert(executorsPendingToRemove(manager).contains("21")) + + // Try removing again + // This should still fail because the number pending + running is still at the limit + assert(!removeExecutor(manager, "7", rprof1.id)) + assert(executorsPendingToRemove(manager).size === 2) + assert(!removeExecutor(manager, "8", rprof1.id)) + assert(executorsPendingToRemove(manager).size === 2) + + // make sure rprof2 has the same min limit of 5 + assert(removeExecutor(manager, "12", rprof2.id)) + assert(removeExecutor(manager, "13", rprof2.id)) + assert(removeExecutor(manager, "14", rprof2.id)) + assert(removeExecutor(manager, "15", rprof2.id)) + assert(!removeExecutor(manager, "16", rprof2.id)) // reached the limit of 5 + 
assert(executorsPendingToRemove(manager).size === 6) + assert(!executorsPendingToRemove(manager).contains("16")) + onExecutorRemoved(manager, "11") + onExecutorRemoved(manager, "12") + onExecutorRemoved(manager, "13") + onExecutorRemoved(manager, "14") + onExecutorRemoved(manager, "15") + assert(executorsPendingToRemove(manager).size === 1) } def testAllocationRatio(cores: Int, divisor: Double, expected: Int): Unit = { + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] val conf = createConf(3, 15) .set(config.DYN_ALLOCATION_EXECUTOR_ALLOCATION_RATIO, divisor) .set(config.EXECUTOR_CORES, cores) val manager = createManager(conf) post(SparkListenerStageSubmitted(createStageInfo(0, 20))) for (i <- 0 to 5) { - addExecutors(manager) + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) } - assert(numExecutorsTarget(manager) === expected) + assert(numExecutorsTargetForDefaultProfileId(manager) === expected) } test("executionAllocationRatio is correctly handled") { @@ -182,83 +381,249 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { val manager = createManager(createConf(0, 10, 0)) post(SparkListenerStageSubmitted(createStageInfo(0, 5))) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + // Verify that we're capped at number of tasks in the stage - assert(numExecutorsTarget(manager) === 0) - assert(numExecutorsToAdd(manager) === 1) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 2) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 3) - assert(numExecutorsToAdd(manager) === 4) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 5) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) + 
assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) + assert(numExecutorsToAddForDefaultProfile(manager) === 4) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) // Verify that running a task doesn't affect the target post(SparkListenerStageSubmitted(createStageInfo(1, 3))) post(SparkListenerExecutorAdded( 0L, "executor-1", new ExecutorInfo("host1", 1, Map.empty, Map.empty))) post(SparkListenerTaskStart(1, 0, createTaskInfo(0, 0, "executor-1"))) - assert(numExecutorsTarget(manager) === 5) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 6) - assert(numExecutorsToAdd(manager) === 2) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 8) - assert(numExecutorsToAdd(manager) === 4) - assert(addExecutors(manager) === 0) - assert(numExecutorsTarget(manager) === 8) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 6) + assert(numExecutorsToAddForDefaultProfile(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + + 
doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 8) + assert(numExecutorsToAddForDefaultProfile(manager) === 4) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 8) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) // Verify that re-running a task doesn't blow things up post(SparkListenerStageSubmitted(createStageInfo(2, 3))) post(SparkListenerTaskStart(2, 0, createTaskInfo(0, 0, "executor-1"))) post(SparkListenerTaskStart(2, 0, createTaskInfo(1, 0, "executor-1"))) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 9) - assert(numExecutorsToAdd(manager) === 2) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 10) - assert(numExecutorsToAdd(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 9) + assert(numExecutorsToAddForDefaultProfile(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) // Verify that running a task once we're at our limit doesn't blow things up post(SparkListenerTaskStart(2, 0, createTaskInfo(0, 1, "executor-1"))) - assert(addExecutors(manager) === 0) - assert(numExecutorsTarget(manager) === 10) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 10) } test("add executors when speculative 
tasks added") { val manager = createManager(createConf(0, 10, 0)) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + + post(SparkListenerStageSubmitted(createStageInfo(1, 2))) // Verify that we're capped at number of tasks including the speculative ones in the stage post(SparkListenerSpeculativeTaskSubmitted(1)) - assert(numExecutorsTarget(manager) === 0) - assert(numExecutorsToAdd(manager) === 1) - assert(addExecutors(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) post(SparkListenerSpeculativeTaskSubmitted(1)) post(SparkListenerSpeculativeTaskSubmitted(1)) - post(SparkListenerStageSubmitted(createStageInfo(1, 2))) - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 2) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 3) - assert(numExecutorsToAdd(manager) === 4) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 5) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) + assert(numExecutorsToAddForDefaultProfile(manager) === 4) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) // Verify that running a task doesn't affect the target 
post(SparkListenerTaskStart(1, 0, createTaskInfo(0, 0, "executor-1"))) - assert(numExecutorsTarget(manager) === 5) - assert(addExecutors(manager) === 0) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) // Verify that running a speculative task doesn't affect the target post(SparkListenerTaskStart(1, 0, createTaskInfo(1, 0, "executor-2", true))) - assert(numExecutorsTarget(manager) === 5) - assert(addExecutors(manager) === 0) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + } + + test("SPARK-30511 remove executors when speculative tasks end") { + val clock = new ManualClock() + val stage = createStageInfo(0, 40) + val conf = createConf(0, 10, 0).set(config.EXECUTOR_CORES, 4) + val manager = createManager(conf, clock = clock) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + + post(SparkListenerStageSubmitted(stage)) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 4) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 3) + doUpdateRequest(manager, updatesNeeded.toMap, 
clock.getTimeMillis()) + + (0 to 9).foreach(execId => onExecutorAddedDefaultProfile(manager, execId.toString)) + (0 to 39).map { i => createTaskInfo(i, i, executorId = s"${i / 4}")}.foreach { + info => post(SparkListenerTaskStart(0, 0, info)) + } + assert(numExecutorsTarget(manager, defaultProfile.id) === 10) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 10) + + // 30 tasks (0 - 29) finished + (0 to 29).map { i => createTaskInfo(i, i, executorId = s"${i / 4}")}.foreach { + info => post(SparkListenerTaskEnd(0, 0, null, Success, info, new ExecutorMetrics, null)) } + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTarget(manager, defaultProfile.id) === 3) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 3) + (0 to 6).foreach { i => assert(removeExecutorDefaultProfile(manager, i.toString))} + (0 to 6).foreach { i => onExecutorRemoved(manager, i.toString)} + + // 10 speculative tasks (30 - 39) launch for the remaining tasks + (30 to 39).foreach { _ => post(SparkListenerSpeculativeTaskSubmitted(0))} + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTarget(manager, defaultProfile.id) == 5) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 5) + (10 to 12).foreach(execId => onExecutorAddedDefaultProfile(manager, execId.toString)) + (40 to 49).map { i => + createTaskInfo(taskId = i, taskIndex = i - 10, executorId = s"${i / 4}", speculative = true)} + .foreach { info => post(SparkListenerTaskStart(0, 0, info))} + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + // At this point, we still have 6 executors running + 
assert(numExecutorsTarget(manager, defaultProfile.id) == 5) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 5) + + // 6 speculative tasks (40 - 45) finish before the original tasks, with 4 speculative remaining + (40 to 45).map { i => + createTaskInfo(taskId = i, taskIndex = i - 10, executorId = s"${i / 4}", speculative = true)} + .foreach { + info => post(SparkListenerTaskEnd(0, 0, null, Success, info, new ExecutorMetrics, null))} + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTarget(manager, defaultProfile.id) === 4) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 4) + assert(removeExecutorDefaultProfile(manager, "10")) + onExecutorRemoved(manager, "10") + // At this point, we still have 5 executors running: ["7", "8", "9", "11", "12"] + + // 6 original tasks (30 - 35) are intentionally killed + (30 to 35).map { i => + createTaskInfo(i, i, executorId = s"${i / 4}")} + .foreach { info => post( + SparkListenerTaskEnd(0, 0, null, TaskKilled("test"), info, new ExecutorMetrics, null))} + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTarget(manager, defaultProfile.id) === 2) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 2) + (7 to 8).foreach { i => assert(removeExecutorDefaultProfile(manager, i.toString))} + (7 to 8).foreach { i => onExecutorRemoved(manager, i.toString)} + // At this point, we still have 3 executors running: ["9", "11", "12"] + + // Task 36 finishes before the speculative task 46, task 46 killed + post(SparkListenerTaskEnd(0, 0, null, Success, + createTaskInfo(36, 36, executorId = "9"), new ExecutorMetrics, null)) + post(SparkListenerTaskEnd(0, 0, null, TaskKilled("test"), + createTaskInfo(46, 36, executorId = "11", speculative = true), new ExecutorMetrics, null)) + + // We should have 3 original tasks (index 37, 38, 39) 
running, with corresponding 3 speculative + // tasks running. Target lowers to 2, but still hold 3 executors ["9", "11", "12"] + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTarget(manager, defaultProfile.id) === 2) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 2) + // At this point, we still have 3 executors running: ["9", "11", "12"] + + // Task 37 and 47 succeed at the same time + post(SparkListenerTaskEnd(0, 0, null, Success, + createTaskInfo(37, 37, executorId = "9"), new ExecutorMetrics, null)) + post(SparkListenerTaskEnd(0, 0, null, Success, + createTaskInfo(47, 37, executorId = "11", speculative = true), new ExecutorMetrics, null)) + + // We should have 2 original tasks (index 38, 39) running, with corresponding 2 speculative + // tasks running + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTarget(manager, defaultProfile.id) === 1) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 1) + assert(removeExecutorDefaultProfile(manager, "11")) + onExecutorRemoved(manager, "11") + // At this point, we still have 2 executors running: ["9", "12"] + + // Task 38 fails and task 49 fails, new speculative task 50 is submitted to speculate on task 39 + post(SparkListenerTaskEnd(0, 0, null, UnknownReason, + createTaskInfo(38, 38, executorId = "9"), new ExecutorMetrics, null)) + post(SparkListenerTaskEnd(0, 0, null, UnknownReason, + createTaskInfo(49, 39, executorId = "12", speculative = true), new ExecutorMetrics, null)) + post(SparkListenerSpeculativeTaskSubmitted(0)) + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + // maxNeeded = 1, allocate one more to satisfy speculation locality requirement + assert(numExecutorsTarget(manager, defaultProfile.id) === 2) + assert(maxNumExecutorsNeededPerResourceProfile(manager, 
defaultProfile) == 2) + post(SparkListenerTaskStart(0, 0, + createTaskInfo(50, 39, executorId = "12", speculative = true))) + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTarget(manager, defaultProfile.id) === 1) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 1) + + // Task 39 and 48 succeed, task 50 killed + post(SparkListenerTaskEnd(0, 0, null, Success, + createTaskInfo(39, 39, executorId = "9"), new ExecutorMetrics, null)) + post(SparkListenerTaskEnd(0, 0, null, Success, + createTaskInfo(48, 38, executorId = "12", speculative = true), new ExecutorMetrics, null)) + post(SparkListenerTaskEnd(0, 0, null, TaskKilled("test"), + createTaskInfo(50, 39, executorId = "12", speculative = true), new ExecutorMetrics, null)) + post(SparkListenerStageCompleted(stage)) + clock.advance(1000) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTarget(manager, defaultProfile.id) === 0) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 0) + assert(removeExecutorDefaultProfile(manager, "9")) + onExecutorRemoved(manager, "9") + assert(removeExecutorDefaultProfile(manager, "12")) + onExecutorRemoved(manager, "12") } test("properly handle task end events from completed stages") { @@ -279,43 +644,49 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { post(SparkListenerStageCompleted(stage)) // There are still two tasks that belong to the zombie stage running. - assert(totalRunningTasks(manager) === 2) + assert(totalRunningTasksPerResourceProfile(manager) === 2) // submit another attempt for the stage. 
We count completions from the first zombie attempt val stageAttempt1 = createStageInfo(stage.stageId, 5, attemptId = 1) post(SparkListenerStageSubmitted(stageAttempt1)) post(SparkListenerTaskEnd(0, 0, null, Success, taskInfo1, new ExecutorMetrics, null)) - assert(totalRunningTasks(manager) === 1) + assert(totalRunningTasksPerResourceProfile(manager) === 1) val attemptTaskInfo1 = createTaskInfo(3, 0, "executor-1") val attemptTaskInfo2 = createTaskInfo(4, 1, "executor-1") post(SparkListenerTaskStart(0, 1, attemptTaskInfo1)) post(SparkListenerTaskStart(0, 1, attemptTaskInfo2)) - assert(totalRunningTasks(manager) === 3) + assert(totalRunningTasksPerResourceProfile(manager) === 3) post(SparkListenerTaskEnd(0, 1, null, Success, attemptTaskInfo1, new ExecutorMetrics, null)) - assert(totalRunningTasks(manager) === 2) + assert(totalRunningTasksPerResourceProfile(manager) === 2) post(SparkListenerTaskEnd(0, 0, null, Success, taskInfo2, new ExecutorMetrics, null)) - assert(totalRunningTasks(manager) === 1) + assert(totalRunningTasksPerResourceProfile(manager) === 1) post(SparkListenerTaskEnd(0, 1, null, Success, attemptTaskInfo2, new ExecutorMetrics, null)) - assert(totalRunningTasks(manager) === 0) + assert(totalRunningTasksPerResourceProfile(manager) === 0) } testRetry("cancel pending executors when no longer needed") { val manager = createManager(createConf(0, 10, 0)) post(SparkListenerStageSubmitted(createStageInfo(2, 5))) - assert(numExecutorsTarget(manager) === 0) - assert(numExecutorsToAdd(manager) === 1) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 2) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 3) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + 
assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) val task1Info = createTaskInfo(0, 0, "executor-1") post(SparkListenerTaskStart(2, 0, task1Info)) - assert(numExecutorsToAdd(manager) === 4) - assert(addExecutors(manager) === 2) + assert(numExecutorsToAddForDefaultProfile(manager) === 4) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) val task2Info = createTaskInfo(1, 0, "executor-1") post(SparkListenerTaskStart(2, 0, task2Info)) @@ -331,22 +702,21 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { test("remove executors") { val manager = createManager(createConf(5, 10, 5)) - (1 to 10).map(_.toString).foreach { id => onExecutorAdded(manager, id) } + (1 to 10).map(_.toString).foreach { id => onExecutorAddedDefaultProfile(manager, id) } // Keep removing until the limit is reached assert(executorsPendingToRemove(manager).isEmpty) - assert(removeExecutor(manager, "1")) + assert(removeExecutorDefaultProfile(manager, "1")) assert(executorsPendingToRemove(manager).size === 1) assert(executorsPendingToRemove(manager).contains("1")) - assert(removeExecutor(manager, "2")) - assert(removeExecutor(manager, "3")) + assert(removeExecutorDefaultProfile(manager, "2")) + assert(removeExecutorDefaultProfile(manager, "3")) assert(executorsPendingToRemove(manager).size === 3) assert(executorsPendingToRemove(manager).contains("2")) assert(executorsPendingToRemove(manager).contains("3")) - assert(executorsPendingToRemove(manager).size === 3) - 
assert(removeExecutor(manager, "4")) - assert(removeExecutor(manager, "5")) - assert(!removeExecutor(manager, "6")) // reached the limit of 5 + assert(removeExecutorDefaultProfile(manager, "4")) + assert(removeExecutorDefaultProfile(manager, "5")) + assert(!removeExecutorDefaultProfile(manager, "6")) // reached the limit of 5 assert(executorsPendingToRemove(manager).size === 5) assert(executorsPendingToRemove(manager).contains("4")) assert(executorsPendingToRemove(manager).contains("5")) @@ -370,29 +740,29 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { // Try removing again // This should still fail because the number pending + running is still at the limit - assert(!removeExecutor(manager, "7")) + assert(!removeExecutorDefaultProfile(manager, "7")) assert(executorsPendingToRemove(manager).isEmpty) - assert(!removeExecutor(manager, "8")) + assert(!removeExecutorDefaultProfile(manager, "8")) assert(executorsPendingToRemove(manager).isEmpty) } test("remove multiple executors") { val manager = createManager(createConf(5, 10, 5)) - (1 to 10).map(_.toString).foreach { id => onExecutorAdded(manager, id) } + (1 to 10).map(_.toString).foreach { id => onExecutorAddedDefaultProfile(manager, id) } // Keep removing until the limit is reached assert(executorsPendingToRemove(manager).isEmpty) - assert(removeExecutors(manager, Seq("1")) === Seq("1")) + assert(removeExecutorsDefaultProfile(manager, Seq("1")) === Seq("1")) assert(executorsPendingToRemove(manager).size === 1) assert(executorsPendingToRemove(manager).contains("1")) - assert(removeExecutors(manager, Seq("2", "3")) === Seq("2", "3")) + assert(removeExecutorsDefaultProfile(manager, Seq("2", "3")) === Seq("2", "3")) assert(executorsPendingToRemove(manager).size === 3) assert(executorsPendingToRemove(manager).contains("2")) assert(executorsPendingToRemove(manager).contains("3")) assert(executorsPendingToRemove(manager).size === 3) - assert(removeExecutor(manager, "4")) - assert(removeExecutors(manager, 
Seq("5")) === Seq("5")) - assert(!removeExecutor(manager, "6")) // reached the limit of 5 + assert(removeExecutorDefaultProfile(manager, "4")) + assert(removeExecutorsDefaultProfile(manager, Seq("5")) === Seq("5")) + assert(!removeExecutorDefaultProfile(manager, "6")) // reached the limit of 5 assert(executorsPendingToRemove(manager).size === 5) assert(executorsPendingToRemove(manager).contains("4")) assert(executorsPendingToRemove(manager).contains("5")) @@ -416,87 +786,100 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { // Try removing again // This should still fail because the number pending + running is still at the limit - assert(!removeExecutor(manager, "7")) + assert(!removeExecutorDefaultProfile(manager, "7")) assert(executorsPendingToRemove(manager).isEmpty) - assert(removeExecutors(manager, Seq("8")) !== Seq("8")) + assert(removeExecutorsDefaultProfile(manager, Seq("8")) !== Seq("8")) assert(executorsPendingToRemove(manager).isEmpty) } - test ("Removing with various numExecutorsTarget condition") { + test ("Removing with various numExecutorsTargetForDefaultProfileId condition") { val manager = createManager(createConf(5, 12, 5)) post(SparkListenerStageSubmitted(createStageInfo(0, 8))) - // Remove when numExecutorsTarget is the same as the current number of executors - assert(addExecutors(manager) === 1) - assert(addExecutors(manager) === 2) - (1 to 8).foreach(execId => onExecutorAdded(manager, execId.toString)) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + + // Remove when numExecutorsTargetForDefaultProfileId is the same as the current + // number of executors + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + (1 to 8).foreach(execId 
=> onExecutorAddedDefaultProfile(manager, execId.toString)) (1 to 8).map { i => createTaskInfo(i, i, s"$i") }.foreach { info => post(SparkListenerTaskStart(0, 0, info)) } assert(manager.executorMonitor.executorCount === 8) - assert(numExecutorsTarget(manager) === 8) - assert(maxNumExecutorsNeeded(manager) == 8) - assert(!removeExecutor(manager, "1")) // won't work since numExecutorsTarget == numExecutors + assert(numExecutorsTargetForDefaultProfileId(manager) === 8) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 8) + // won't work since numExecutorsTargetForDefaultProfileId == numExecutors + assert(!removeExecutorDefaultProfile(manager, "1")) - // Remove executors when numExecutorsTarget is lower than current number of executors + // Remove executors when numExecutorsTargetForDefaultProfileId is lower than + // current number of executors (1 to 3).map { i => createTaskInfo(i, i, s"$i") }.foreach { info => post(SparkListenerTaskEnd(0, 0, null, Success, info, new ExecutorMetrics, null)) } adjustRequestedExecutors(manager) assert(manager.executorMonitor.executorCount === 8) - assert(numExecutorsTarget(manager) === 5) - assert(maxNumExecutorsNeeded(manager) == 5) - assert(removeExecutor(manager, "1")) - assert(removeExecutors(manager, Seq("2", "3"))=== Seq("2", "3")) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 5) + assert(removeExecutorDefaultProfile(manager, "1")) + assert(removeExecutorsDefaultProfile(manager, Seq("2", "3"))=== Seq("2", "3")) onExecutorRemoved(manager, "1") onExecutorRemoved(manager, "2") onExecutorRemoved(manager, "3") - // numExecutorsTarget is lower than minNumExecutors + // numExecutorsTargetForDefaultProfileId is lower than minNumExecutors post(SparkListenerTaskEnd(0, 0, null, Success, createTaskInfo(4, 4, "4"), new ExecutorMetrics, null)) assert(manager.executorMonitor.executorCount === 5) - 
assert(numExecutorsTarget(manager) === 5) - assert(maxNumExecutorsNeeded(manager) == 4) - assert(!removeExecutor(manager, "4")) // lower limit - assert(addExecutors(manager) === 0) // upper limit + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 4) + assert(!removeExecutorDefaultProfile(manager, "4")) // lower limit + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) // upper limit } test ("interleaving add and remove") { val manager = createManager(createConf(5, 12, 5)) post(SparkListenerStageSubmitted(createStageInfo(0, 1000))) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + // Add a few executors - assert(addExecutors(manager) === 1) - assert(addExecutors(manager) === 2) - onExecutorAdded(manager, "1") - onExecutorAdded(manager, "2") - onExecutorAdded(manager, "3") - onExecutorAdded(manager, "4") - onExecutorAdded(manager, "5") - onExecutorAdded(manager, "6") - onExecutorAdded(manager, "7") - onExecutorAdded(manager, "8") + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + onExecutorAddedDefaultProfile(manager, "1") + onExecutorAddedDefaultProfile(manager, "2") + onExecutorAddedDefaultProfile(manager, "3") + onExecutorAddedDefaultProfile(manager, "4") + onExecutorAddedDefaultProfile(manager, "5") + onExecutorAddedDefaultProfile(manager, "6") + onExecutorAddedDefaultProfile(manager, "7") + onExecutorAddedDefaultProfile(manager, "8") assert(manager.executorMonitor.executorCount === 8) - assert(numExecutorsTarget(manager) === 8) + assert(numExecutorsTargetForDefaultProfileId(manager) === 8) // Remove when numTargetExecutors is equal to the current 
number of executors - assert(!removeExecutor(manager, "1")) - assert(removeExecutors(manager, Seq("2", "3")) !== Seq("2", "3")) + assert(!removeExecutorDefaultProfile(manager, "1")) + assert(removeExecutorsDefaultProfile(manager, Seq("2", "3")) !== Seq("2", "3")) // Remove until limit - onExecutorAdded(manager, "9") - onExecutorAdded(manager, "10") - onExecutorAdded(manager, "11") - onExecutorAdded(manager, "12") + onExecutorAddedDefaultProfile(manager, "9") + onExecutorAddedDefaultProfile(manager, "10") + onExecutorAddedDefaultProfile(manager, "11") + onExecutorAddedDefaultProfile(manager, "12") assert(manager.executorMonitor.executorCount === 12) - assert(numExecutorsTarget(manager) === 8) + assert(numExecutorsTargetForDefaultProfileId(manager) === 8) - assert(removeExecutor(manager, "1")) - assert(removeExecutors(manager, Seq("2", "3", "4")) === Seq("2", "3", "4")) - assert(!removeExecutor(manager, "5")) // lower limit reached - assert(!removeExecutor(manager, "6")) + assert(removeExecutorDefaultProfile(manager, "1")) + assert(removeExecutorsDefaultProfile(manager, Seq("2", "3", "4")) === Seq("2", "3", "4")) + assert(!removeExecutorDefaultProfile(manager, "5")) // lower limit reached + assert(!removeExecutorDefaultProfile(manager, "6")) onExecutorRemoved(manager, "1") onExecutorRemoved(manager, "2") onExecutorRemoved(manager, "3") @@ -504,33 +887,36 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { assert(manager.executorMonitor.executorCount === 8) // Add until limit - assert(!removeExecutor(manager, "7")) // still at lower limit + assert(!removeExecutorDefaultProfile(manager, "7")) // still at lower limit assert((manager, Seq("8")) !== Seq("8")) - onExecutorAdded(manager, "13") - onExecutorAdded(manager, "14") - onExecutorAdded(manager, "15") - onExecutorAdded(manager, "16") + onExecutorAddedDefaultProfile(manager, "13") + onExecutorAddedDefaultProfile(manager, "14") + onExecutorAddedDefaultProfile(manager, "15") + 
onExecutorAddedDefaultProfile(manager, "16") assert(manager.executorMonitor.executorCount === 12) // Remove succeeds again, now that we are no longer at the lower limit - assert(removeExecutors(manager, Seq("5", "6", "7")) === Seq("5", "6", "7")) - assert(removeExecutor(manager, "8")) + assert(removeExecutorsDefaultProfile(manager, Seq("5", "6", "7")) === Seq("5", "6", "7")) + assert(removeExecutorDefaultProfile(manager, "8")) assert(manager.executorMonitor.executorCount === 12) onExecutorRemoved(manager, "5") onExecutorRemoved(manager, "6") assert(manager.executorMonitor.executorCount === 10) - assert(numExecutorsToAdd(manager) === 4) + assert(numExecutorsToAddForDefaultProfile(manager) === 4) onExecutorRemoved(manager, "9") onExecutorRemoved(manager, "10") - assert(addExecutors(manager) === 4) // at upper limit - onExecutorAdded(manager, "17") - onExecutorAdded(manager, "18") + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 4) // at upper limit + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + onExecutorAddedDefaultProfile(manager, "17") + onExecutorAddedDefaultProfile(manager, "18") assert(manager.executorMonitor.executorCount === 10) - assert(addExecutors(manager) === 0) // still at upper limit - onExecutorAdded(manager, "19") - onExecutorAdded(manager, "20") + // still at upper limit + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 0) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + onExecutorAddedDefaultProfile(manager, "19") + onExecutorAddedDefaultProfile(manager, "20") assert(manager.executorMonitor.executorCount === 12) - assert(numExecutorsTarget(manager) === 12) + assert(numExecutorsTargetForDefaultProfileId(manager) === 12) } test("starting/canceling add timer") { @@ -541,7 +927,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { assert(addTime(manager) === NOT_SET) onSchedulerBacklogged(manager) val firstAddTime = addTime(manager) - 
assert(firstAddTime === clock.getTimeMillis + schedulerBacklogTimeout * 1000) + assert(firstAddTime === clock.nanoTime() + TimeUnit.SECONDS.toNanos(schedulerBacklogTimeout)) clock.advance(100L) onSchedulerBacklogged(manager) assert(addTime(manager) === firstAddTime) // timer is already started @@ -555,7 +941,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { assert(addTime(manager) === NOT_SET) onSchedulerBacklogged(manager) val secondAddTime = addTime(manager) - assert(secondAddTime === clock.getTimeMillis + schedulerBacklogTimeout * 1000) + assert(secondAddTime === clock.nanoTime() + TimeUnit.SECONDS.toNanos(schedulerBacklogTimeout)) clock.advance(100L) onSchedulerBacklogged(manager) assert(addTime(manager) === secondAddTime) // timer is already started @@ -568,22 +954,22 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { val manager = createManager(createConf(0, 20, 0), clock = clock) // No events - we should not be adding or removing - assert(numExecutorsTarget(manager) === 0) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) assert(executorsPendingToRemove(manager).isEmpty) schedule(manager) - assert(numExecutorsTarget(manager) === 0) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) assert(executorsPendingToRemove(manager).isEmpty) clock.advance(100L) schedule(manager) - assert(numExecutorsTarget(manager) === 0) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) assert(executorsPendingToRemove(manager).isEmpty) clock.advance(1000L) schedule(manager) - assert(numExecutorsTarget(manager) === 0) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) assert(executorsPendingToRemove(manager).isEmpty) clock.advance(10000L) schedule(manager) - assert(numExecutorsTarget(manager) === 0) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) assert(executorsPendingToRemove(manager).isEmpty) } @@ -596,43 +982,43 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { 
onSchedulerBacklogged(manager) clock.advance(schedulerBacklogTimeout * 1000 / 2) schedule(manager) - assert(numExecutorsTarget(manager) === 0) // timer not exceeded yet + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) // timer not exceeded yet clock.advance(schedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 1) // first timer exceeded + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) // first timer exceeded clock.advance(sustainedSchedulerBacklogTimeout * 1000 / 2) schedule(manager) - assert(numExecutorsTarget(manager) === 1) // second timer not exceeded yet + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) // second timer not exceeded yet clock.advance(sustainedSchedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 1 + 2) // second timer exceeded + assert(numExecutorsTargetForDefaultProfileId(manager) === 1 + 2) // second timer exceeded clock.advance(sustainedSchedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 1 + 2 + 4) // third timer exceeded + assert(numExecutorsTargetForDefaultProfileId(manager) === 1 + 2 + 4) // third timer exceeded // Scheduler queue drained onSchedulerQueueEmpty(manager) clock.advance(sustainedSchedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 7) // timer is canceled + assert(numExecutorsTargetForDefaultProfileId(manager) === 7) // timer is canceled clock.advance(sustainedSchedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 7) + assert(numExecutorsTargetForDefaultProfileId(manager) === 7) // Scheduler queue backlogged again onSchedulerBacklogged(manager) clock.advance(schedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 7 + 1) // timer restarted + assert(numExecutorsTargetForDefaultProfileId(manager) === 7 + 1) // timer restarted clock.advance(sustainedSchedulerBacklogTimeout 
* 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 7 + 1 + 2) + assert(numExecutorsTargetForDefaultProfileId(manager) === 7 + 1 + 2) clock.advance(sustainedSchedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 7 + 1 + 2 + 4) + assert(numExecutorsTargetForDefaultProfileId(manager) === 7 + 1 + 2 + 4) clock.advance(sustainedSchedulerBacklogTimeout * 1000) schedule(manager) - assert(numExecutorsTarget(manager) === 20) // limit reached + assert(numExecutorsTargetForDefaultProfileId(manager) === 20) // limit reached } test("mock polling loop remove behavior") { @@ -640,9 +1026,9 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { val manager = createManager(createConf(1, 20, 1), clock = clock) // Remove idle executors on timeout - onExecutorAdded(manager, "executor-1") - onExecutorAdded(manager, "executor-2") - onExecutorAdded(manager, "executor-3") + onExecutorAddedDefaultProfile(manager, "executor-1") + onExecutorAddedDefaultProfile(manager, "executor-2") + onExecutorAddedDefaultProfile(manager, "executor-3") assert(executorsPendingToRemove(manager).isEmpty) // idle threshold not reached yet @@ -658,10 +1044,10 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { assert(executorsPendingToRemove(manager).size === 2) // limit reached (1 executor remaining) // Mark a subset as busy - only idle executors should be removed - onExecutorAdded(manager, "executor-4") - onExecutorAdded(manager, "executor-5") - onExecutorAdded(manager, "executor-6") - onExecutorAdded(manager, "executor-7") + onExecutorAddedDefaultProfile(manager, "executor-4") + onExecutorAddedDefaultProfile(manager, "executor-5") + onExecutorAddedDefaultProfile(manager, "executor-6") + onExecutorAddedDefaultProfile(manager, "executor-7") assert(manager.executorMonitor.executorCount === 7) assert(executorsPendingToRemove(manager).size === 2) // 2 pending to be removed onExecutorBusy(manager, "executor-4") @@ -726,23 +1112,31 @@ class 
ExecutorAllocationManagerSuite extends SparkFunSuite { val stage1 = createStageInfo(0, 1000) post(SparkListenerStageSubmitted(stage1)) - assert(addExecutors(manager) === 1) - assert(addExecutors(manager) === 2) - assert(addExecutors(manager) === 4) - assert(addExecutors(manager) === 8) - assert(numExecutorsTarget(manager) === 15) + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] + + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 4) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 8) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 15) (0 until 15).foreach { i => - onExecutorAdded(manager, s"executor-$i") + onExecutorAddedDefaultProfile(manager, s"executor-$i") } assert(manager.executorMonitor.executorCount === 15) post(SparkListenerStageCompleted(stage1)) adjustRequestedExecutors(manager) - assert(numExecutorsTarget(manager) === 0) + assert(numExecutorsTargetForDefaultProfileId(manager) === 0) post(SparkListenerStageSubmitted(createStageInfo(1, 1000))) - addExecutors(manager) - assert(numExecutorsTarget(manager) === 16) + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 16) } test("avoid ramp down initial executors until first job is submitted") { @@ -750,19 +1144,19 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { val manager = createManager(createConf(2, 5, 
3), clock = clock) // Verify the initial number of executors - assert(numExecutorsTarget(manager) === 3) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) schedule(manager) // Verify whether the initial number of executors is kept with no pending tasks - assert(numExecutorsTarget(manager) === 3) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) post(SparkListenerStageSubmitted(createStageInfo(1, 2))) clock.advance(100L) - assert(maxNumExecutorsNeeded(manager) === 2) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 2) schedule(manager) // Verify that current number of executors should be ramp down when first job is submitted - assert(numExecutorsTarget(manager) === 2) + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) } test("avoid ramp down initial executors until idle executor is timeout") { @@ -770,20 +1164,20 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { val manager = createManager(createConf(2, 5, 3), clock = clock) // Verify the initial number of executors - assert(numExecutorsTarget(manager) === 3) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) schedule(manager) // Verify the initial number of executors is kept when no pending tasks - assert(numExecutorsTarget(manager) === 3) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) (0 until 3).foreach { i => - onExecutorAdded(manager, s"executor-$i") + onExecutorAddedDefaultProfile(manager, s"executor-$i") } clock.advance(executorIdleTimeout * 1000) - assert(maxNumExecutorsNeeded(manager) === 0) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 0) schedule(manager) - // Verify executor is timeout,numExecutorsTarget is recalculated - assert(numExecutorsTarget(manager) === 2) + // Verify executor is timeout,numExecutorsTargetForDefaultProfileId is recalculated + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) } test("get pending task number and related locality 
preference") { @@ -799,7 +1193,8 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { val stageInfo1 = createStageInfo(1, 5, localityPreferences1) post(SparkListenerStageSubmitted(stageInfo1)) - assert(localityAwareTasks(manager) === 3) + assert(localityAwareTasksForDefaultProfile(manager) === 3) + val hostToLocal = hostToLocalTaskCount(manager) assert(hostToLocalTaskCount(manager) === Map("host1" -> 2, "host2" -> 3, "host3" -> 2, "host4" -> 2)) @@ -811,67 +1206,76 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { val stageInfo2 = createStageInfo(2, 3, localityPreferences2) post(SparkListenerStageSubmitted(stageInfo2)) - assert(localityAwareTasks(manager) === 5) + assert(localityAwareTasksForDefaultProfile(manager) === 5) assert(hostToLocalTaskCount(manager) === Map("host1" -> 2, "host2" -> 4, "host3" -> 4, "host4" -> 3, "host5" -> 2)) post(SparkListenerStageCompleted(stageInfo1)) - assert(localityAwareTasks(manager) === 2) + assert(localityAwareTasksForDefaultProfile(manager) === 2) assert(hostToLocalTaskCount(manager) === Map("host2" -> 1, "host3" -> 2, "host4" -> 1, "host5" -> 2)) } - test("SPARK-8366: maxNumExecutorsNeeded should properly handle failed tasks") { + test("SPARK-8366: maxNumExecutorsNeededPerResourceProfile should properly handle failed tasks") { val manager = createManager(createConf()) - assert(maxNumExecutorsNeeded(manager) === 0) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 0) post(SparkListenerStageSubmitted(createStageInfo(0, 1))) - assert(maxNumExecutorsNeeded(manager) === 1) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 1) val taskInfo = createTaskInfo(1, 1, "executor-1") post(SparkListenerTaskStart(0, 0, taskInfo)) - assert(maxNumExecutorsNeeded(manager) === 1) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 1) // If the task is failed, we expect it to be resubmitted later. 
val taskEndReason = ExceptionFailure(null, null, null, null, None) post(SparkListenerTaskEnd(0, 0, null, taskEndReason, taskInfo, new ExecutorMetrics, null)) - assert(maxNumExecutorsNeeded(manager) === 1) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 1) } test("reset the state of allocation manager") { val manager = createManager(createConf()) - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) + + val updatesNeeded = + new mutable.HashMap[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates] // Allocation manager is reset when adding executor requests are sent without reporting back // executor added. post(SparkListenerStageSubmitted(createStageInfo(0, 10))) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 2) - assert(addExecutors(manager) === 2) - assert(numExecutorsTarget(manager) === 4) - assert(addExecutors(manager) === 1) - assert(numExecutorsTarget(manager) === 5) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 2) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 4) + assert(addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) === 1) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) manager.reset() - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) 
assert(manager.executorMonitor.executorCount === 0) // Allocation manager is reset when executors are added. post(SparkListenerStageSubmitted(createStageInfo(0, 10))) - addExecutors(manager) - addExecutors(manager) - addExecutors(manager) - assert(numExecutorsTarget(manager) === 5) - - onExecutorAdded(manager, "first") - onExecutorAdded(manager, "second") - onExecutorAdded(manager, "third") - onExecutorAdded(manager, "fourth") - onExecutorAdded(manager, "fifth") + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + + onExecutorAddedDefaultProfile(manager, "first") + onExecutorAddedDefaultProfile(manager, "second") + onExecutorAddedDefaultProfile(manager, "third") + onExecutorAddedDefaultProfile(manager, "fourth") + onExecutorAddedDefaultProfile(manager, "fifth") assert(manager.executorMonitor.executorCount === 5) // Cluster manager lost will make all the live executors lost, so here simulate this behavior @@ -882,28 +1286,31 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { onExecutorRemoved(manager, "fifth") manager.reset() - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) assert(manager.executorMonitor.executorCount === 0) // Allocation manager is reset when executors are pending to remove - addExecutors(manager) - addExecutors(manager) - addExecutors(manager) - assert(numExecutorsTarget(manager) === 5) - - onExecutorAdded(manager, "first") - onExecutorAdded(manager, "second") - 
onExecutorAdded(manager, "third") - onExecutorAdded(manager, "fourth") - onExecutorAdded(manager, "fifth") - onExecutorAdded(manager, "sixth") - onExecutorAdded(manager, "seventh") - onExecutorAdded(manager, "eighth") + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + addExecutorsToTargetForDefaultProfile(manager, updatesNeeded) + doUpdateRequest(manager, updatesNeeded.toMap, clock.getTimeMillis()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 5) + + onExecutorAddedDefaultProfile(manager, "first") + onExecutorAddedDefaultProfile(manager, "second") + onExecutorAddedDefaultProfile(manager, "third") + onExecutorAddedDefaultProfile(manager, "fourth") + onExecutorAddedDefaultProfile(manager, "fifth") + onExecutorAddedDefaultProfile(manager, "sixth") + onExecutorAddedDefaultProfile(manager, "seventh") + onExecutorAddedDefaultProfile(manager, "eighth") assert(manager.executorMonitor.executorCount === 8) - removeExecutor(manager, "first") - removeExecutors(manager, Seq("second", "third")) + removeExecutorDefaultProfile(manager, "first") + removeExecutorsDefaultProfile(manager, Seq("second", "third")) assert(executorsPendingToRemove(manager) === Set("first", "second", "third")) assert(manager.executorMonitor.executorCount === 8) @@ -917,8 +1324,8 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { manager.reset() - assert(numExecutorsTarget(manager) === 1) - assert(numExecutorsToAdd(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) + assert(numExecutorsToAddForDefaultProfile(manager) === 1) assert(executorsPendingToRemove(manager) === Set.empty) assert(manager.executorMonitor.executorCount === 0) } @@ -929,31 +1336,31 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { createConf(1, 2, 
1).set(config.DYN_ALLOCATION_TESTING, false), clock = clock) - when(client.requestTotalExecutors(meq(2), any(), any())).thenReturn(true) + when(client.requestTotalExecutors(any(), any(), any())).thenReturn(true) // test setup -- job with 2 tasks, scale up to two executors - assert(numExecutorsTarget(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) post(SparkListenerExecutorAdded( clock.getTimeMillis(), "executor-1", new ExecutorInfo("host1", 1, Map.empty, Map.empty))) post(SparkListenerStageSubmitted(createStageInfo(0, 2))) clock.advance(1000) - manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.getTimeMillis()) - assert(numExecutorsTarget(manager) === 2) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) val taskInfo0 = createTaskInfo(0, 0, "executor-1") post(SparkListenerTaskStart(0, 0, taskInfo0)) post(SparkListenerExecutorAdded( clock.getTimeMillis(), "executor-2", new ExecutorInfo("host1", 1, Map.empty, Map.empty))) val taskInfo1 = createTaskInfo(1, 1, "executor-2") post(SparkListenerTaskStart(0, 0, taskInfo1)) - assert(numExecutorsTarget(manager) === 2) + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) // have one task finish -- we should adjust the target number of executors down // but we should *not* kill any executors yet post(SparkListenerTaskEnd(0, 0, null, Success, taskInfo0, new ExecutorMetrics, null)) - assert(maxNumExecutorsNeeded(manager) === 1) - assert(numExecutorsTarget(manager) === 2) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 2) clock.advance(1000) - manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.getTimeMillis()) - assert(numExecutorsTarget(manager) === 1) + manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) 
verify(client, never).killExecutors(any(), any(), any(), any()) // now we cross the idle timeout for executor-1, so we kill it. the really important @@ -963,8 +1370,8 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { .thenReturn(Seq("executor-1")) clock.advance(3000) schedule(manager) - assert(maxNumExecutorsNeeded(manager) === 1) - assert(numExecutorsTarget(manager) === 1) + assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) // here's the important verify -- we did kill the executors, but did not adjust the target count verify(client).killExecutors(Seq("executor-1"), false, false, false) } @@ -972,7 +1379,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { test("SPARK-26758 check executor target number after idle time out ") { val clock = new ManualClock(10000L) val manager = createManager(createConf(1, 5, 3), clock = clock) - assert(numExecutorsTarget(manager) === 3) + assert(numExecutorsTargetForDefaultProfileId(manager) === 3) post(SparkListenerExecutorAdded( clock.getTimeMillis(), "executor-1", new ExecutorInfo("host1", 1, Map.empty))) post(SparkListenerExecutorAdded( @@ -983,14 +1390,14 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { clock.advance(executorIdleTimeout * 1000) schedule(manager) // once the schedule is run target executor number should be 1 - assert(numExecutorsTarget(manager) === 1) + assert(numExecutorsTargetForDefaultProfileId(manager) === 1) } private def createConf( minExecutors: Int = 1, maxExecutors: Int = 5, initialExecutors: Int = 1): SparkConf = { - new SparkConf() + val sparkConf = new SparkConf() .set(config.DYN_ALLOCATION_ENABLED, true) .set(config.DYN_ALLOCATION_MIN_EXECUTORS, minExecutors) .set(config.DYN_ALLOCATION_MAX_EXECUTORS, maxExecutors) @@ -1005,19 +1412,37 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { // SPARK-22864: effectively disable the allocation schedule by setting 
the period to a // really long value. .set(TEST_SCHEDULE_INTERVAL, 10000L) + sparkConf } private def createManager( conf: SparkConf, clock: Clock = new SystemClock()): ExecutorAllocationManager = { - val manager = new ExecutorAllocationManager(client, listenerBus, conf, clock = clock) + ResourceProfile.reInitDefaultProfile(conf) + rpManager = new ResourceProfileManager(conf) + val manager = new ExecutorAllocationManager(client, listenerBus, conf, clock = clock, + resourceProfileManager = rpManager) managers += manager manager.start() manager } - private def onExecutorAdded(manager: ExecutorAllocationManager, id: String): Unit = { - post(SparkListenerExecutorAdded(0L, id, null)) + private val execInfo = new ExecutorInfo("host1", 1, Map.empty, + Map.empty, Map.empty, DEFAULT_RESOURCE_PROFILE_ID) + + private def onExecutorAddedDefaultProfile( + manager: ExecutorAllocationManager, + id: String): Unit = { + post(SparkListenerExecutorAdded(0L, id, execInfo)) + } + + private def onExecutorAdded( + manager: ExecutorAllocationManager, + id: String, + rp: ResourceProfile): Unit = { + val cores = rp.getExecutorCores.getOrElse(1) + val execInfo = new ExecutorInfo("host1", cores, Map.empty, Map.empty, Map.empty, rp.id) + post(SparkListenerExecutorAdded(0L, id, execInfo)) } private def onExecutorRemoved(manager: ExecutorAllocationManager, id: String): Unit = { @@ -1035,8 +1460,18 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { post(SparkListenerTaskEnd(1, 1, "foo", Success, info, new ExecutorMetrics, null)) } - private def removeExecutor(manager: ExecutorAllocationManager, executorId: String): Boolean = { - val executorsRemoved = removeExecutors(manager, Seq(executorId)) + private def removeExecutorDefaultProfile( + manager: ExecutorAllocationManager, + executorId: String): Boolean = { + val executorsRemoved = removeExecutorsDefaultProfile(manager, Seq(executorId)) + executorsRemoved.nonEmpty && executorsRemoved(0) == executorId + } + + private def 
removeExecutor( + manager: ExecutorAllocationManager, + executorId: String, + rpId: Int): Boolean = { + val executorsRemoved = removeExecutors(manager, Seq((executorId, rpId))) executorsRemoved.nonEmpty && executorsRemoved(0) == executorId } @@ -1058,10 +1493,11 @@ private object ExecutorAllocationManagerSuite extends PrivateMethodTester { stageId: Int, numTasks: Int, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty, - attemptId: Int = 0 + attemptId: Int = 0, + rp: ResourceProfile = defaultProfile ): StageInfo = { new StageInfo(stageId, attemptId, "name", numTasks, Seq.empty, Seq.empty, "no details", - taskLocalityPreferences = taskLocalityPreferences) + taskLocalityPreferences = taskLocalityPreferences, resourceProfileId = rp.id) } private def createTaskInfo( @@ -1076,52 +1512,117 @@ private object ExecutorAllocationManagerSuite extends PrivateMethodTester { | Helper methods for accessing private methods and fields | * ------------------------------------------------------- */ - private val _numExecutorsToAdd = PrivateMethod[Int]('numExecutorsToAdd) - private val _numExecutorsTarget = PrivateMethod[Int]('numExecutorsTarget) - private val _maxNumExecutorsNeeded = PrivateMethod[Int]('maxNumExecutorsNeeded) - private val _addTime = PrivateMethod[Long]('addTime) - private val _schedule = PrivateMethod[Unit]('schedule) - private val _addExecutors = PrivateMethod[Int]('addExecutors) + private val _numExecutorsToAddPerResourceProfileId = + PrivateMethod[mutable.HashMap[Int, Int]]( + Symbol("numExecutorsToAddPerResourceProfileId")) + private val _numExecutorsTargetPerResourceProfileId = + PrivateMethod[mutable.HashMap[Int, Int]]( + Symbol("numExecutorsTargetPerResourceProfileId")) + private val _maxNumExecutorsNeededPerResourceProfile = + PrivateMethod[Int](Symbol("maxNumExecutorsNeededPerResourceProfile")) + private val _addTime = PrivateMethod[Long](Symbol("addTime")) + private val _schedule = PrivateMethod[Unit](Symbol("schedule")) + private val 
_doUpdateRequest = PrivateMethod[Unit](Symbol("doUpdateRequest")) private val _updateAndSyncNumExecutorsTarget = - PrivateMethod[Int]('updateAndSyncNumExecutorsTarget) - private val _removeExecutors = PrivateMethod[Seq[String]]('removeExecutors) - private val _onSchedulerBacklogged = PrivateMethod[Unit]('onSchedulerBacklogged) - private val _onSchedulerQueueEmpty = PrivateMethod[Unit]('onSchedulerQueueEmpty) - private val _localityAwareTasks = PrivateMethod[Int]('localityAwareTasks) - private val _hostToLocalTaskCount = PrivateMethod[Map[String, Int]]('hostToLocalTaskCount) - private val _onSpeculativeTaskSubmitted = PrivateMethod[Unit]('onSpeculativeTaskSubmitted) - private val _totalRunningTasks = PrivateMethod[Int]('totalRunningTasks) + PrivateMethod[Int](Symbol("updateAndSyncNumExecutorsTarget")) + private val _addExecutorsToTarget = PrivateMethod[Int](Symbol("addExecutorsToTarget")) + private val _removeExecutors = PrivateMethod[Seq[String]](Symbol("removeExecutors")) + private val _onSchedulerBacklogged = PrivateMethod[Unit](Symbol("onSchedulerBacklogged")) + private val _onSchedulerQueueEmpty = PrivateMethod[Unit](Symbol("onSchedulerQueueEmpty")) + private val _localityAwareTasksPerResourceProfileId = + PrivateMethod[mutable.HashMap[Int, Int]](Symbol("numLocalityAwareTasksPerResourceProfileId")) + private val _rpIdToHostToLocalTaskCount = + PrivateMethod[Map[Int, Map[String, Int]]](Symbol("rpIdToHostToLocalTaskCount")) + private val _onSpeculativeTaskSubmitted = + PrivateMethod[Unit](Symbol("onSpeculativeTaskSubmitted")) + private val _totalRunningTasksPerResourceProfile = + PrivateMethod[Int](Symbol("totalRunningTasksPerResourceProfile")) + + private val defaultProfile = ResourceProfile.getOrCreateDefaultProfile(new SparkConf) + + private def numExecutorsToAddForDefaultProfile(manager: ExecutorAllocationManager): Int = { + numExecutorsToAdd(manager, defaultProfile) + } + + private def numExecutorsToAdd( + manager: ExecutorAllocationManager, + rp: 
ResourceProfile): Int = { + val nmap = manager invokePrivate _numExecutorsToAddPerResourceProfileId() + nmap(rp.id) + } + + private def updateAndSyncNumExecutorsTarget( + manager: ExecutorAllocationManager, + now: Long): Unit = { + manager invokePrivate _updateAndSyncNumExecutorsTarget(now) + } - private def numExecutorsToAdd(manager: ExecutorAllocationManager): Int = { - manager invokePrivate _numExecutorsToAdd() + private def numExecutorsTargetForDefaultProfileId(manager: ExecutorAllocationManager): Int = { + numExecutorsTarget(manager, defaultProfile.id) } - private def numExecutorsTarget(manager: ExecutorAllocationManager): Int = { - manager invokePrivate _numExecutorsTarget() + private def numExecutorsTarget( + manager: ExecutorAllocationManager, + rpId: Int): Int = { + val numMap = manager invokePrivate _numExecutorsTargetPerResourceProfileId() + numMap(rpId) + } + + private def addExecutorsToTargetForDefaultProfile( + manager: ExecutorAllocationManager, + updatesNeeded: mutable.HashMap[ResourceProfile, + ExecutorAllocationManager.TargetNumUpdates] + ): Int = { + addExecutorsToTarget(manager, updatesNeeded, defaultProfile) + } + + private def addExecutorsToTarget( + manager: ExecutorAllocationManager, + updatesNeeded: mutable.HashMap[ResourceProfile, + ExecutorAllocationManager.TargetNumUpdates], + rp: ResourceProfile + ): Int = { + val maxNumExecutorsNeeded = + manager invokePrivate _maxNumExecutorsNeededPerResourceProfile(rp.id) + manager invokePrivate + _addExecutorsToTarget(maxNumExecutorsNeeded, rp.id, updatesNeeded) } private def addTime(manager: ExecutorAllocationManager): Long = { manager invokePrivate _addTime() } - private def schedule(manager: ExecutorAllocationManager): Unit = { - manager invokePrivate _schedule() + private def doUpdateRequest( + manager: ExecutorAllocationManager, + updates: Map[ResourceProfile, ExecutorAllocationManager.TargetNumUpdates], + now: Long): Unit = { + manager invokePrivate _doUpdateRequest(updates, now) } - private 
def maxNumExecutorsNeeded(manager: ExecutorAllocationManager): Int = { - manager invokePrivate _maxNumExecutorsNeeded() + private def schedule(manager: ExecutorAllocationManager): Unit = { + manager invokePrivate _schedule() } - private def addExecutors(manager: ExecutorAllocationManager): Int = { - val maxNumExecutorsNeeded = manager invokePrivate _maxNumExecutorsNeeded() - manager invokePrivate _addExecutors(maxNumExecutorsNeeded) + private def maxNumExecutorsNeededPerResourceProfile( + manager: ExecutorAllocationManager, + rp: ResourceProfile): Int = { + manager invokePrivate _maxNumExecutorsNeededPerResourceProfile(rp.id) } private def adjustRequestedExecutors(manager: ExecutorAllocationManager): Int = { manager invokePrivate _updateAndSyncNumExecutorsTarget(0L) } - private def removeExecutors(manager: ExecutorAllocationManager, ids: Seq[String]): Seq[String] = { + private def removeExecutorsDefaultProfile( + manager: ExecutorAllocationManager, + ids: Seq[String]): Seq[String] = { + val idsAndProfileIds = ids.map((_, defaultProfile.id)) + manager invokePrivate _removeExecutors(idsAndProfileIds) + } + + private def removeExecutors( + manager: ExecutorAllocationManager, + ids: Seq[(String, Int)]): Seq[String] = { manager invokePrivate _removeExecutors(ids) } @@ -1137,15 +1638,22 @@ private object ExecutorAllocationManagerSuite extends PrivateMethodTester { manager invokePrivate _onSpeculativeTaskSubmitted(id) } - private def localityAwareTasks(manager: ExecutorAllocationManager): Int = { - manager invokePrivate _localityAwareTasks() + private def localityAwareTasksForDefaultProfile(manager: ExecutorAllocationManager): Int = { + val localMap = manager invokePrivate _localityAwareTasksPerResourceProfileId() + localMap(defaultProfile.id) + } + + private def totalRunningTasksPerResourceProfile(manager: ExecutorAllocationManager): Int = { + manager invokePrivate _totalRunningTasksPerResourceProfile(defaultProfile.id) } - private def totalRunningTasks(manager: 
ExecutorAllocationManager): Int = { - manager invokePrivate _totalRunningTasks() + private def hostToLocalTaskCount( + manager: ExecutorAllocationManager): Map[String, Int] = { + val rpIdToHostLocal = manager invokePrivate _rpIdToHostToLocalTaskCount() + rpIdToHostLocal(defaultProfile.id) } - private def hostToLocalTaskCount(manager: ExecutorAllocationManager): Map[String, Int] = { - manager invokePrivate _hostToLocalTaskCount() + private def getResourceProfileIdOfExecutor(manager: ExecutorAllocationManager): Int = { + defaultProfile.id } } diff --git a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala index 7f7f3db65d6ca..c217419f4092e 100644 --- a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala @@ -40,7 +40,7 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi var transportContext: TransportContext = _ var rpcHandler: ExternalBlockHandler = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle", numUsableCores = 2) rpcHandler = new ExternalBlockHandler(transportConf, null) @@ -52,7 +52,7 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi conf.set(config.SHUFFLE_SERVICE_PORT, server.getPort) } - override def afterAll() { + override def afterAll(): Unit = { Utils.tryLogNonFatalError{ server.close() } @@ -68,6 +68,7 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi // This test ensures that the external shuffle service is actually in use for the other tests. 
test("using external shuffle service") { sc = new SparkContext("local-cluster[2,1,1024]", "test", conf) + sc.getConf.get(config.SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED) should equal(false) sc.env.blockManager.externalShuffleServiceEnabled should equal(true) sc.env.blockManager.blockStoreClient.getClass should equal(classOf[ExternalBlockStoreClient]) @@ -79,7 +80,9 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi // Therefore, we should wait until all slaves are up TestUtils.waitUntilExecutorsUp(sc, 2, 60000) - val rdd = sc.parallelize(0 until 1000, 10).map(i => (i, 1)).reduceByKey(_ + _) + val rdd = sc.parallelize(0 until 1000, 10) + .map { i => (i, 1) } + .reduceByKey(_ + _) rdd.count() rdd.count() @@ -96,6 +99,50 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi e.getMessage should include ("Fetch failure will not retry stage due to testing config") } + test("SPARK-27651: read host local shuffle blocks from disk and avoid network remote fetches") { + val confWithHostLocalRead = + conf.clone.set(config.SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED, true) + confWithHostLocalRead.set(config.STORAGE_LOCAL_DISK_BY_EXECUTORS_CACHE_SIZE, 5) + sc = new SparkContext("local-cluster[2,1,1024]", "test", confWithHostLocalRead) + sc.getConf.get(config.SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED) should equal(true) + sc.env.blockManager.externalShuffleServiceEnabled should equal(true) + sc.env.blockManager.hostLocalDirManager.isDefined should equal(true) + sc.env.blockManager.blockStoreClient.getClass should equal(classOf[ExternalBlockStoreClient]) + + // In a slow machine, one slave may register hundreds of milliseconds ahead of the other one. + // If we don't wait for all slaves, it's possible that only one executor runs all jobs. 
Then + // all shuffle blocks will be in this executor, ShuffleBlockFetcherIterator will directly fetch + // local blocks from the local BlockManager and won't send requests to ExternalShuffleService. + // In this case, we won't receive FetchFailed. And it will make this test fail. + // Therefore, we should wait until all slaves are up + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) + + val rdd = sc.parallelize(0 until 1000, 10) + .map { i => (i, 1) } + .reduceByKey(_ + _) + + rdd.count() + rdd.count() + + val cachedExecutors = rdd.mapPartitions { _ => + SparkEnv.get.blockManager.hostLocalDirManager.map { localDirManager => + localDirManager.getCachedHostLocalDirs().keySet.iterator + }.getOrElse(Iterator.empty) + }.collect().toSet + + // both executors are caching the dirs of the other one + cachedExecutors should equal(sc.getExecutorIds().toSet) + + // Invalidate the registered executors, disallowing access to their shuffle blocks (without + // deleting the actual shuffle files, so we could access them without the shuffle service). + // As directories are already cached there is no request to external shuffle service. 
+ rpcHandler.applicationRemoved(sc.conf.getAppId, false /* cleanupLocalDirs */) + + // Now Spark will not receive FetchFailed as host local blocks are read from the cached local + // disk directly + rdd.collect().map(_._2).sum should equal(1000) + } + test("SPARK-25888: using external shuffle service fetching disk persisted blocks") { val confWithRddFetchEnabled = conf.clone.set(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED, true) sc = new SparkContext("local-cluster[1,1,1024]", "test", confWithRddFetchEnabled) diff --git a/core/src/test/scala/org/apache/spark/FailureSuite.scala b/core/src/test/scala/org/apache/spark/FailureSuite.scala index 5f79b526a419b..8b75c3a0ba653 100644 --- a/core/src/test/scala/org/apache/spark/FailureSuite.scala +++ b/core/src/test/scala/org/apache/spark/FailureSuite.scala @@ -31,7 +31,7 @@ object FailureSuiteState { var tasksRun = 0 var tasksFailed = 0 - def clear() { + def clear(): Unit = { synchronized { tasksRun = 0 tasksFailed = 0 diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index 6651e38f7ed62..e9ee6b5dfb665 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -36,18 +36,19 @@ import org.apache.hadoop.mapreduce.lib.output.{TextOutputFormat => NewTextOutput import org.apache.spark.internal.config._ import org.apache.spark.rdd.{HadoopRDD, NewHadoopRDD} +import org.apache.spark.serializer.KryoSerializer import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils class FileSuite extends SparkFunSuite with LocalSparkContext { var tempDir: File = _ - override def beforeEach() { + override def beforeEach(): Unit = { super.beforeEach() tempDir = Utils.createTempDir() } - override def afterEach() { + override def afterEach(): Unit = { try { Utils.deleteRecursively(tempDir) } finally { @@ -372,7 +373,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { test 
("prevent user from overwriting the empty directory (old Hadoop API)") { sc = new SparkContext("local", "test") - val randomRDD = sc.parallelize(Array((1, "a"), (1, "a"), (2, "b"), (3, "c")), 1) + val randomRDD = sc.parallelize(Seq((1, "a"), (1, "a"), (2, "b"), (3, "c")), 1) intercept[FileAlreadyExistsException] { randomRDD.saveAsTextFile(tempDir.getPath) } @@ -380,7 +381,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { test ("prevent user from overwriting the non-empty directory (old Hadoop API)") { sc = new SparkContext("local", "test") - val randomRDD = sc.parallelize(Array((1, "a"), (1, "a"), (2, "b"), (3, "c")), 1) + val randomRDD = sc.parallelize(Seq((1, "a"), (1, "a"), (2, "b"), (3, "c")), 1) randomRDD.saveAsTextFile(tempDir.getPath + "/output") assert(new File(tempDir.getPath + "/output/part-00000").exists()) intercept[FileAlreadyExistsException] { @@ -392,7 +393,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { val conf = new SparkConf() conf.setAppName("test").setMaster("local").set("spark.hadoop.validateOutputSpecs", "false") sc = new SparkContext(conf) - val randomRDD = sc.parallelize(Array((1, "a"), (1, "a"), (2, "b"), (3, "c")), 1) + val randomRDD = sc.parallelize(Seq((1, "a"), (1, "a"), (2, "b"), (3, "c")), 1) randomRDD.saveAsTextFile(tempDir.getPath + "/output") assert(new File(tempDir.getPath + "/output/part-00000").exists()) randomRDD.saveAsTextFile(tempDir.getPath + "/output") @@ -402,7 +403,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { test ("prevent user from overwriting the empty directory (new Hadoop API)") { sc = new SparkContext("local", "test") val randomRDD = sc.parallelize( - Array(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) + Seq(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) intercept[FileAlreadyExistsException] { randomRDD.saveAsNewAPIHadoopFile[NewTextOutputFormat[String, String]](tempDir.getPath) } @@ -411,7 +412,7 @@ class FileSuite 
extends SparkFunSuite with LocalSparkContext { test ("prevent user from overwriting the non-empty directory (new Hadoop API)") { sc = new SparkContext("local", "test") val randomRDD = sc.parallelize( - Array(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) + Seq(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) randomRDD.saveAsNewAPIHadoopFile[NewTextOutputFormat[String, String]]( tempDir.getPath + "/output") assert(new File(tempDir.getPath + "/output/part-r-00000").exists()) @@ -425,7 +426,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { conf.setAppName("test").setMaster("local").set("spark.hadoop.validateOutputSpecs", "false") sc = new SparkContext(conf) val randomRDD = sc.parallelize( - Array(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) + Seq(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) randomRDD.saveAsNewAPIHadoopFile[NewTextOutputFormat[String, String]]( tempDir.getPath + "/output") assert(new File(tempDir.getPath + "/output/part-r-00000").exists()) @@ -437,7 +438,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { test ("save Hadoop Dataset through old Hadoop API") { sc = new SparkContext("local", "test") val randomRDD = sc.parallelize( - Array(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) + Seq(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) val job = new JobConf() job.setOutputKeyClass(classOf[String]) job.setOutputValueClass(classOf[String]) @@ -450,7 +451,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { test ("save Hadoop Dataset through new Hadoop API") { sc = new SparkContext("local", "test") val randomRDD = sc.parallelize( - Array(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) + Seq(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) val job = Job.getInstance(sc.hadoopConfiguration) job.setOutputKeyClass(classOf[String]) job.setOutputValueClass(classOf[String]) @@ -559,7 
+560,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { sc = new SparkContext(conf) def testIgnoreEmptySplits( - data: Array[Tuple2[String, String]], + data: Seq[Tuple2[String, String]], actualPartitionNum: Int, expectedPartitionNum: Int): Unit = { val output = new File(tempDir, "output") @@ -581,13 +582,13 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { // Ensure that if no split is empty, we don't lose any splits testIgnoreEmptySplits( - data = Array(("key1", "a"), ("key2", "a"), ("key3", "b")), + data = Seq(("key1", "a"), ("key2", "a"), ("key3", "b")), actualPartitionNum = 2, expectedPartitionNum = 2) // Ensure that if part of the splits are empty, we remove the splits correctly testIgnoreEmptySplits( - data = Array(("key1", "a"), ("key2", "a")), + data = Seq(("key1", "a"), ("key2", "a")), actualPartitionNum = 5, expectedPartitionNum = 2) } @@ -600,7 +601,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { sc = new SparkContext(conf) def testIgnoreEmptySplits( - data: Array[Tuple2[String, String]], + data: Seq[Tuple2[String, String]], actualPartitionNum: Int, expectedPartitionNum: Int): Unit = { val output = new File(tempDir, "output") @@ -624,13 +625,13 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { // Ensure that if no split is empty, we don't lose any splits testIgnoreEmptySplits( - data = Array(("1", "a"), ("2", "a"), ("3", "b")), + data = Seq(("1", "a"), ("2", "a"), ("3", "b")), actualPartitionNum = 2, expectedPartitionNum = 2) // Ensure that if part of the splits are empty, we remove the splits correctly testIgnoreEmptySplits( - data = Array(("1", "a"), ("2", "b")), + data = Seq(("1", "a"), ("2", "b")), actualPartitionNum = 5, expectedPartitionNum = 2) } @@ -700,4 +701,40 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { assert(collectRDDAndDeleteFileBeforeCompute(true).isEmpty) } + + test("SPARK-25100: Support commit tasks when Kyro registration is required") { + // 
Prepare the input file + val inputFilePath = new File(tempDir, "/input").getAbsolutePath + Utils.tryWithResource(new PrintWriter(new File(inputFilePath))) { writer => + for (i <- 1 to 3) { + writer.print(i) + writer.write('\n') + } + } + + // Start a new SparkContext + val conf = new SparkConf(false) + .setMaster("local") + .setAppName("test") + .set("spark.kryo.registrationRequired", "true") + .set("spark.serializer", classOf[KryoSerializer].getName) + sc = new SparkContext(conf) + + // Prepare the input RDD + val pairRDD = sc.textFile(inputFilePath).map(x => (x, x)) + + // Test saveAsTextFile() + val outputFilePath1 = new File(tempDir, "/out1").getAbsolutePath + pairRDD.saveAsTextFile(outputFilePath1) + assert(sc.textFile(outputFilePath1).collect() === Array("(1,1)", "(2,2)", "(3,3)")) + + // Test saveAsNewAPIHadoopDataset() + val outputFilePath2 = new File(tempDir, "/out2").getAbsolutePath + val jobConf = new JobConf() + jobConf.setOutputKeyClass(classOf[IntWritable]) + jobConf.setOutputValueClass(classOf[IntWritable]) + jobConf.set("mapred.output.dir", outputFilePath2) + pairRDD.saveAsNewAPIHadoopDataset(jobConf) + assert(sc.textFile(outputFilePath2).collect() === Array("1\t1", "2\t2", "3\t3")) + } } diff --git a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala index dfe33b1e52695..a9296955d18b4 100644 --- a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala +++ b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala @@ -26,9 +26,11 @@ import scala.concurrent.duration._ import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{mock, spy, verify, when} import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} +import org.scalatest.concurrent.Eventually._ import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.internal.config.DYN_ALLOCATION_TESTING +import 
org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ @@ -55,9 +57,10 @@ class HeartbeatReceiverSuite private var heartbeatReceiverClock: ManualClock = null // Helper private method accessors for HeartbeatReceiver - private val _executorLastSeen = PrivateMethod[collection.Map[String, Long]]('executorLastSeen) - private val _executorTimeoutMs = PrivateMethod[Long]('executorTimeoutMs) - private val _killExecutorThread = PrivateMethod[ExecutorService]('killExecutorThread) + private val _executorLastSeen = + PrivateMethod[collection.Map[String, Long]](Symbol("executorLastSeen")) + private val _executorTimeoutMs = PrivateMethod[Long](Symbol("executorTimeoutMs")) + private val _killExecutorThread = PrivateMethod[ExecutorService](Symbol("killExecutorThread")) /** * Before each test, set up the SparkContext and a custom [[HeartbeatReceiver]] @@ -73,6 +76,7 @@ class HeartbeatReceiverSuite scheduler = mock(classOf[TaskSchedulerImpl]) when(sc.taskScheduler).thenReturn(scheduler) when(scheduler.nodeBlacklist).thenReturn(Predef.Set[String]()) + when(scheduler.resourcesReqsPerTask).thenReturn(Seq.empty) when(scheduler.sc).thenReturn(sc) heartbeatReceiverClock = new ManualClock heartbeatReceiver = new HeartbeatReceiver(sc, heartbeatReceiverClock) @@ -151,7 +155,6 @@ class HeartbeatReceiverSuite heartbeatReceiverClock.advance(executorTimeout) heartbeatReceiverRef.askSync[Boolean](ExpireDeadHosts) // Only the second executor should be expired as a dead host - verify(scheduler).executorLost(meq(executorId2), any()) val trackedExecutors = getTrackedExecutors assert(trackedExecutors.size === 1) assert(trackedExecutors.contains(executorId1)) @@ -175,10 +178,10 @@ class HeartbeatReceiverSuite val dummyExecutorEndpointRef2 = rpcEnv.setupEndpoint("fake-executor-2", dummyExecutorEndpoint2) 
fakeSchedulerBackend.driverEndpoint.askSync[Boolean]( RegisterExecutor(executorId1, dummyExecutorEndpointRef1, "1.2.3.4", 0, Map.empty, Map.empty, - Map.empty)) + Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) fakeSchedulerBackend.driverEndpoint.askSync[Boolean]( RegisterExecutor(executorId2, dummyExecutorEndpointRef2, "1.2.3.5", 0, Map.empty, Map.empty, - Map.empty)) + Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) heartbeatReceiverRef.askSync[Boolean](TaskSchedulerIsSet) addExecutorAndVerify(executorId1) addExecutorAndVerify(executorId2) @@ -207,6 +210,12 @@ class HeartbeatReceiverSuite // explicitly request new executors. For more detail, see SPARK-8119. assert(fakeClusterManager.getTargetNumExecutors === 2) assert(fakeClusterManager.getExecutorIdsToKill === Set(executorId1, executorId2)) + // [SPARK-27348] HeartbeatReceiver should remove lost executor from scheduler backend + eventually(timeout(5.seconds)) { + assert(!fakeSchedulerBackend.getExecutorIds().contains(executorId1)) + assert(!fakeSchedulerBackend.getExecutorIds().contains(executorId2)) + } + fakeSchedulerBackend.stop() } /** Manually send a heartbeat and return the response. 
*/ @@ -276,9 +285,14 @@ private class FakeSchedulerBackend( clusterManagerEndpoint: RpcEndpointRef) extends CoarseGrainedSchedulerBackend(scheduler, rpcEnv) { - protected override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = { + protected override def doRequestTotalExecutors( + resourceProfileToTotalExecs: Map[ResourceProfile, Int]): Future[Boolean] = { clusterManagerEndpoint.ask[Boolean]( - RequestExecutors(requestedTotal, localityAwareTasks, hostToLocalTaskCount, Set.empty)) + RequestExecutors( + resourceProfileToTotalExecs(ResourceProfile.getOrCreateDefaultProfile(conf)), + numLocalityAwareTasksPerResourceProfileId(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID), + rpHostToLocalTaskCount(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID), + Set.empty)) } protected override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = { diff --git a/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala index e7eef8ec5150c..5399d868f46f1 100644 --- a/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/InternalAccumulatorSuite.scala @@ -90,7 +90,7 @@ class InternalAccumulatorSuite extends SparkFunSuite with LocalSparkContext { TaskContext.get().taskMetrics().testAccum.get.add(1) iter } - .reduceByKey { case (x, y) => x + y } + .reduceByKey { (x, y) => x + y } .mapPartitions { iter => TaskContext.get().taskMetrics().testAccum.get.add(10) iter @@ -142,6 +142,7 @@ class InternalAccumulatorSuite extends SparkFunSuite with LocalSparkContext { sid, taskContext.partitionId(), taskContext.partitionId(), + taskContext.partitionId(), "simulated fetch failure") } else { iter @@ -210,7 +211,8 @@ class InternalAccumulatorSuite extends SparkFunSuite with LocalSparkContext { /** * A special [[ContextCleaner]] that saves the IDs of the accumulators registered for cleanup. 
*/ - private class SaveAccumContextCleaner(sc: SparkContext) extends ContextCleaner(sc) { + private class SaveAccumContextCleaner(sc: SparkContext) extends + ContextCleaner(sc, null) { private val accumsRegistered = new ArrayBuffer[Long] override def registerAccumulatorForCleanup(a: AccumulatorV2[_, _]): Unit = { diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala index b533304287cf6..94ad8d8880027 100644 --- a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala +++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala @@ -40,7 +40,7 @@ import org.apache.spark.util.ThreadUtils class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAfter with LocalSparkContext { - override def afterEach() { + override def afterEach(): Unit = { try { resetSparkContext() JobCancellationSuite.taskStartedSemaphore.drainPermits() @@ -127,7 +127,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft // Add a listener to release the semaphore once any tasks are launched. val sem = new Semaphore(0) sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } }) @@ -157,7 +157,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft // Add a listener to release the semaphore once any tasks are launched. val sem = new Semaphore(0) sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } }) @@ -192,7 +192,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft // Add a listener to release the semaphore once any tasks are launched. 
val sem = new Semaphore(0) sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } }) @@ -225,7 +225,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft // Add a listener to release the semaphore once any tasks are launched. val sem = new Semaphore(0) sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } }) @@ -264,7 +264,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft // Add a listener to release the semaphore once any tasks are launched. val sem = new Semaphore(0) sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } }) @@ -301,7 +301,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft sc = new SparkContext("local[2]", "test") sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem1.release() } }) @@ -391,7 +391,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft assert(executionOfInterruptibleCounter.get() < numElements) } - def testCount() { + def testCount(): Unit = { // Cancel before launching any tasks { val f = sc.parallelize(1 to 10000, 2).map { i => Thread.sleep(10); i }.countAsync() @@ -405,7 +405,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft // Add a listener to release the semaphore once any tasks are launched. 
val sem = new Semaphore(0) sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } }) @@ -421,7 +421,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft } } - def testTake() { + def testTake(): Unit = { // Cancel before launching any tasks { val f = sc.parallelize(1 to 10000, 2).map { i => Thread.sleep(10); i }.takeAsync(5000) @@ -435,7 +435,7 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft // Add a listener to release the semaphore once any tasks are launched. val sem = new Semaphore(0) sc.addSparkListener(new SparkListener { - override def onTaskStart(taskStart: SparkListenerTaskStart) { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } }) diff --git a/core/src/test/scala/org/apache/spark/JsonTestUtils.scala b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala index ba367cd476146..8aa7f3c7cb1bf 100644 --- a/core/src/test/scala/org/apache/spark/JsonTestUtils.scala +++ b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala @@ -20,7 +20,7 @@ import org.json4s._ import org.json4s.jackson.JsonMethods trait JsonTestUtils { - def assertValidDataInJson(validateJson: JValue, expectedJson: JValue) { + def assertValidDataInJson(validateJson: JValue, expectedJson: JValue): Unit = { val Diff(c, a, d) = validateJson.diff(expectedJson) val validatePretty = JsonMethods.pretty(validateJson) val expectedPretty = JsonMethods.pretty(expectedJson) diff --git a/core/src/test/scala/org/apache/spark/LocalSparkContext.scala b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala index 05aaaa11451b4..599ea8955491f 100644 --- a/core/src/test/scala/org/apache/spark/LocalSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala @@ -22,17 +22,19 @@ import org.scalatest.BeforeAndAfterAll import 
org.scalatest.BeforeAndAfterEach import org.scalatest.Suite +import org.apache.spark.resource.ResourceProfile + /** Manages a local `sc` `SparkContext` variable, correctly stopping it after each test. */ trait LocalSparkContext extends BeforeAndAfterEach with BeforeAndAfterAll { self: Suite => @transient var sc: SparkContext = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() InternalLoggerFactory.setDefaultFactory(Slf4JLoggerFactory.INSTANCE) } - override def afterEach() { + override def afterEach(): Unit = { try { resetSparkContext() } finally { @@ -42,13 +44,14 @@ trait LocalSparkContext extends BeforeAndAfterEach with BeforeAndAfterAll { self def resetSparkContext(): Unit = { LocalSparkContext.stop(sc) + ResourceProfile.clearDefaultProfile() sc = null } } object LocalSparkContext { - def stop(sc: SparkContext) { + def stop(sc: SparkContext): Unit = { if (sc != null) { sc.stop() } diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index d86975964b558..d5ee19bde8edf 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -64,14 +64,15 @@ class MapOutputTrackerSuite extends SparkFunSuite { val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L)) val size10000 = MapStatus.decompressSize(MapStatus.compressSize(10000L)) tracker.registerMapOutput(10, 0, MapStatus(BlockManagerId("a", "hostA", 1000), - Array(1000L, 10000L))) + Array(1000L, 10000L), 5)) tracker.registerMapOutput(10, 1, MapStatus(BlockManagerId("b", "hostB", 1000), - Array(10000L, 1000L))) + Array(10000L, 1000L), 6)) val statuses = tracker.getMapSizesByExecutorId(10, 0) assert(statuses.toSet === - Seq((BlockManagerId("a", "hostA", 1000), ArrayBuffer((ShuffleBlockId(10, 0, 0), size1000))), - (BlockManagerId("b", "hostB", 1000), ArrayBuffer((ShuffleBlockId(10, 1, 0), 
size10000)))) - .toSet) + Seq((BlockManagerId("a", "hostA", 1000), + ArrayBuffer((ShuffleBlockId(10, 5, 0), size1000, 0))), + (BlockManagerId("b", "hostB", 1000), + ArrayBuffer((ShuffleBlockId(10, 6, 0), size10000, 1)))).toSet) assert(0 == tracker.getNumCachedSerializedBroadcast) tracker.stop() rpcEnv.shutdown() @@ -86,9 +87,9 @@ class MapOutputTrackerSuite extends SparkFunSuite { val compressedSize1000 = MapStatus.compressSize(1000L) val compressedSize10000 = MapStatus.compressSize(10000L) tracker.registerMapOutput(10, 0, MapStatus(BlockManagerId("a", "hostA", 1000), - Array(compressedSize1000, compressedSize10000))) + Array(compressedSize1000, compressedSize10000), 5)) tracker.registerMapOutput(10, 1, MapStatus(BlockManagerId("b", "hostB", 1000), - Array(compressedSize10000, compressedSize1000))) + Array(compressedSize10000, compressedSize1000), 6)) assert(tracker.containsShuffle(10)) assert(tracker.getMapSizesByExecutorId(10, 0).nonEmpty) assert(0 == tracker.getNumCachedSerializedBroadcast) @@ -109,9 +110,9 @@ class MapOutputTrackerSuite extends SparkFunSuite { val compressedSize1000 = MapStatus.compressSize(1000L) val compressedSize10000 = MapStatus.compressSize(10000L) tracker.registerMapOutput(10, 0, MapStatus(BlockManagerId("a", "hostA", 1000), - Array(compressedSize1000, compressedSize1000, compressedSize1000))) + Array(compressedSize1000, compressedSize1000, compressedSize1000), 5)) tracker.registerMapOutput(10, 1, MapStatus(BlockManagerId("b", "hostB", 1000), - Array(compressedSize10000, compressedSize1000, compressedSize1000))) + Array(compressedSize10000, compressedSize1000, compressedSize1000), 6)) assert(0 == tracker.getNumCachedSerializedBroadcast) // As if we had two simultaneous fetch failures @@ -147,10 +148,11 @@ class MapOutputTrackerSuite extends SparkFunSuite { val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L)) masterTracker.registerMapOutput(10, 0, MapStatus( - BlockManagerId("a", "hostA", 1000), Array(1000L))) + 
BlockManagerId("a", "hostA", 1000), Array(1000L), 5)) slaveTracker.updateEpoch(masterTracker.getEpoch) assert(slaveTracker.getMapSizesByExecutorId(10, 0).toSeq === - Seq((BlockManagerId("a", "hostA", 1000), ArrayBuffer((ShuffleBlockId(10, 0, 0), size1000))))) + Seq((BlockManagerId("a", "hostA", 1000), + ArrayBuffer((ShuffleBlockId(10, 5, 0), size1000, 0))))) assert(0 == masterTracker.getNumCachedSerializedBroadcast) val masterTrackerEpochBeforeLossOfMapOutput = masterTracker.getEpoch @@ -184,7 +186,7 @@ class MapOutputTrackerSuite extends SparkFunSuite { // Message size should be ~123B, and no exception should be thrown masterTracker.registerShuffle(10, 1) masterTracker.registerMapOutput(10, 0, MapStatus( - BlockManagerId("88", "mph", 1000), Array.fill[Long](10)(0))) + BlockManagerId("88", "mph", 1000), Array.fill[Long](10)(0), 5)) val senderAddress = RpcAddress("localhost", 12345) val rpcCallContext = mock(classOf[RpcCallContext]) when(rpcCallContext.senderAddress).thenReturn(senderAddress) @@ -218,11 +220,11 @@ class MapOutputTrackerSuite extends SparkFunSuite { // on hostB with output size 3 tracker.registerShuffle(10, 3) tracker.registerMapOutput(10, 0, MapStatus(BlockManagerId("a", "hostA", 1000), - Array(2L))) + Array(2L), 5)) tracker.registerMapOutput(10, 1, MapStatus(BlockManagerId("a", "hostA", 1000), - Array(2L))) + Array(2L), 6)) tracker.registerMapOutput(10, 2, MapStatus(BlockManagerId("b", "hostB", 1000), - Array(3L))) + Array(3L), 7)) // When the threshold is 50%, only host A should be returned as a preferred location // as it has 4 out of 7 bytes of output. 
@@ -262,7 +264,7 @@ class MapOutputTrackerSuite extends SparkFunSuite { masterTracker.registerShuffle(20, 100) (0 until 100).foreach { i => masterTracker.registerMapOutput(20, i, new CompressedMapStatus( - BlockManagerId("999", "mps", 1000), Array.fill[Long](4000000)(0))) + BlockManagerId("999", "mps", 1000), Array.fill[Long](4000000)(0), 5)) } val senderAddress = RpcAddress("localhost", 12345) val rpcCallContext = mock(classOf[RpcCallContext]) @@ -311,16 +313,18 @@ class MapOutputTrackerSuite extends SparkFunSuite { val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L)) val size10000 = MapStatus.decompressSize(MapStatus.compressSize(10000L)) tracker.registerMapOutput(10, 0, MapStatus(BlockManagerId("a", "hostA", 1000), - Array(size0, size1000, size0, size10000))) + Array(size0, size1000, size0, size10000), 5)) tracker.registerMapOutput(10, 1, MapStatus(BlockManagerId("b", "hostB", 1000), - Array(size10000, size0, size1000, size0))) + Array(size10000, size0, size1000, size0), 6)) assert(tracker.containsShuffle(10)) assert(tracker.getMapSizesByExecutorId(10, 0, 4).toSeq === Seq( (BlockManagerId("a", "hostA", 1000), - Seq((ShuffleBlockId(10, 0, 1), size1000), (ShuffleBlockId(10, 0, 3), size10000))), + Seq((ShuffleBlockId(10, 5, 1), size1000, 0), + (ShuffleBlockId(10, 5, 3), size10000, 0))), (BlockManagerId("b", "hostB", 1000), - Seq((ShuffleBlockId(10, 1, 0), size10000), (ShuffleBlockId(10, 1, 2), size1000))) + Seq((ShuffleBlockId(10, 6, 0), size10000, 1), + (ShuffleBlockId(10, 6, 2), size1000, 1))) ) ) diff --git a/core/src/test/scala/org/apache/spark/MapStatusesSerDeserBenchmark.scala b/core/src/test/scala/org/apache/spark/MapStatusesSerDeserBenchmark.scala new file mode 100644 index 0000000000000..78f1246295bf8 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/MapStatusesSerDeserBenchmark.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.scalatest.Assertions._ + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.BenchmarkBase +import org.apache.spark.scheduler.CompressedMapStatus +import org.apache.spark.storage.BlockManagerId + +/** + * Benchmark for MapStatuses serialization & deserialization performance. + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class --jars + * 2. build/sbt "core/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/test:runMain " + * Results will be written to "benchmarks/MapStatusesSerDeserBenchmark-results.txt". 
+ * }}} + */ +object MapStatusesSerDeserBenchmark extends BenchmarkBase { + + var sc: SparkContext = null + var tracker: MapOutputTrackerMaster = null + + def serDeserBenchmark(numMaps: Int, blockSize: Int, enableBroadcast: Boolean): Unit = { + val minBroadcastSize = if (enableBroadcast) { + 0 + } else { + Int.MaxValue + } + + val benchmark = new Benchmark(s"$numMaps MapOutputs, $blockSize blocks " + { + if (enableBroadcast) "w/ " else "w/o " + } + "broadcast", numMaps, output = output) + + val shuffleId = 10 + + tracker.registerShuffle(shuffleId, numMaps) + val r = new scala.util.Random(912) + (0 until numMaps).foreach { i => + tracker.registerMapOutput(shuffleId, i, + new CompressedMapStatus(BlockManagerId(s"node$i", s"node$i.spark.apache.org", 1000), + Array.fill(blockSize) { + // Creating block size ranging from 0byte to 1GB + (r.nextDouble() * 1024 * 1024 * 1024).toLong + }, i)) + } + + val shuffleStatus = tracker.shuffleStatuses.get(shuffleId).head + + var serializedMapStatusSizes = 0 + var serializedBroadcastSizes = 0 + + val (serializedMapStatus, serializedBroadcast) = MapOutputTracker.serializeMapStatuses( + shuffleStatus.mapStatuses, tracker.broadcastManager, tracker.isLocal, minBroadcastSize, + sc.getConf) + serializedMapStatusSizes = serializedMapStatus.length + if (serializedBroadcast != null) { + serializedBroadcastSizes = serializedBroadcast.value.length + } + + benchmark.addCase("Serialization") { _ => + MapOutputTracker.serializeMapStatuses(shuffleStatus.mapStatuses, tracker.broadcastManager, + tracker.isLocal, minBroadcastSize, sc.getConf) + } + + benchmark.addCase("Deserialization") { _ => + val result = MapOutputTracker.deserializeMapStatuses(serializedMapStatus, sc.getConf) + assert(result.length == numMaps) + } + + benchmark.run() + // scalastyle:off + import org.apache.commons.io.FileUtils + benchmark.out.println("Compressed Serialized MapStatus sizes: " + + FileUtils.byteCountToDisplaySize(serializedMapStatusSizes)) + 
benchmark.out.println("Compressed Serialized Broadcast MapStatus sizes: " + + FileUtils.byteCountToDisplaySize(serializedBroadcastSizes) + "\n\n") + // scalastyle:on + + tracker.unregisterShuffle(shuffleId) + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + createSparkContext() + tracker = sc.env.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster] + val rpcEnv = sc.env.rpcEnv + val masterEndpoint = new MapOutputTrackerMasterEndpoint(rpcEnv, tracker, sc.getConf) + rpcEnv.stop(tracker.trackerEndpoint) + rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, masterEndpoint) + + serDeserBenchmark(200000, 10, true) + serDeserBenchmark(200000, 10, false) + + serDeserBenchmark(200000, 100, true) + serDeserBenchmark(200000, 100, false) + + serDeserBenchmark(200000, 1000, true) + serDeserBenchmark(200000, 1000, false) + } + + def createSparkContext(): Unit = { + val conf = new SparkConf() + if (sc != null) { + sc.stop() + } + sc = new SparkContext("local", "MapStatusesSerializationBenchmark", conf) + } + + override def afterAll(): Unit = { + tracker.stop() + if (sc != null) { + sc.stop() + } + } +} diff --git a/core/src/test/scala/org/apache/spark/PartitioningSuite.scala b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala index 9206b5debf4f3..1a3259c707025 100644 --- a/core/src/test/scala/org/apache/spark/PartitioningSuite.scala +++ b/core/src/test/scala/org/apache/spark/PartitioningSuite.scala @@ -70,7 +70,7 @@ class PartitioningSuite extends SparkFunSuite with SharedSparkContext with Priva // 1000 partitions. 
val partitionSizes = List(1, 2, 10, 100, 500, 1000, 1500) val partitioners = partitionSizes.map(p => (p, new RangePartitioner(p, rdd))) - val decoratedRangeBounds = PrivateMethod[Array[Int]]('rangeBounds) + val decoratedRangeBounds = PrivateMethod[Array[Int]](Symbol("rangeBounds")) partitioners.foreach { case (numPartitions, partitioner) => val rangeBounds = partitioner.invokePrivate(decoratedRangeBounds()) for (element <- 1 to 1000) { @@ -262,11 +262,11 @@ class PartitioningSuite extends SparkFunSuite with SharedSparkContext with Priva test("defaultPartitioner") { val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 150) - val rdd2 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))) + val rdd2 = sc.parallelize(Seq((1, 2), (2, 3), (2, 4), (3, 4))) .partitionBy(new HashPartitioner(10)) - val rdd3 = sc.parallelize(Array((1, 6), (7, 8), (3, 10), (5, 12), (13, 14))) + val rdd3 = sc.parallelize(Seq((1, 6), (7, 8), (3, 10), (5, 12), (13, 14))) .partitionBy(new HashPartitioner(100)) - val rdd4 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))) + val rdd4 = sc.parallelize(Seq((1, 2), (2, 3), (2, 4), (3, 4))) .partitionBy(new HashPartitioner(9)) val rdd5 = sc.parallelize((1 to 10).map(x => (x, x)), 11) @@ -289,14 +289,14 @@ class PartitioningSuite extends SparkFunSuite with SharedSparkContext with Priva sc.conf.set("spark.default.parallelism", "4") val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 150) - val rdd2 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))) + val rdd2 = sc.parallelize(Seq((1, 2), (2, 3), (2, 4), (3, 4))) .partitionBy(new HashPartitioner(10)) - val rdd3 = sc.parallelize(Array((1, 6), (7, 8), (3, 10), (5, 12), (13, 14))) + val rdd3 = sc.parallelize(Seq((1, 6), (7, 8), (3, 10), (5, 12), (13, 14))) .partitionBy(new HashPartitioner(100)) - val rdd4 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))) + val rdd4 = sc.parallelize(Seq((1, 2), (2, 3), (2, 4), (3, 4))) .partitionBy(new HashPartitioner(9)) val rdd5 = sc.parallelize((1 
to 10).map(x => (x, x)), 11) - val rdd6 = sc.parallelize(Array((1, 2), (2, 3), (2, 4), (3, 4))) + val rdd6 = sc.parallelize(Seq((1, 2), (2, 3), (2, 4), (3, 4))) .partitionBy(new HashPartitioner(3)) val partitioner1 = Partitioner.defaultPartitioner(rdd1, rdd2) diff --git a/core/src/test/scala/org/apache/spark/SharedSparkContext.scala b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala index 1aa1c421d792e..bdeb631878350 100644 --- a/core/src/test/scala/org/apache/spark/SharedSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala @@ -43,12 +43,12 @@ trait SharedSparkContext extends BeforeAndAfterAll with BeforeAndAfterEach { sel } } - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() initializeContext() } - override def afterAll() { + override def afterAll(): Unit = { try { LocalSparkContext.stop(_sc) _sc = null diff --git a/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala index 73638d9b131ea..378a361845139 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala @@ -23,7 +23,7 @@ class ShuffleNettySuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with Netty shuffle mode. - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() conf.set("spark.shuffle.blockTransferService", "netty") } diff --git a/core/src/test/scala/org/apache/spark/ShuffleOldFetchProtocolSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleOldFetchProtocolSuite.scala new file mode 100644 index 0000000000000..a878593ba601a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ShuffleOldFetchProtocolSuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.scalatest.BeforeAndAfterAll + +class ShuffleOldFetchProtocolSuite extends ShuffleSuite with BeforeAndAfterAll { + + // This test suite should run all tests by setting spark.shuffle.useOldFetchProtocol=true. + override def beforeAll(): Unit = { + super.beforeAll() + conf.set("spark.shuffle.useOldFetchProtocol", "true") + } +} diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 923c9c90447fd..9e39271bdf9ee 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.scheduler.{MapStatus, MyRDD, SparkListener, SparkListene import org.apache.spark.serializer.KryoSerializer import org.apache.spark.shuffle.ShuffleWriter import org.apache.spark.storage.{ShuffleBlockId, ShuffleDataBlockId, ShuffleIndexBlockId} -import org.apache.spark.util.{MutablePair, Utils} +import org.apache.spark.util.MutablePair abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkContext { @@ -44,7 +44,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC test("groupByKey without compression") { val myConf = 
conf.clone().set(config.SHUFFLE_COMPRESS, false) sc = new SparkContext("local", "test", myConf) - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1)), 4) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1)), 4) val groups = pairs.groupByKey(4).collect() assert(groups.size === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 @@ -360,7 +360,7 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC val metricsSystem = sc.env.metricsSystem val shuffleMapRdd = new MyRDD(sc, 1, Nil) val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1)) - val shuffleHandle = manager.registerShuffle(0, 1, shuffleDep) + val shuffleHandle = manager.registerShuffle(0, shuffleDep) mapTrackerMaster.registerShuffle(0, 1) // first attempt -- its successful @@ -487,7 +487,7 @@ object ShuffleSuite { @volatile var bytesWritten: Long = 0 @volatile var bytesRead: Long = 0 val listener = new SparkListener { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { recordsWritten += taskEnd.taskMetrics.shuffleWriteMetrics.recordsWritten bytesWritten += taskEnd.taskMetrics.shuffleWriteMetrics.bytesWritten recordsRead += taskEnd.taskMetrics.shuffleReadMetrics.recordsRead @@ -498,7 +498,7 @@ object ShuffleSuite { job - sc.listenerBus.waitUntilEmpty(500) + sc.listenerBus.waitUntilEmpty() AggregatedShuffleMetrics(recordsWritten, recordsRead, bytesWritten, bytesRead) } } diff --git a/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala b/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala index 1aceda498d7c7..1a563621a5179 100644 --- a/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala @@ -37,7 +37,7 @@ class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { private var tempDir: File = _ - override def beforeAll() { + override def beforeAll(): Unit = { 
super.beforeAll() // Once 'spark.local.dir' is set, it is cached. Unless this is manually cleared // before/after a test, it could return the same directory even if this property diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index 9f00131c8dc20..3bc2061c4f2ad 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -449,13 +449,77 @@ class SparkConfSuite extends SparkFunSuite with LocalSparkContext with ResetSyst conf.remove(TASK_FPGA_ID.amountConf) // Ignore invalid prefix - conf.set(ResourceID("spark.invalid.prefix", FPGA).amountConf, "1") + conf.set(new ResourceID("spark.invalid.prefix", FPGA).amountConf, "1") taskResourceRequirement = parseResourceRequirements(conf, SPARK_TASK_PREFIX) .map(req => (req.resourceName, req.amount)).toMap assert(taskResourceRequirement.size == 1) assert(taskResourceRequirement.get(FPGA).isEmpty) } + + test("test task resource requirement with 0 amount") { + val conf = new SparkConf() + conf.set(TASK_GPU_ID.amountConf, "2") + conf.set(TASK_FPGA_ID.amountConf, "0") + var taskResourceRequirement = + parseResourceRequirements(conf, SPARK_TASK_PREFIX) + .map(req => (req.resourceName, req.amount)).toMap + + assert(taskResourceRequirement.size == 1) + assert(taskResourceRequirement(GPU) == 2) + } + + + test("Ensure that we can configure fractional resources for a task") { + val ratioSlots = Seq( + (0.10, 10), (0.11, 9), (0.125, 8), (0.14, 7), (0.16, 6), + (0.20, 5), (0.25, 4), (0.33, 3), (0.5, 2), (1.0, 1), + // if the amount is fractional greater than 0.5 and less than 1.0 we throw + (0.51, 1), (0.9, 1), + // if the amount is greater than one is not whole, we throw + (1.5, 0), (2.5, 0), + // it's ok if the amount is whole, and greater than 1 + // parts are 1 because we get a whole part of a resource + (2.0, 1), (3.0, 1), (4.0, 1)) + ratioSlots.foreach { + case (ratio, slots) 
=> + val conf = new SparkConf() + conf.set(TASK_GPU_ID.amountConf, ratio.toString) + if (ratio > 0.5 && ratio % 1 != 0) { + assertThrows[SparkException] { + parseResourceRequirements(conf, SPARK_TASK_PREFIX) + } + } else { + val reqs = parseResourceRequirements(conf, SPARK_TASK_PREFIX) + assert(reqs.size == 1) + assert(reqs.head.amount == Math.ceil(ratio).toInt) + assert(reqs.head.numParts == slots) + } + } + } + + test("Non-task resources are never fractional") { + val ratioSlots = Seq( + // if the amount provided is not a whole number, we throw + (0.25, 0), (0.5, 0), (1.5, 0), + // otherwise we are successful at parsing resources + (1.0, 1), (2.0, 2), (3.0, 3)) + ratioSlots.foreach { + case (ratio, slots) => + val conf = new SparkConf() + conf.set(EXECUTOR_GPU_ID.amountConf, ratio.toString) + if (ratio % 1 != 0) { + assertThrows[SparkException] { + parseResourceRequirements(conf, SPARK_EXECUTOR_PREFIX) + } + } else { + val reqs = parseResourceRequirements(conf, SPARK_EXECUTOR_PREFIX) + assert(reqs.size == 1) + assert(reqs.head.amount == slots) + assert(reqs.head.numParts == 1) + } + } + } } class Class1 {} @@ -463,7 +527,7 @@ class Class2 {} class Class3 {} class CustomRegistrator extends KryoRegistrator { - def registerClasses(kryo: Kryo) { + def registerClasses(kryo: Kryo): Unit = { kryo.register(classOf[Class2]) } } diff --git a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala index 536b4aec75623..6271ce507fddb 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala @@ -63,7 +63,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext { val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() assert(sc.getRDDStorageInfo.length === 0) rdd.collect() - sc.listenerBus.waitUntilEmpty(10000) + sc.listenerBus.waitUntilEmpty() eventually(timeout(10.seconds), 
interval(100.milliseconds)) { assert(sc.getRDDStorageInfo.length === 1) } @@ -82,7 +82,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext { package object testPackage extends Assertions { private val CALL_SITE_REGEX = "(.+) at (.+):([0-9]+)".r - def runCallSiteTest(sc: SparkContext) { + def runCallSiteTest(sc: SparkContext): Unit = { val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2) val rddCreationSite = rdd.getCreationSite val curCallSite = sc.getCallSite().shortForm // note: 2 lines after definition of "rdd" diff --git a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala index 811b9757232e2..0c72f770a787c 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala @@ -46,7 +46,7 @@ class SparkContextSchedulerCreationSuite // real schedulers, so we don't want to create a full SparkContext with the desired scheduler. 
sc = new SparkContext("local", "test", conf) val createTaskSchedulerMethod = - PrivateMethod[Tuple2[SchedulerBackend, TaskScheduler]]('createTaskScheduler) + PrivateMethod[Tuple2[SchedulerBackend, TaskScheduler]](Symbol("createTaskScheduler")) val (_, sched) = SparkContext invokePrivate createTaskSchedulerMethod(sc, master, deployMode) try { diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 786f55c96a3e8..b6dfa69015c28 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -36,6 +36,7 @@ import org.scalatest.concurrent.Eventually import org.apache.spark.TestUtils._ import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests._ import org.apache.spark.internal.config.UI._ import org.apache.spark.resource.ResourceAllocation import org.apache.spark.resource.ResourceUtils._ @@ -233,6 +234,42 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } } + test("SPARK-30126: addFile when file path contains spaces with recursive works") { + withTempDir { dir => + try { + val sep = File.separator + val tmpDir = Utils.createTempDir(dir.getAbsolutePath + sep + "test space") + val tmpConfFile1 = File.createTempFile("test file", ".conf", tmpDir) + + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addFile(tmpConfFile1.getAbsolutePath, true) + + assert(sc.listFiles().size == 1) + assert(sc.listFiles().head.contains(new Path(tmpConfFile1.getName).toUri.toString)) + } finally { + sc.stop() + } + } + } + + test("SPARK-30126: addFile when file path contains spaces without recursive works") { + withTempDir { dir => + try { + val sep = File.separator + val tmpDir = Utils.createTempDir(dir.getAbsolutePath + sep + "test space") + val tmpConfFile2 = File.createTempFile("test file", ".conf", tmpDir) + + sc = new 
SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addFile(tmpConfFile2.getAbsolutePath) + + assert(sc.listFiles().size == 1) + assert(sc.listFiles().head.contains(new Path(tmpConfFile2.getName).toUri.toString)) + } finally { + sc.stop() + } + } + } + test("addFile recursive can't add directories by default") { withTempDir { dir => try { @@ -294,6 +331,24 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } } + test("SPARK-30126: add jar when path contains spaces") { + withTempDir { dir => + try { + val sep = File.separator + val tmpDir = Utils.createTempDir(dir.getAbsolutePath + sep + "test space") + val tmpJar = File.createTempFile("test", ".jar", tmpDir) + + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addJar(tmpJar.getAbsolutePath) + + assert(sc.listJars().size == 1) + assert(sc.listJars().head.contains(tmpJar.getName)) + } finally { + sc.stop() + } + } + } + test("add jar with invalid path") { withTempDir { tmpDir => val tmpJar = File.createTempFile("test", ".jar", tmpDir) @@ -450,7 +505,9 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) sc.setLocalProperty("testProperty", "testValue") var result = "unset"; - val thread = new Thread() { override def run() = {result = sc.getLocalProperty("testProperty")}} + val thread = new Thread() { + override def run(): Unit = {result = sc.getLocalProperty("testProperty")} + } thread.start() thread.join() sc.stop() @@ -461,10 +518,10 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) var result = "unset"; val thread1 = new Thread() { - override def run() = {sc.setLocalProperty("testProperty", "testValue")}} + override def run(): Unit = {sc.setLocalProperty("testProperty", "testValue")}} // testProperty should be 
unset and thus return null val thread2 = new Thread() { - override def run() = {result = sc.getLocalProperty("testProperty")}} + override def run(): Unit = {result = sc.getLocalProperty("testProperty")}} thread1.start() thread1.join() thread2.start() @@ -705,7 +762,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu if (context.stageAttemptNumber == 0) { if (context.partitionId == 0) { // Make the first task in the first stage attempt fail. - throw new FetchFailedException(SparkEnv.get.blockManager.blockManagerId, 0, 0, 0, + throw new FetchFailedException(SparkEnv.get.blockManager.blockManagerId, 0, 0L, 0, 0, new java.io.IOException("fake")) } else { // Make the second task in the first stage attempt sleep to generate a zombie task @@ -716,7 +773,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } x }.collect() - sc.listenerBus.waitUntilEmpty(10000) + sc.listenerBus.waitUntilEmpty() // As executors will send the metrics of running tasks via heartbeat, we can use this to check // whether there is any running task. 
eventually(timeout(10.seconds)) { @@ -728,7 +785,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } test(s"Avoid setting ${CPUS_PER_TASK.key} unreasonably (SPARK-27192)") { - val FAIL_REASON = s"has to be >= the task config: ${CPUS_PER_TASK.key}" + val FAIL_REASON = " has to be >= the number of cpus per task" Seq( ("local", 2, None), ("local[2]", 3, None), @@ -761,7 +818,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(conf) // Ensure all executors has started - TestUtils.waitUntilExecutorsUp(sc, 1, 10000) + TestUtils.waitUntilExecutorsUp(sc, 1, 60000) assert(sc.resources.size === 1) assert(sc.resources.get(GPU).get.addresses === Array("5", "6")) assert(sc.resources.get(GPU).get.name === "gpu") @@ -790,7 +847,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(conf) // Ensure all executors has started - TestUtils.waitUntilExecutorsUp(sc, 1, 10000) + TestUtils.waitUntilExecutorsUp(sc, 1, 60000) // driver gpu resources file should take precedence over the script assert(sc.resources.size === 1) assert(sc.resources.get(GPU).get.addresses === Array("0", "1", "8")) @@ -808,9 +865,8 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(conf) }.getMessage() - assert(error.contains("The executor resource config: spark.executor.resource.gpu.amount " + - "needs to be specified since a task requirement config: spark.task.resource.gpu.amount " + - "was specified")) + assert(error.contains("No executor resource configs were not specified for the following " + + "task configs: gpu")) } test("Test parsing resources executor config < task requirements") { @@ -824,15 +880,15 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(conf) }.getMessage() - assert(error.contains("The executor resource config: 
spark.executor.resource.gpu.amount = 1 " + - "has to be >= the requested amount in task resource config: " + - "spark.task.resource.gpu.amount = 2")) + assert(error.contains("The executor resource: gpu, amount: 1 needs to be >= the task " + + "resource request amount of 2.0")) } test("Parse resources executor config not the same multiple numbers of the task requirements") { val conf = new SparkConf() .setMaster("local-cluster[1, 1, 1024]") .setAppName("test-cluster") + conf.set(RESOURCES_WARNING_TESTING, true) conf.set(TASK_GPU_ID.amountConf, "2") conf.set(EXECUTOR_GPU_ID.amountConf, "4") @@ -840,9 +896,10 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu sc = new SparkContext(conf) }.getMessage() - assert(error.contains("The configuration of resource: gpu (exec = 4, task = 2) will result " + - "in wasted resources due to resource CPU limiting the number of runnable tasks per " + - "executor to: 1. Please adjust your configuration.")) + assert(error.contains( + "The configuration of resource: gpu (exec = 4, task = 2.0/1, runnable tasks = 2) will " + + "result in wasted resources due to resource cpus limiting the number of runnable " + + "tasks per executor to: 1. 
Please adjust your configuration.")) } test("test resource scheduling under local-cluster mode") { @@ -854,7 +911,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu """{"name": "gpu","addresses":["0", "1", "2", "3", "4", "5", "6", "7", "8"]}""") val conf = new SparkConf() - .setMaster("local-cluster[3, 3, 1024]") + .setMaster("local-cluster[3, 1, 1024]") .setAppName("test-cluster") .set(WORKER_GPU_ID.amountConf, "3") .set(WORKER_GPU_ID.discoveryScriptConf, discoveryScript) diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index 9dd113262653b..cf4400e080e37 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -20,15 +20,17 @@ package org.apache.spark // scalastyle:off import java.io.File -import scala.annotation.tailrec +import org.apache.log4j.spi.LoggingEvent -import org.apache.log4j.{Appender, Level, Logger} +import scala.annotation.tailrec +import org.apache.log4j.{Appender, AppenderSkeleton, Level, Logger} import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, BeforeAndAfterEach, FunSuite, Outcome} - import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.util.{AccumulatorContext, Utils} +import scala.collection.mutable.ArrayBuffer + /** * Base abstract class for all unit tests in Spark for handling common functionality. * @@ -186,4 +188,19 @@ abstract class SparkFunSuite } } } + + class LogAppender(msg: String = "", maxEvents: Int = 1000) extends AppenderSkeleton { + val loggingEvents = new ArrayBuffer[LoggingEvent]() + + override def append(loggingEvent: LoggingEvent): Unit = { + if (loggingEvents.size >= maxEvents) { + val loggingInfo = if (msg == "") "." else s" while logging $msg." 
+ throw new IllegalStateException( + s"Number of events reached the limit of $maxEvents$loggingInfo") + } + loggingEvents.append(loggingEvent) + } + override def close(): Unit = {} + override def requiresLayout(): Boolean = false + } } diff --git a/core/src/test/scala/org/apache/spark/ThreadingSuite.scala b/core/src/test/scala/org/apache/spark/ThreadingSuite.scala index 5cf9c087e1dcb..bb04d0d263253 100644 --- a/core/src/test/scala/org/apache/spark/ThreadingSuite.scala +++ b/core/src/test/scala/org/apache/spark/ThreadingSuite.scala @@ -29,7 +29,7 @@ object ThreadingSuiteState { val runningThreads = new AtomicInteger val failed = new AtomicBoolean - def clear() { + def clear(): Unit = { runningThreads.set(0) failed.set(false) } @@ -44,7 +44,7 @@ class ThreadingSuite extends SparkFunSuite with LocalSparkContext with Logging { @volatile var answer1: Int = 0 @volatile var answer2: Int = 0 new Thread { - override def run() { + override def run(): Unit = { answer1 = nums.reduce(_ + _) answer2 = nums.first() // This will run "locally" in the current thread sem.release() @@ -62,7 +62,7 @@ class ThreadingSuite extends SparkFunSuite with LocalSparkContext with Logging { @volatile var ok = true for (i <- 0 until 10) { new Thread { - override def run() { + override def run(): Unit = { val answer1 = nums.reduce(_ + _) if (answer1 != 55) { printf("In thread %d: answer1 was %d\n", i, answer1) @@ -90,7 +90,7 @@ class ThreadingSuite extends SparkFunSuite with LocalSparkContext with Logging { @volatile var ok = true for (i <- 0 until 10) { new Thread { - override def run() { + override def run(): Unit = { val answer1 = nums.reduce(_ + _) if (answer1 != 55) { printf("In thread %d: answer1 was %d\n", i, answer1) @@ -121,7 +121,7 @@ class ThreadingSuite extends SparkFunSuite with LocalSparkContext with Logging { var throwable: Option[Throwable] = None for (i <- 0 until 2) { new Thread { - override def run() { + override def run(): Unit = { try { val ans = nums.map(number => { val 
running = ThreadingSuiteState.runningThreads @@ -161,7 +161,7 @@ class ThreadingSuite extends SparkFunSuite with LocalSparkContext with Logging { var throwable: Option[Throwable] = None val threads = (1 to 5).map { i => new Thread() { - override def run() { + override def run(): Unit = { try { sc.setLocalProperty("test", i.toString) assert(sc.getLocalProperty("test") === i.toString) @@ -189,7 +189,7 @@ class ThreadingSuite extends SparkFunSuite with LocalSparkContext with Logging { var throwable: Option[Throwable] = None val threads = (1 to 5).map { i => new Thread() { - override def run() { + override def run(): Unit = { try { assert(sc.getLocalProperty("test") === "parent") sc.setLocalProperty("test", i.toString) diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index 73f9d0e2bc0e1..9629f5ab1a3dd 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -26,6 +26,7 @@ import scala.util.Try import org.apache.commons.io.output.TeeOutputStream import org.apache.commons.lang3.SystemUtils +import org.scalatest.Assertions._ import org.apache.spark.util.Utils @@ -141,12 +142,14 @@ private[spark] class Benchmark( val minIters = if (overrideNumIters != 0) overrideNumIters else minNumIters val minDuration = if (overrideNumIters != 0) 0 else minTime.toNanos val runTimes = ArrayBuffer[Long]() + var totalTime = 0L var i = 0 - while (i < minIters || runTimes.sum < minDuration) { + while (i < minIters || totalTime < minDuration) { val timer = new Benchmark.Timer(i) f(timer) val runTime = timer.totalTime() runTimes += runTime + totalTime += runTime if (outputPerIteration) { // scalastyle:off diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index a6666db4e95c3..55e34b32fe0d4 100644 --- 
a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -21,6 +21,7 @@ import java.io.{File, FileOutputStream, OutputStream} /** * A base class for generate benchmark results to a file. + * For JDK9+, JDK major version number is added to the file names to distinguish the results. */ abstract class BenchmarkBase { var output: Option[OutputStream] = None @@ -43,7 +44,9 @@ abstract class BenchmarkBase { def main(args: Array[String]): Unit = { val regenerateBenchmarkFiles: Boolean = System.getenv("SPARK_GENERATE_BENCHMARK_FILES") == "1" if (regenerateBenchmarkFiles) { - val resultFileName = s"${this.getClass.getSimpleName.replace("$", "")}-results.txt" + val version = System.getProperty("java.version").split("\\D+")(0).toInt + val jdkString = if (version > 8) s"-jdk$version" else "" + val resultFileName = s"${this.getClass.getSimpleName.replace("$", "")}$jdkString-results.txt" val file = new File(s"benchmarks/$resultFileName") if (!file.exists()) { file.createNewFile() diff --git a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala index 66b2f487dc1cb..a6776ee077894 100644 --- a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala +++ b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala @@ -194,11 +194,12 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext with Encryptio * In between each step, this test verifies that the broadcast blocks are present only on the * expected nodes. 
*/ - private def testUnpersistTorrentBroadcast(distributed: Boolean, removeFromDriver: Boolean) { + private def testUnpersistTorrentBroadcast(distributed: Boolean, + removeFromDriver: Boolean): Unit = { val numSlaves = if (distributed) 2 else 0 // Verify that blocks are persisted only on the driver - def afterCreation(broadcastId: Long, bmm: BlockManagerMaster) { + def afterCreation(broadcastId: Long, bmm: BlockManagerMaster): Unit = { var blockId = BroadcastBlockId(broadcastId) var statuses = bmm.getBlockStatus(blockId, askSlaves = true) assert(statuses.size === 1) @@ -209,7 +210,7 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext with Encryptio } // Verify that blocks are persisted in both the executors and the driver - def afterUsingBroadcast(broadcastId: Long, bmm: BlockManagerMaster) { + def afterUsingBroadcast(broadcastId: Long, bmm: BlockManagerMaster): Unit = { var blockId = BroadcastBlockId(broadcastId) val statuses = bmm.getBlockStatus(blockId, askSlaves = true) assert(statuses.size === numSlaves + 1) @@ -220,7 +221,7 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext with Encryptio // Verify that blocks are unpersisted on all executors, and on all nodes if removeFromDriver // is true. 
- def afterUnpersist(broadcastId: Long, bmm: BlockManagerMaster) { + def afterUnpersist(broadcastId: Long, bmm: BlockManagerMaster): Unit = { var blockId = BroadcastBlockId(broadcastId) var expectedNumBlocks = if (removeFromDriver) 0 else 1 var statuses = bmm.getBlockStatus(blockId, askSlaves = true) @@ -251,7 +252,7 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext with Encryptio afterCreation: (Long, BlockManagerMaster) => Unit, afterUsingBroadcast: (Long, BlockManagerMaster) => Unit, afterUnpersist: (Long, BlockManagerMaster) => Unit, - removeFromDriver: Boolean) { + removeFromDriver: Boolean): Unit = { sc = if (distributed) { val _sc = @@ -307,7 +308,7 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext with Encryptio package object testPackage extends Assertions { - def runCallSiteTest(sc: SparkContext) { + def runCallSiteTest(sc: SparkContext): Unit = { val broadcast = sc.broadcast(Array(1, 2, 3, 4)) broadcast.destroy(blocking = true) val thrown = intercept[SparkException] { broadcast.value } diff --git a/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceDbSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceDbSuite.scala index 9cfb8a647ad89..6914714dce6eb 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceDbSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceDbSuite.scala @@ -46,7 +46,7 @@ class ExternalShuffleServiceDbSuite extends SparkFunSuite { var blockHandler: ExternalBlockHandler = _ var blockResolver: ExternalShuffleBlockResolver = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() sparkConf = new SparkConf() sparkConf.set("spark.shuffle.service.enabled", "true") @@ -63,7 +63,7 @@ class ExternalShuffleServiceDbSuite extends SparkFunSuite { registerExecutor() } - override def afterAll() { + override def afterAll(): Unit = { try { dataContext.cleanup() } finally { diff --git 
a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala index ad402c0e905ae..eeccf56cbf02e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala @@ -89,7 +89,7 @@ class JsonProtocolSuite extends SparkFunSuite with JsonTestUtils { assertValidDataInJson(output, JsonMethods.parse(JsonConstants.workerStateJsonStr)) } - def assertValidJson(json: JValue) { + def assertValidJson(json: JValue): Unit = { try { JsonMethods.parse(JsonMethods.compact(json)) } catch { diff --git a/core/src/test/scala/org/apache/spark/deploy/LogUrlsStandaloneSuite.scala b/core/src/test/scala/org/apache/spark/deploy/LogUrlsStandaloneSuite.scala index cbdf1755b0c5b..84fc16979925b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/LogUrlsStandaloneSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/LogUrlsStandaloneSuite.scala @@ -29,9 +29,6 @@ import org.apache.spark.util.SparkConfWithEnv class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { - /** Length of time to wait while draining listener events. 
*/ - private val WAIT_TIMEOUT_MILLIS = 10000 - test("verify that correct log urls get propagated from workers") { sc = new SparkContext("local-cluster[2,1,1024]", "test") @@ -41,7 +38,7 @@ class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) // Browse to each URL to check that it's valid @@ -61,7 +58,7 @@ class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo] assert(listeners.size === 1) val listener = listeners(0) @@ -77,7 +74,7 @@ class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { private[spark] class SaveExecutorInfo extends SparkListener { val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() - override def onExecutorAdded(executor: SparkListenerExecutorAdded) { + override def onExecutorAdded(executor: SparkListenerExecutorAdded): Unit = { addedExecutorInfos(executor.executorId) = executor.executorInfo } } diff --git a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala index ef947eb074647..d04d9b6dcb2be 100644 --- a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala @@ -58,7 +58,7 @@ class RPackageUtilsSuite /** Simple PrintStream that reads data into a buffer */ private class BufferPrintStream extends PrintStream(noOpOutputStream) { // scalastyle:off println - override def println(line: String) 
{ + override def println(line: String): Unit = { // scalastyle:on println lineBuffer += line } diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 385f549aa1ad9..9d4736825618e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -39,11 +39,11 @@ import org.apache.spark.TestUtils.JavaSourceFromString import org.apache.spark.api.r.RUtils import org.apache.spark.deploy.SparkSubmit._ import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate +import org.apache.spark.deploy.history.EventLogFileReader import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.launcher.SparkLauncher -import org.apache.spark.scheduler.EventLoggingListener import org.apache.spark.util.{CommandLineUtils, ResetSystemProperties, Utils} trait TestPrematureExit { @@ -57,7 +57,7 @@ trait TestPrematureExit { private class BufferPrintStream extends PrintStream(noOpOutputStream) { var lineBuffer = ArrayBuffer[String]() // scalastyle:off println - override def println(line: String) { + override def println(line: String): Unit = { lineBuffer += line } // scalastyle:on println @@ -121,7 +121,7 @@ class SparkSubmitSuite private val submit = new SparkSubmit() - override def beforeEach() { + override def beforeEach(): Unit = { super.beforeEach() } @@ -453,6 +453,83 @@ class SparkSubmitSuite conf.get("spark.kubernetes.driver.container.image") should be ("bar") } + /** + * Helper function for testing main class resolution on remote JAR files. 
+ * + * @param tempDir path to temporary directory + * @param deployMode either "client" or "cluster" + * @return a pair of the JAR file and the 4-tuple returned by + * [[org.apache.spark.deploy.SparkSubmit#prepareSubmitEnvironment]] + */ + private def testResolveMainClassOnRemoteJar( + tempDir: File, + deployMode: String + ): (File, (Seq[String], Seq[String], SparkConf, String)) = { + val excFile = TestUtils.createCompiledClass("SomeMainClass", tempDir, "", null, Seq.empty) + val jarFile = new File(tempDir, "s3-mainClass-test-%s.jar".format(System.currentTimeMillis())) + val jarUrl = TestUtils.createJar( + Seq(excFile), + jarFile, + directoryPrefix = Some(tempDir.toString), + mainClass = Some("SomeMainClass")) + + val hadoopConf = new Configuration() + updateConfWithFakeS3Fs(hadoopConf) + + val clArgs = Seq( + "--name", "testApp", + "--master", "yarn", + "--deploy-mode", deployMode, + "--conf", "spark.hadoop.fs.s3a.impl=org.apache.spark.deploy.TestFileSystem", + "--conf", "spark.hadoop.fs.s3a.impl.disable.cache=true", + s"s3a://${jarUrl.getPath}", + "arg1", "arg2") + + val appArgs = new SparkSubmitArguments(clArgs) + (jarFile, submit.prepareSubmitEnvironment(appArgs, conf = Some(hadoopConf))) + } + + test("automatically sets mainClass if primary resource is S3 JAR in client mode") { + withTempDir { tempDir => + val (jarFile, (childArgs, classpaths, _, mainClass_)) = testResolveMainClassOnRemoteJar( + tempDir, "client" + ) + + mainClass_ should be ("SomeMainClass") + classpaths should have length 1 + classpaths.head should endWith (jarFile.getName) + childArgs.mkString(" ") should be ("arg1 arg2") + } + } + + test("automatically sets mainClass if primary resource is S3 JAR in cluster mode") { + withTempDir { tempDir => + val (jarFile, (childArgs, classpaths, _, mainClass_)) = testResolveMainClassOnRemoteJar( + tempDir, "cluster" + ) + + mainClass_ should be (YARN_CLUSTER_SUBMIT_CLASS) + classpaths should have length 1 + classpaths.head should endWith 
(jarFile.getName) + childArgs.mkString(" ") should include ("--class SomeMainClass") + childArgs.mkString(" ") should endWith ("--arg arg1 --arg arg2") + } + } + + test("error informatively when mainClass isn't set and S3 JAR doesn't exist") { + val hadoopConf = new Configuration() + updateConfWithFakeS3Fs(hadoopConf) + + val clArgs = Seq( + "--name", "testApp", + "--master", "yarn", + "--conf", "spark.hadoop.fs.s3a.impl=org.apache.spark.deploy.TestFileSystem", + "--conf", "spark.hadoop.fs.s3a.impl.disable.cache=true", + s"s3a:///does-not-exist.jar") + + testPrematureExit(clArgs.toArray, "File /does-not-exist.jar does not exist") + } + test("handles confs with flag equivalents") { val clArgs = Seq( "--deploy-mode", "cluster", @@ -535,7 +612,7 @@ class SparkSubmitSuite unusedJar.toString) runSparkSubmit(args) val listStatus = fileSystem.listStatus(testDirPath) - val logData = EventLoggingListener.openEventLog(listStatus.last.getPath, fileSystem) + val logData = EventLogFileReader.openEventLog(listStatus.last.getPath, fileSystem) Source.fromInputStream(logData).getLines().foreach { line => assert(!line.contains("secret_password")) } @@ -600,7 +677,7 @@ class SparkSubmitSuite } // TODO(SPARK-9603): Building a package is flaky on Jenkins Maven builds. 
- // See https://gist.github.com/shivaram/3a2fecce60768a603dac for a error log + // See https://gist.github.com/shivaram/3a2fecce60768a603dac for an error log ignore("correctly builds R packages included in a jar with --packages") { assume(RUtils.isRInstalled, "R isn't installed on this machine.") assume(RUtils.isSparkRInstalled, "SparkR is not installed in this build.") @@ -1365,7 +1442,7 @@ object SparkSubmitSuite extends SparkFunSuite with TimeLimits { } object JarCreationTest extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") val conf = new SparkConf() val sc = new SparkContext(conf) @@ -1389,7 +1466,7 @@ object JarCreationTest extends Logging { } object SimpleApplicationTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") val conf = new SparkConf() val sc = new SparkContext(conf) @@ -1415,7 +1492,7 @@ object SimpleApplicationTest { } object UserClasspathFirstTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val ccl = Thread.currentThread().getContextClassLoader() val resource = ccl.getResourceAsStream("test.resource") val bytes = ByteStreams.toByteArray(resource) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 8e1a519e187ce..31e6c730eadc0 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -44,13 +44,13 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { private class BufferPrintStream extends PrintStream(noOpOutputStream) { var lineBuffer = ArrayBuffer[String]() // scalastyle:off println - override def println(line: String) { + override def println(line: String): Unit = { lineBuffer += line } // scalastyle:on println } - override def 
beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() // We don't want to write logs during testing SparkSubmitUtils.printStream = new BufferPrintStream diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index 9bf7714ed77dd..f8b99302c4ad5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -26,15 +26,16 @@ import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import org.scalatest.concurrent.Eventually._ import org.apache.spark._ -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.ApplicationInfo import org.apache.spark.deploy.master.Master import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.config +import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster._ -import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RegisterExecutor, RegisterExecutorFailed} +import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{LaunchedExecutor, RegisterExecutor} /** * End-to-end tests for dynamic allocation in standalone mode. 
@@ -437,8 +438,8 @@ class StandaloneDynamicAllocationSuite assert(executors.size === 2) // simulate running a task on the executor - val getMap = - PrivateMethod[mutable.HashMap[String, mutable.HashSet[Long]]]('executorIdToRunningTaskIds) + val getMap = PrivateMethod[mutable.HashMap[String, mutable.HashSet[Long]]]( + Symbol("executorIdToRunningTaskIds")) val taskScheduler = sc.taskScheduler.asInstanceOf[TaskSchedulerImpl] val executorIdToRunningTaskIds = taskScheduler invokePrivate getMap() executorIdToRunningTaskIds(executors.head) = mutable.HashSet(1L) @@ -482,12 +483,16 @@ class StandaloneDynamicAllocationSuite assert(apps.head.getExecutorLimit === Int.MaxValue) } val beforeList = getApplications().head.executors.keys.toSet - assert(killExecutorsOnHost(sc, "localhost").equals(true)) - syncExecutors(sc) - val afterList = getApplications().head.executors.keys.toSet + + sc.schedulerBackend match { + case b: CoarseGrainedSchedulerBackend => + b.killExecutorsOnHost("localhost") + case _ => fail("expected coarse grained scheduler") + } eventually(timeout(10.seconds), interval(100.millis)) { + val afterList = getApplications().head.executors.keys.toSet assert(beforeList.intersect(afterList).size == 0) } } @@ -501,11 +506,12 @@ class StandaloneDynamicAllocationSuite val mockAddress = mock(classOf[RpcAddress]) when(endpointRef.address).thenReturn(mockAddress) val message = RegisterExecutor("one", endpointRef, "blacklisted-host", 10, Map.empty, - Map.empty, Map.empty) + Map.empty, Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) val taskScheduler = mock(classOf[TaskSchedulerImpl]) when(taskScheduler.nodeBlacklist()).thenReturn(Set("blacklisted-host")) when(taskScheduler.resourceOffers(any())).thenReturn(Nil) + when(taskScheduler.resourcesReqsPerTask).thenReturn(Seq.empty) when(taskScheduler.sc).thenReturn(sc) val rpcEnv = RpcEnv.create("test-rpcenv", "localhost", 0, conf, securityManager) @@ -513,10 +519,11 @@ class StandaloneDynamicAllocationSuite val scheduler = 
new CoarseGrainedSchedulerBackend(taskScheduler, rpcEnv) try { scheduler.start() - scheduler.driverEndpoint.ask[Boolean](message) - eventually(timeout(10.seconds), interval(100.millis)) { - verify(endpointRef).send(RegisterExecutorFailed(any())) + val e = intercept[SparkException] { + scheduler.driverEndpoint.askSync[Boolean](message) } + assert(e.getCause().isInstanceOf[IllegalStateException]) + assert(scheduler.getExecutorIds().isEmpty) } finally { scheduler.stop() } @@ -535,6 +542,11 @@ class StandaloneDynamicAllocationSuite .setMaster(masterRpcEnv.address.toSparkURL) .setAppName("test") .set(config.EXECUTOR_MEMORY.key, "256m") + // Because we're faking executor launches in the Worker, set the config so that the driver + // will not timeout anything related to executors. + .set(config.Network.NETWORK_TIMEOUT.key, "2h") + .set(config.EXECUTOR_HEARTBEAT_INTERVAL.key, "1h") + .set(config.STORAGE_BLOCKMANAGER_SLAVE_TIMEOUT.key, "1h") } /** Make a master to which our application will send executor requests. */ @@ -548,8 +560,7 @@ class StandaloneDynamicAllocationSuite private def makeWorkers(cores: Int, memory: Int): Seq[Worker] = { (0 until numWorkers).map { i => val rpcEnv = workerRpcEnvs(i) - val worker = new Worker(rpcEnv, 0, cores, memory, Array(masterRpcEnv.address), - Worker.ENDPOINT_NAME, null, conf, securityManager) + val worker = new TestWorker(rpcEnv, cores, memory) rpcEnv.setupEndpoint(Worker.ENDPOINT_NAME, worker) worker } @@ -587,16 +598,6 @@ class StandaloneDynamicAllocationSuite } } - /** Kill the executors on a given host. */ - private def killExecutorsOnHost(sc: SparkContext, host: String): Boolean = { - syncExecutors(sc) - sc.schedulerBackend match { - case b: CoarseGrainedSchedulerBackend => - b.killExecutorsOnHost(host) - case _ => fail("expected coarse grained scheduler") - } - } - /** * Return a list of executor IDs belonging to this application. * @@ -619,9 +620,8 @@ class StandaloneDynamicAllocationSuite * we submit a request to kill them. 
This must be called before each kill request. */ private def syncExecutors(sc: SparkContext): Unit = { - val driverExecutors = sc.env.blockManager.master.getStorageStatus - .map(_.blockManagerId.executorId) - .filter { _ != SparkContext.DRIVER_IDENTIFIER} + val backend = sc.schedulerBackend.asInstanceOf[CoarseGrainedSchedulerBackend] + val driverExecutors = backend.getExecutorIds() val masterExecutors = getExecutorIds(sc) val missingExecutors = masterExecutors.toSet.diff(driverExecutors.toSet).toSeq.sorted missingExecutors.foreach { id => @@ -630,10 +630,30 @@ class StandaloneDynamicAllocationSuite val mockAddress = mock(classOf[RpcAddress]) when(endpointRef.address).thenReturn(mockAddress) val message = RegisterExecutor(id, endpointRef, "localhost", 10, Map.empty, Map.empty, - Map.empty) - val backend = sc.schedulerBackend.asInstanceOf[CoarseGrainedSchedulerBackend] + Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) backend.driverEndpoint.askSync[Boolean](message) + backend.driverEndpoint.send(LaunchedExecutor(id)) + } + } + + /** + * Worker implementation that does not actually launch any executors, but reports them as + * running so the Master keeps track of them. This requires that `syncExecutors` be used + * to make sure the Master instance and the SparkContext under test agree about what + * executors are running. 
+ */ + private class TestWorker(rpcEnv: RpcEnv, cores: Int, memory: Int) + extends Worker( + rpcEnv, 0, cores, memory, Array(masterRpcEnv.address), Worker.ENDPOINT_NAME, + null, conf, securityManager) { + + override def receive: PartialFunction[Any, Unit] = testReceive.orElse(super.receive) + + private def testReceive: PartialFunction[Any, Unit] = synchronized { + case LaunchExecutor(_, appId, execId, _, _, _, _) => + self.send(ExecutorStateChanged(appId, execId, ExecutorState.RUNNING, None, None)) } + } } diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index a1d3077b8fc87..a3e39d7f53728 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.deploy.{ApplicationDescription, Command} import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.{ApplicationInfo, Master} import org.apache.spark.deploy.worker.Worker -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{config, Logging} import org.apache.spark.rpc.RpcEnv import org.apache.spark.util.Utils @@ -44,13 +44,13 @@ class AppClientSuite with Eventually with ScalaFutures { private val numWorkers = 2 - private val conf = new SparkConf() - private val securityManager = new SecurityManager(conf) + private var conf: SparkConf = null private var masterRpcEnv: RpcEnv = null private var workerRpcEnvs: Seq[RpcEnv] = null private var master: Master = null private var workers: Seq[Worker] = null + private var securityManager: SecurityManager = null /** * Start the local cluster. 
@@ -58,6 +58,8 @@ class AppClientSuite */ override def beforeAll(): Unit = { super.beforeAll() + conf = new SparkConf().set(config.Worker.WORKER_DECOMMISSION_ENABLED.key, "true") + securityManager = new SecurityManager(conf) masterRpcEnv = RpcEnv.create(Master.SYSTEM_NAME, "localhost", 0, conf, securityManager) workerRpcEnvs = (0 until numWorkers).map { i => RpcEnv.create(Worker.SYSTEM_NAME + i, "localhost", 0, conf, securityManager) @@ -111,8 +113,23 @@ class AppClientSuite assert(apps.head.getExecutorLimit === numExecutorsRequested, s"executor request failed") } + + // Save the executor id before decommissioning so we can kill it + val application = getApplications().head + val executors = application.executors + val executorId: String = executors.head._2.fullId + + // Send a decommission self to all the workers + // Note: normally the worker would send this on their own. + workers.foreach(worker => worker.decommissionSelf()) + + // Decommissioning is async. + eventually(timeout(1.seconds), interval(10.millis)) { + // We only record decommissioning for the executor we've requested + assert(ci.listener.execDecommissionedList.size === 1) + } + // Send request to kill executor, verify request was made - val executorId: String = getApplications().head.executors.head._2.fullId whenReady( ci.client.killExecutors(Seq(executorId)), timeout(10.seconds), @@ -120,6 +137,15 @@ class AppClientSuite assert(acknowledged) } + // Verify that asking for executors on the decommissioned workers fails + whenReady( + ci.client.requestTotalExecutors(numExecutorsRequested), + timeout(10.seconds), + interval(10.millis)) { acknowledged => + assert(acknowledged) + } + assert(getApplications().head.executors.size === 0) + // Issue stop command for Client to disconnect from Master ci.client.stop() @@ -189,6 +215,7 @@ class AppClientSuite val deadReasonList = new ConcurrentLinkedQueue[String]() val execAddedList = new ConcurrentLinkedQueue[String]() val execRemovedList = new 
ConcurrentLinkedQueue[String]() + val execDecommissionedList = new ConcurrentLinkedQueue[String]() def connected(id: String): Unit = { connectedIdList.add(id) @@ -218,6 +245,10 @@ class AppClientSuite execRemovedList.add(id) } + def executorDecommissioned(id: String, message: String): Unit = { + execDecommissionedList.add(id) + } + def workerRemoved(workerId: String, host: String, message: String): Unit = {} } diff --git a/core/src/test/scala/org/apache/spark/deploy/client/TestExecutor.scala b/core/src/test/scala/org/apache/spark/deploy/client/TestExecutor.scala index a98b1fa8f83a1..1dce49d1f9d5a 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/TestExecutor.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/TestExecutor.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.client private[spark] object TestExecutor { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // scalastyle:off println println("Hello world!") // scalastyle:on println diff --git a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala index 1148446c9faa1..48bd088d07ff9 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala @@ -28,7 +28,7 @@ import org.mockito.ArgumentMatchers.any import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.scalatest.Matchers -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging diff --git a/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterBuilderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterBuilderSuite.scala new file mode 100644 index 0000000000000..c905797bf1287 --- /dev/null +++ 
b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterBuilderSuite.scala @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import org.apache.spark.{SparkFunSuite, Success, TaskResultLost, TaskState} +import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} +import org.apache.spark.resource.ResourceProfile +import org.apache.spark.scheduler._ +import org.apache.spark.status.ListenerEventsTestHelper + +class BasicEventFilterBuilderSuite extends SparkFunSuite { + import ListenerEventsTestHelper._ + + override protected def beforeEach(): Unit = { + ListenerEventsTestHelper.reset() + } + + test("track live jobs") { + var time = 0L + + val listener = new BasicEventFilterBuilder + listener.onOtherEvent(SparkListenerLogStart("TestSparkVersion")) + + // Start the application. + time += 1 + listener.onApplicationStart(SparkListenerApplicationStart( + "name", + Some("id"), + time, + "user", + Some("attempt"), + None)) + + // Start a couple of executors. 
+ time += 1 + val execIds = Array("1", "2") + execIds.foreach { id => + listener.onExecutorAdded(createExecutorAddedEvent(id, time)) + } + + // Start a job with 2 stages / 4 tasks each + time += 1 + + val rddsForStage0 = createRdds(2) + val rddsForStage1 = createRdds(2) + + val stage0 = createStage(rddsForStage0, Nil) + val stage1 = createStage(rddsForStage1, Seq(stage0.stageId)) + val stages = Seq(stage0, stage1) + + val jobProps = createJobProps() + listener.onJobStart(SparkListenerJobStart(1, time, stages, jobProps)) + + // Submit stage 0 + time += 1 + stages.head.submissionTime = Some(time) + listener.onStageSubmitted(SparkListenerStageSubmitted(stages.head, jobProps)) + + // Start tasks from stage 0 + time += 1 + + val s0Tasks = ListenerEventsTestHelper.createTasks(4, execIds, time) + s0Tasks.foreach { task => + listener.onTaskStart(SparkListenerTaskStart(stages.head.stageId, + stages.head.attemptNumber(), task)) + } + + // Fail one of the tasks, re-start it. + time += 1 + s0Tasks.head.markFinished(TaskState.FAILED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stages.head.stageId, stages.head.attemptNumber, + "taskType", TaskResultLost, s0Tasks.head, new ExecutorMetrics, null)) + + time += 1 + val reattempt = createTaskWithNewAttempt(s0Tasks.head, time) + listener.onTaskStart(SparkListenerTaskStart(stages.head.stageId, stages.head.attemptNumber, + reattempt)) + + // Succeed all tasks in stage 0. + val pending = s0Tasks.drop(1) ++ Seq(reattempt) + + time += 1 + pending.foreach { task => + task.markFinished(TaskState.FINISHED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stages.head.stageId, stages.head.attemptNumber, + "taskType", Success, task, new ExecutorMetrics, TaskMetrics.empty)) + } + + // End stage 0. 
+ time += 1 + stages.head.completionTime = Some(time) + listener.onStageCompleted(SparkListenerStageCompleted(stages.head)) + + assert(listener.liveJobs === Set(1)) + assert(listener.liveStages === Set(0)) + // stage 1 not yet submitted - RDDs for stage 1 is not available + assert(listener.liveRDDs === rddsForStage0.map(_.id).toSet) + assert(listener.liveTasks === (s0Tasks ++ Seq(reattempt)).map(_.taskId).toSet) + + // Submit stage 1. + time += 1 + stages.last.submissionTime = Some(time) + listener.onStageSubmitted(SparkListenerStageSubmitted(stages.last, jobProps)) + + // Start and fail all tasks of stage 1. + time += 1 + val s1Tasks = createTasks(4, execIds, time) + s1Tasks.foreach { task => + listener.onTaskStart(SparkListenerTaskStart(stages.last.stageId, + stages.last.attemptNumber, + task)) + } + + time += 1 + s1Tasks.foreach { task => + task.markFinished(TaskState.FAILED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stages.last.stageId, stages.last.attemptNumber, + "taskType", TaskResultLost, task, new ExecutorMetrics, null)) + } + + // Fail stage 1. + time += 1 + stages.last.completionTime = Some(time) + stages.last.failureReason = Some("uh oh") + listener.onStageCompleted(SparkListenerStageCompleted(stages.last)) + + // - Re-submit stage 1, all tasks, and succeed them and the stage. 
+ val oldS1 = stages.last + val newS1 = new StageInfo(oldS1.stageId, oldS1.attemptNumber + 1, oldS1.name, oldS1.numTasks, + oldS1.rddInfos, oldS1.parentIds, oldS1.details, oldS1.taskMetrics, + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + + time += 1 + newS1.submissionTime = Some(time) + listener.onStageSubmitted(SparkListenerStageSubmitted(newS1, jobProps)) + + val newS1Tasks = createTasks(4, execIds, time) + + newS1Tasks.foreach { task => + listener.onTaskStart(SparkListenerTaskStart(newS1.stageId, newS1.attemptNumber, task)) + } + + time += 1 + newS1Tasks.foreach { task => + task.markFinished(TaskState.FINISHED, time) + listener.onTaskEnd(SparkListenerTaskEnd(newS1.stageId, newS1.attemptNumber, "taskType", + Success, task, new ExecutorMetrics, null)) + } + + time += 1 + newS1.completionTime = Some(time) + listener.onStageCompleted(SparkListenerStageCompleted(newS1)) + + assert(listener.liveJobs === Set(1)) + assert(listener.liveStages === Set(0, 1)) + // stage 0 and 1 are finished but it stores the information regarding stage + assert(listener.liveRDDs === (rddsForStage0.map(_.id) ++ rddsForStage1.map(_.id)).toSet) + assert(listener.liveTasks === + (s0Tasks ++ Seq(reattempt) ++ s1Tasks ++ newS1Tasks).map(_.taskId).toSet) + + // Start next job. + time += 1 + + val rddsForStage2 = createRdds(2) + val rddsForStage3 = createRdds(2) + + val stage3 = createStage(rddsForStage2, Nil) + val stage4 = createStage(rddsForStage3, Seq(stage3.stageId)) + val stagesForJob2 = Seq(stage3, stage4) + + listener.onJobStart(SparkListenerJobStart(2, time, stagesForJob2, jobProps)) + + // End job 1. 
+ time += 1 + listener.onJobEnd(SparkListenerJobEnd(1, time, JobSucceeded)) + + // everything related to job 1 should be cleaned up, but not for job 2 + assert(listener.liveJobs === Set(2)) + assert(listener.liveStages.isEmpty) + // no RDD information available as these stages are not submitted yet + assert(listener.liveRDDs.isEmpty) + // stageToTasks has no information for job 2, as no task has been started + assert(listener.liveTasks.isEmpty) + } + + test("track live executors") { + var time = 0L + + val listener = new BasicEventFilterBuilder + listener.onOtherEvent(SparkListenerLogStart("TestSparkVersion")) + + // Start the application. + time += 1 + listener.onApplicationStart(SparkListenerApplicationStart( + "name", + Some("id"), + time, + "user", + Some("attempt"), + None)) + + // Start a couple of executors. + time += 1 + val execIds = (1 to 3).map(_.toString) + execIds.foreach { id => + listener.onExecutorAdded(createExecutorAddedEvent(id, time)) + } + + // End one of executors. + time += 1 + listener.onExecutorRemoved(createExecutorRemovedEvent(execIds.head, time)) + + assert(listener.liveExecutors === execIds.drop(1).toSet) + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala new file mode 100644 index 0000000000000..2da40dccba53e --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import org.apache.spark.{storage, SparkContext, SparkFunSuite, Success, TaskState} +import org.apache.spark.deploy.history.EventFilter.FilterStatistics +import org.apache.spark.executor.ExecutorMetrics +import org.apache.spark.scheduler._ +import org.apache.spark.status.ListenerEventsTestHelper._ +import org.apache.spark.storage.{BlockManagerId, RDDBlockId, StorageLevel} + +class BasicEventFilterSuite extends SparkFunSuite { + import BasicEventFilterSuite._ + + test("filter out events for finished jobs") { + // assume finished job 1 with stage 1, tasks (1, 2), rdds (1, 2) + // live job 2 with stage 2 with tasks (3, 4) & rdds (3, 4), + // and stage 3 with tasks (5, 6) & rdds (5, 6) + val liveJobs = Set(2) + val liveStages = Set(2, 3) + val liveTasks = Set(3L, 4L, 5L, 6L) + val liveRDDs = Set(3, 4, 5, 6) + val liveExecutors: Set[String] = Set("1", "2") + val filterStats = FilterStatistics( + // counts finished job 1 + liveJobs.size + 1, + liveJobs.size, + // counts finished stage 1 for job 1 + liveStages.size + 1, + liveStages.size, + // counts finished tasks (1, 2) for job 1 + liveTasks.size + 2, + liveTasks.size) + + val filter = new BasicEventFilter(filterStats, liveJobs, liveStages, liveTasks, liveRDDs, + liveExecutors) + val acceptFn = filter.acceptFn().lift + + // Verifying with finished job 1 + val rddsForStage1 = createRddsWithId(1 to 2) + val stage1 = createStage(1, rddsForStage1, Nil) + val tasksForStage1 = createTasks(Seq(1L, 2L), liveExecutors.toArray, 0) + tasksForStage1.foreach { task => 
task.markFinished(TaskState.FINISHED, 5) } + + val jobStartEventForJob1 = SparkListenerJobStart(1, 0, Seq(stage1)) + val jobEndEventForJob1 = SparkListenerJobEnd(1, 0, JobSucceeded) + val stageSubmittedEventsForJob1 = SparkListenerStageSubmitted(stage1) + val stageCompletedEventsForJob1 = SparkListenerStageCompleted(stage1) + val unpersistRDDEventsForJob1 = (1 to 2).map(SparkListenerUnpersistRDD) + + // job events for finished job should be rejected + assert(Some(false) === acceptFn(jobStartEventForJob1)) + assert(Some(false) === acceptFn(jobEndEventForJob1)) + + // stage events for finished job should be rejected + // NOTE: it doesn't filter out stage events which are also related to the executor + assertFilterStageEvents( + acceptFn, + stageSubmittedEventsForJob1, + stageCompletedEventsForJob1, + unpersistRDDEventsForJob1, + SparkListenerSpeculativeTaskSubmitted(stage1.stageId, stageAttemptId = 1), + Some(false)) + + // task events for finished job should be rejected + assertFilterTaskEvents(acceptFn, tasksForStage1, stage1, Some(false)) + + // Verifying with live job 2 + val rddsForStage2 = createRddsWithId(3 to 4) + val stage2 = createStage(2, rddsForStage2, Nil) + val tasksForStage2 = createTasks(Seq(3L, 4L), liveExecutors.toArray, 0) + tasksForStage1.foreach { task => task.markFinished(TaskState.FINISHED, 5) } + + val jobStartEventForJob2 = SparkListenerJobStart(2, 0, Seq(stage2)) + val stageSubmittedEventsForJob2 = SparkListenerStageSubmitted(stage2) + val stageCompletedEventsForJob2 = SparkListenerStageCompleted(stage2) + val unpersistRDDEventsForJob2 = rddsForStage2.map { rdd => SparkListenerUnpersistRDD(rdd.id) } + + // job events for live job should be accepted + assert(Some(true) === acceptFn(jobStartEventForJob2)) + + // stage events for live job should be accepted + assertFilterStageEvents( + acceptFn, + stageSubmittedEventsForJob2, + stageCompletedEventsForJob2, + unpersistRDDEventsForJob2, + SparkListenerSpeculativeTaskSubmitted(stage2.stageId, 
stageAttemptId = 1), + Some(true)) + + // task events for live job should be accepted + assertFilterTaskEvents(acceptFn, tasksForStage2, stage2, Some(true)) + } + + test("accept all events for block manager addition/removal on driver") { + val filter = new BasicEventFilter(EMPTY_STATS, Set.empty, Set.empty, Set.empty, Set.empty, + Set.empty) + val acceptFn = filter.acceptFn().lift + + val bmId = BlockManagerId(SparkContext.DRIVER_IDENTIFIER, "host1", 1) + assert(Some(true) === acceptFn(SparkListenerBlockManagerAdded(0, bmId, 1))) + assert(Some(true) === acceptFn(SparkListenerBlockManagerRemoved(1, bmId))) + assert(Some(true) === acceptFn(SparkListenerBlockUpdated( + storage.BlockUpdatedInfo(bmId, RDDBlockId(1, 1), StorageLevel.DISK_ONLY, 0, 10)))) + } + + test("filter out events for dead executors") { + // assume executor 1 was dead, and live executor 2 is available + val liveExecutors: Set[String] = Set("2") + + val filter = new BasicEventFilter(EMPTY_STATS, Set.empty, Set.empty, Set.empty, Set.empty, + liveExecutors) + val acceptFn = filter.acceptFn().lift + + // events for dead executor should be rejected + assert(Some(false) === acceptFn(createExecutorAddedEvent(1))) + // though the name of event is stage executor metrics, AppStatusListener only deals with + // live executors + assert(Some(false) === acceptFn( + SparkListenerStageExecutorMetrics(1.toString, 0, 0, new ExecutorMetrics))) + assert(Some(false) === acceptFn(SparkListenerExecutorBlacklisted(0, 1.toString, 1))) + assert(Some(false) === acceptFn(SparkListenerExecutorUnblacklisted(0, 1.toString))) + assert(Some(false) === acceptFn(createExecutorRemovedEvent(1))) + val bmId = BlockManagerId(1.toString, "host1", 1) + assert(Some(false) === acceptFn(SparkListenerBlockManagerAdded(0, bmId, 1))) + assert(Some(false) === acceptFn(SparkListenerBlockManagerRemoved(1, bmId))) + assert(Some(false) === acceptFn(SparkListenerBlockUpdated( + storage.BlockUpdatedInfo(bmId, RDDBlockId(1, 1), StorageLevel.DISK_ONLY, 0, 
10)))) + + // events for live executor should be accepted + assert(Some(true) === acceptFn(createExecutorAddedEvent(2))) + assert(Some(true) === acceptFn( + SparkListenerStageExecutorMetrics(2.toString, 0, 0, new ExecutorMetrics))) + assert(Some(true) === acceptFn(SparkListenerExecutorBlacklisted(0, 2.toString, 1))) + assert(Some(true) === acceptFn(SparkListenerExecutorUnblacklisted(0, 2.toString))) + assert(Some(true) === acceptFn(createExecutorRemovedEvent(2))) + val bmId2 = BlockManagerId(2.toString, "host1", 1) + assert(Some(true) === acceptFn(SparkListenerBlockManagerAdded(0, bmId2, 1))) + assert(Some(true) === acceptFn(SparkListenerBlockManagerRemoved(1, bmId2))) + assert(Some(true) === acceptFn(SparkListenerBlockUpdated( + storage.BlockUpdatedInfo(bmId2, RDDBlockId(1, 1), StorageLevel.DISK_ONLY, 0, 10)))) + } + + test("other events should be left to other filters") { + val filter = new BasicEventFilter(EMPTY_STATS, Set.empty, Set.empty, Set.empty, Set.empty, + Set.empty) + val acceptFn = filter.acceptFn().lift + + assert(None === acceptFn(SparkListenerEnvironmentUpdate(Map.empty))) + assert(None === acceptFn(SparkListenerApplicationStart("1", Some("1"), 0, "user", None))) + assert(None === acceptFn(SparkListenerApplicationEnd(1))) + assert(None === acceptFn(SparkListenerNodeBlacklisted(0, "host1", 1))) + assert(None === acceptFn(SparkListenerNodeUnblacklisted(0, "host1"))) + assert(None === acceptFn(SparkListenerLogStart("testVersion"))) + } + + private def assertFilterStageEvents( + acceptFn: SparkListenerEvent => Option[Boolean], + stageSubmitted: SparkListenerStageSubmitted, + stageCompleted: SparkListenerStageCompleted, + unpersistRDDs: Seq[SparkListenerUnpersistRDD], + taskSpeculativeSubmitted: SparkListenerSpeculativeTaskSubmitted, + expectedVal: Option[Boolean]): Unit = { + assert(acceptFn(stageSubmitted) === expectedVal) + assert(acceptFn(stageCompleted) === expectedVal) + unpersistRDDs.foreach { event => + assert(acceptFn(event) === expectedVal) + } 
+ assert(acceptFn(taskSpeculativeSubmitted) === expectedVal) + } + + private def assertFilterTaskEvents( + acceptFn: SparkListenerEvent => Option[Boolean], + taskInfos: Seq[TaskInfo], + stageInfo: StageInfo, + expectedVal: Option[Boolean]): Unit = { + taskInfos.foreach { task => + val taskStartEvent = SparkListenerTaskStart(stageInfo.stageId, 0, task) + assert(acceptFn(taskStartEvent) === expectedVal) + + val taskGettingResultEvent = SparkListenerTaskGettingResult(task) + assert(acceptFn(taskGettingResultEvent) === expectedVal) + + val taskEndEvent = SparkListenerTaskEnd(stageInfo.stageId, 0, "taskType", + Success, task, new ExecutorMetrics, null) + assert(acceptFn(taskEndEvent) === expectedVal) + } + } +} + +object BasicEventFilterSuite { + val EMPTY_STATS = FilterStatistics(0, 0, 0, 0, 0, 0) +} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala new file mode 100644 index 0000000000000..2a914023ec821 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.history + +import scala.collection.mutable +import scala.io.{Codec, Source} + +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.json4s.jackson.JsonMethods.parse + +import org.apache.spark.{SparkConf, SparkFunSuite, Success} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.EventLogTestHelper.writeEventsToRollingWriter +import org.apache.spark.executor.ExecutorMetrics +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.ExecutorInfo +import org.apache.spark.status.ListenerEventsTestHelper._ +import org.apache.spark.storage.BlockManagerId +import org.apache.spark.util.{JsonProtocol, Utils} + +class EventLogFileCompactorSuite extends SparkFunSuite { + import EventLogFileCompactorSuite._ + + private val sparkConf = new SparkConf() + private val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf) + + test("No event log files") { + withTempDir { dir => + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + + assertNoCompaction(fs, Seq.empty, compactor.compact(Seq.empty), + CompactionResultCode.NOT_ENOUGH_FILES) + } + } + + test("No compact file, less origin files available than max files to retain") { + withTempDir { dir => + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val fileStatuses = writeEventsToRollingWriter(fs, "app", dir, sparkConf, hadoopConf, + (1 to 2).map(_ => testEvent): _*) + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + assertNoCompaction(fs, fileStatuses, compactor.compact(fileStatuses), + CompactionResultCode.NOT_ENOUGH_FILES) + } + } + + test("No compact file, more origin files available than max files to retain") { + withTempDir { dir => + val fs = new 
Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val fileStatuses = writeEventsToRollingWriter(fs, "app", dir, sparkConf, hadoopConf, + (1 to 5).map(_ => testEvent): _*) + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + assertCompaction(fs, fileStatuses, compactor.compact(fileStatuses), + expectedNumOfFilesCompacted = 2) + } + } + + test("compact file exists, less origin files available than max files to retain") { + withTempDir { dir => + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val fileStatuses = writeEventsToRollingWriter(fs, "app", dir, sparkConf, hadoopConf, + (1 to 2).map(_ => testEvent): _*) + + val fileToCompact = fileStatuses.head.getPath + val compactedPath = new Path(fileToCompact.getParent, + fileToCompact.getName + EventLogFileWriter.COMPACTED) + assert(fs.rename(fileToCompact, compactedPath)) + + val newFileStatuses = Seq(fs.getFileStatus(compactedPath)) ++ fileStatuses.drop(1) + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + assertNoCompaction(fs, newFileStatuses, compactor.compact(newFileStatuses), + CompactionResultCode.NOT_ENOUGH_FILES) + } + } + + test("compact file exists, number of origin files are same as max files to retain") { + withTempDir { dir => + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val fileStatuses = writeEventsToRollingWriter(fs, "app", dir, sparkConf, hadoopConf, + (1 to 4).map(_ => testEvent): _*) + + val fileToCompact = fileStatuses.head.getPath + val compactedPath = new Path(fileToCompact.getParent, + fileToCompact.getName + EventLogFileWriter.COMPACTED) + assert(fs.rename(fileToCompact, compactedPath)) + + val newFileStatuses = Seq(fs.getFileStatus(compactedPath)) ++ fileStatuses.drop(1) + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + 
TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + assertNoCompaction(fs, newFileStatuses, compactor.compact(newFileStatuses), + CompactionResultCode.NOT_ENOUGH_FILES) + } + } + + test("compact file exists, more origin files available than max files to retain") { + withTempDir { dir => + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val fileStatuses = writeEventsToRollingWriter(fs, "app", dir, sparkConf, hadoopConf, + (1 to 10).map(_ => testEvent): _*) + + val fileToCompact = fileStatuses.head.getPath + val compactedPath = new Path(fileToCompact.getParent, + fileToCompact.getName + EventLogFileWriter.COMPACTED) + assert(fs.rename(fileToCompact, compactedPath)) + + val newFileStatuses = Seq(fs.getFileStatus(compactedPath)) ++ fileStatuses.drop(1) + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + assertCompaction(fs, newFileStatuses, compactor.compact(newFileStatuses), + expectedNumOfFilesCompacted = 7) + } + } + + test("events for finished job are dropped in new compact file") { + withTempDir { dir => + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + // 1, 2 will be compacted into one file, 3~5 are dummies to ensure max files to retain + val fileStatuses = writeEventsToRollingWriter(fs, "app", dir, sparkConf, hadoopConf, + Seq( + SparkListenerExecutorAdded(0, "exec1", new ExecutorInfo("host1", 1, Map.empty)), + SparkListenerJobStart(1, 0, Seq.empty)), + Seq( + SparkListenerJobEnd(1, 1, JobSucceeded), + SparkListenerExecutorAdded(2, "exec2", new ExecutorInfo("host2", 1, Map.empty))), + testEvent, + testEvent, + testEvent) + + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + assertCompaction(fs, fileStatuses, compactor.compact(fileStatuses), + expectedNumOfFilesCompacted = 2) + + val expectCompactFileBasePath = 
fileStatuses.take(2).last.getPath + val compactFilePath = getCompactFilePath(expectCompactFileBasePath) + Utils.tryWithResource(EventLogFileReader.openEventLog(compactFilePath, fs)) { is => + val lines = Source.fromInputStream(is)(Codec.UTF8).getLines().toList + assert(lines.length === 2, "Compacted file should have only two events being accepted") + lines.foreach { line => + val event = JsonProtocol.sparkEventFromJson(parse(line)) + assert(!event.isInstanceOf[SparkListenerJobStart] && + !event.isInstanceOf[SparkListenerJobEnd]) + } + } + } + } + + test("Don't compact file if score is lower than threshold") { + withTempDir { dir => + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + // job 1 having 4 tasks + val rddsForStage1 = createRddsWithId(1 to 2) + val stage1 = createStage(1, rddsForStage1, Nil) + val tasks = createTasks(4, Array("exec1"), 0L).map(createTaskStartEvent(_, 1, 0)) + + // job 2 having 4 tasks + val rddsForStage2 = createRddsWithId(3 to 4) + val stage2 = createStage(2, rddsForStage2, Nil) + val tasks2 = createTasks(4, Array("exec1"), 0L).map(createTaskStartEvent(_, 2, 0)) + + // here job 1 is finished and job 2 is still live, hence half of total tasks are considered + // as live + val fileStatuses = writeEventsToRollingWriter(fs, "app", dir, sparkConf, hadoopConf, + Seq(SparkListenerJobStart(1, 0, Seq(stage1)), SparkListenerStageSubmitted(stage1)), + tasks, + Seq(SparkListenerJobStart(2, 0, Seq(stage2)), SparkListenerStageSubmitted(stage2)), + tasks2, + Seq(SparkListenerJobEnd(1, 0, JobSucceeded)), + testEvent, + testEvent, + testEvent) + + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, 0.7d) + assertNoCompaction(fs, fileStatuses, compactor.compact(fileStatuses), + CompactionResultCode.LOW_SCORE_FOR_COMPACTION) + } + } + + test("rewrite files with test filters") { + class TestEventFilter1 extends EventFilter { + override def acceptFn(): PartialFunction[SparkListenerEvent, 
Boolean] = { + case _: SparkListenerApplicationEnd => true + case _: SparkListenerBlockManagerAdded => true + case _: SparkListenerApplicationStart => false + } + + override def statistics(): Option[EventFilter.FilterStatistics] = None + } + + class TestEventFilter2 extends EventFilter { + override def acceptFn(): PartialFunction[SparkListenerEvent, Boolean] = { + case _: SparkListenerApplicationEnd => true + case _: SparkListenerEnvironmentUpdate => true + case _: SparkListenerNodeBlacklisted => true + case _: SparkListenerBlockManagerAdded => false + case _: SparkListenerApplicationStart => false + case _: SparkListenerNodeUnblacklisted => false + } + + override def statistics(): Option[EventFilter.FilterStatistics] = None + } + + def writeEventToWriter(writer: EventLogFileWriter, event: SparkListenerEvent): String = { + val line = EventLogTestHelper.convertEvent(event) + writer.writeEvent(line, flushLogger = true) + line + } + + withTempDir { tempDir => + val sparkConf = new SparkConf + val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf) + val fs = new Path(tempDir.getAbsolutePath).getFileSystem(hadoopConf) + + val writer = new SingleEventLogFileWriter("app", None, tempDir.toURI, sparkConf, hadoopConf) + writer.start() + + val expectedLines = new mutable.ArrayBuffer[String] + + // filterApplicationEnd: Some(true) & Some(true) => filter in + expectedLines += writeEventToWriter(writer, SparkListenerApplicationEnd(0)) + + // filterBlockManagerAdded: Some(true) & Some(false) => filter in + expectedLines += writeEventToWriter(writer, SparkListenerBlockManagerAdded( + 0, BlockManagerId("1", "host1", 1), 10)) + + // filterApplicationStart: Some(false) & Some(false) => filter out + writeEventToWriter(writer, SparkListenerApplicationStart("app", None, 0, "user", None)) + + // filterNodeBlacklisted: None & Some(true) => filter in + expectedLines += writeEventToWriter(writer, SparkListenerNodeBlacklisted(0, "host1", 1)) + + // filterNodeUnblacklisted: None & 
Some(false) => filter out + writeEventToWriter(writer, SparkListenerNodeUnblacklisted(0, "host1")) + + // other events: None & None => filter in + expectedLines += writeEventToWriter(writer, SparkListenerUnpersistRDD(0)) + + writer.stop() + + val filters = Seq(new TestEventFilter1, new TestEventFilter2) + + val logPath = new Path(writer.logPath) + val compactor = new EventLogFileCompactor(sparkConf, hadoopConf, fs, + TEST_ROLLING_MAX_FILES_TO_RETAIN, TEST_COMPACTION_SCORE_THRESHOLD) + val newPath = compactor.rewrite(filters, Seq(fs.getFileStatus(logPath))) + assert(new Path(newPath).getName === logPath.getName + EventLogFileWriter.COMPACTED) + + Utils.tryWithResource(EventLogFileReader.openEventLog(new Path(newPath), fs)) { is => + val lines = Source.fromInputStream(is)(Codec.UTF8).getLines() + var linesLength = 0 + lines.foreach { line => + linesLength += 1 + assert(expectedLines.contains(line)) + } + assert(linesLength === expectedLines.length) + } + } + } + + private def assertCompaction( + fs: FileSystem, + originalFiles: Seq[FileStatus], + compactRet: CompactionResult, + expectedNumOfFilesCompacted: Int): Unit = { + assert(CompactionResultCode.SUCCESS === compactRet.code) + + val expectRetainedFiles = originalFiles.drop(expectedNumOfFilesCompacted) + expectRetainedFiles.foreach { status => assert(fs.exists(status.getPath)) } + + val expectRemovedFiles = originalFiles.take(expectedNumOfFilesCompacted) + expectRemovedFiles.foreach { status => assert(!fs.exists(status.getPath)) } + + val expectCompactFileBasePath = originalFiles.take(expectedNumOfFilesCompacted).last.getPath + val expectCompactFileIndex = RollingEventLogFilesWriter.getEventLogFileIndex( + expectCompactFileBasePath.getName) + assert(Some(expectCompactFileIndex) === compactRet.compactIndex) + + val expectCompactFilePath = getCompactFilePath(expectCompactFileBasePath) + assert(fs.exists(expectCompactFilePath)) + } + + private def getCompactFilePath(expectCompactFileBasePath: Path): Path = { + new 
Path(expectCompactFileBasePath.getParent, + expectCompactFileBasePath.getName + EventLogFileWriter.COMPACTED) + } + + private def assertNoCompaction( + fs: FileSystem, + originalFiles: Seq[FileStatus], + compactRet: CompactionResult, + expectedCompactRet: CompactionResultCode.Value): Unit = { + assert(expectedCompactRet === compactRet.code) + assert(None === compactRet.compactIndex) + originalFiles.foreach { status => assert(fs.exists(status.getPath)) } + } + + private def testEvent: Seq[SparkListenerEvent] = + Seq(SparkListenerApplicationStart("app", Some("app"), 0, "user", None)) +} + +object EventLogFileCompactorSuite { + val TEST_ROLLING_MAX_FILES_TO_RETAIN = 3 + + // To simplify the tests, we set the score threshold as 0.0d. + // Individual test can use the other value to verify the functionality. + val TEST_COMPACTION_SCORE_THRESHOLD = 0.0d +} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileReadersSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileReadersSuite.scala new file mode 100644 index 0000000000000..8eab2da1a37b7 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileReadersSuite.scala @@ -0,0 +1,347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} +import java.net.URI +import java.nio.charset.StandardCharsets +import java.util.zip.{ZipInputStream, ZipOutputStream} + +import com.google.common.io.{ByteStreams, Files} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.EventLogTestHelper._ +import org.apache.spark.deploy.history.RollingEventLogFilesWriter._ +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.io.CompressionCodec +import org.apache.spark.util.Utils + + +abstract class EventLogFileReadersSuite extends SparkFunSuite with LocalSparkContext + with BeforeAndAfter with Logging { + + protected val fileSystem = Utils.getHadoopFileSystem("/", SparkHadoopUtil.get.conf) + protected var testDir: File = _ + protected var testDirPath: Path = _ + + before { + testDir = Utils.createTempDir(namePrefix = s"event log") + testDirPath = new Path(testDir.getAbsolutePath()) + } + + after { + Utils.deleteRecursively(testDir) + } + + test("Retrieve EventLogFileReader correctly") { + def assertInstanceOfEventLogReader( + expectedClazz: Option[Class[_ <: EventLogFileReader]], + actual: Option[EventLogFileReader]): Unit = { + if (expectedClazz.isEmpty) { + assert(actual.isEmpty, s"Expected no EventLogFileReader instance but was " + + s"${actual.map(_.getClass).getOrElse("")}") + } else { + assert(actual.isDefined, s"Expected an EventLogFileReader instance but was empty") + assert(expectedClazz.get.isAssignableFrom(actual.get.getClass), + s"Expected ${expectedClazz.get} but was ${actual.get.getClass}") + } + } + + def 
testCreateEventLogReaderWithPath( + path: Path, + isFile: Boolean, + expectedClazz: Option[Class[_ <: EventLogFileReader]]): Unit = { + if (isFile) { + Utils.tryWithResource(fileSystem.create(path)) { is => + is.writeInt(10) + } + } else { + fileSystem.mkdirs(path) + } + + val reader = EventLogFileReader(fileSystem, path) + assertInstanceOfEventLogReader(expectedClazz, reader) + val reader2 = EventLogFileReader(fileSystem, + fileSystem.getFileStatus(path)) + assertInstanceOfEventLogReader(expectedClazz, reader2) + } + + // path with no last index - single event log + val reader1 = EventLogFileReader(fileSystem, new Path(testDirPath, "aaa"), + None) + assertInstanceOfEventLogReader(Some(classOf[SingleFileEventLogFileReader]), Some(reader1)) + + // path with last index - rolling event log + val reader2 = EventLogFileReader(fileSystem, + new Path(testDirPath, s"${EVENT_LOG_DIR_NAME_PREFIX}aaa"), Some(3)) + assertInstanceOfEventLogReader(Some(classOf[RollingEventLogFilesFileReader]), Some(reader2)) + + // path - file (both path and FileStatus) + val eventLogFile = new Path(testDirPath, "bbb") + testCreateEventLogReaderWithPath(eventLogFile, isFile = true, + Some(classOf[SingleFileEventLogFileReader])) + + // path - file starting with "." 
+ val invalidEventLogFile = new Path(testDirPath, ".bbb") + testCreateEventLogReaderWithPath(invalidEventLogFile, isFile = true, None) + + // path - directory with "eventlog_v2_" prefix + val eventLogDir = new Path(testDirPath, s"${EVENT_LOG_DIR_NAME_PREFIX}ccc") + testCreateEventLogReaderWithPath(eventLogDir, isFile = false, + Some(classOf[RollingEventLogFilesFileReader])) + + // path - directory with no "eventlog_v2_" prefix + val invalidEventLogDir = new Path(testDirPath, "ccc") + testCreateEventLogReaderWithPath(invalidEventLogDir, isFile = false, None) + } + + val allCodecs = Seq(None) ++ + CompressionCodec.ALL_COMPRESSION_CODECS.map { c => Some(CompressionCodec.getShortName(c)) } + + allCodecs.foreach { codecShortName => + test(s"get information, list event log files, zip log files - with codec $codecShortName") { + val appId = getUniqueApplicationId + val attemptId = None + + val conf = getLoggingConf(testDirPath, codecShortName) + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + + val writer = createWriter(appId, attemptId, testDirPath.toUri, conf, hadoopConf) + writer.start() + + // The test for writing events into EventLogFileWriter is covered to its own test suite. 
+ val dummyData = Seq("dummy1", "dummy2", "dummy3") + dummyData.foreach(writer.writeEvent(_, flushLogger = true)) + + val logPathIncompleted = getCurrentLogPath(writer.logPath, isCompleted = false) + val readerOpt = EventLogFileReader(fileSystem, new Path(logPathIncompleted)) + assertAppropriateReader(readerOpt) + val reader = readerOpt.get + + verifyReader(reader, new Path(logPathIncompleted), codecShortName, isCompleted = false) + + writer.stop() + + val logPathCompleted = getCurrentLogPath(writer.logPath, isCompleted = true) + val readerOpt2 = EventLogFileReader(fileSystem, new Path(logPathCompleted)) + assertAppropriateReader(readerOpt2) + val reader2 = readerOpt2.get + + verifyReader(reader2, new Path(logPathCompleted), codecShortName, isCompleted = true) + } + } + + protected def createWriter( + appId: String, + appAttemptId : Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration): EventLogFileWriter + + protected def getCurrentLogPath(logPath: String, isCompleted: Boolean): String + + protected def assertAppropriateReader(actualReader: Option[EventLogFileReader]): Unit + + protected def verifyReader( + reader: EventLogFileReader, + logPath: Path, + compressionCodecShortName: Option[String], + isCompleted: Boolean): Unit +} + +class SingleFileEventLogFileReaderSuite extends EventLogFileReadersSuite { + override protected def createWriter( + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration): EventLogFileWriter = { + new SingleEventLogFileWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) + } + + override protected def assertAppropriateReader(actualReader: Option[EventLogFileReader]): Unit = { + assert(actualReader.isDefined, s"Expected an EventLogReader instance but was empty") + assert(actualReader.get.isInstanceOf[SingleFileEventLogFileReader], + s"Expected SingleFileEventLogReader but was ${actualReader.get.getClass}") + } + + override protected 
def getCurrentLogPath(logPath: String, isCompleted: Boolean): String = { + if (!isCompleted) logPath + EventLogFileWriter.IN_PROGRESS else logPath + } + + override protected def verifyReader( + reader: EventLogFileReader, + logPath: Path, + compressionCodecShortName: Option[String], + isCompleted: Boolean): Unit = { + val status = fileSystem.getFileStatus(logPath) + + assert(status.isFile) + assert(reader.rootPath === fileSystem.makeQualified(logPath)) + assert(reader.lastIndex.isEmpty) + assert(reader.fileSizeForLastIndex === status.getLen) + assert(reader.completed === isCompleted) + assert(reader.modificationTime === status.getModificationTime) + assert(reader.listEventLogFiles.length === 1) + assert(reader.listEventLogFiles.map(_.getPath.toUri.getPath) === + Seq(logPath.toUri.getPath)) + assert(reader.compressionCodec === compressionCodecShortName) + assert(reader.totalSize === status.getLen) + + val underlyingStream = new ByteArrayOutputStream() + Utils.tryWithResource(new ZipOutputStream(underlyingStream)) { os => + reader.zipEventLogFiles(os) + } + + Utils.tryWithResource(new ZipInputStream( + new ByteArrayInputStream(underlyingStream.toByteArray))) { is => + + var entry = is.getNextEntry + assert(entry != null) + val actual = new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8) + val expected = Files.toString(new File(logPath.toString), StandardCharsets.UTF_8) + assert(actual === expected) + assert(is.getNextEntry === null) + } + } +} + +class RollingEventLogFilesReaderSuite extends EventLogFileReadersSuite { + allCodecs.foreach { codecShortName => + test(s"rolling event log files - codec $codecShortName") { + val appId = getUniqueApplicationId + val attemptId = None + + val conf = getLoggingConf(testDirPath, codecShortName) + conf.set(EVENT_LOG_ENABLE_ROLLING, true) + conf.set(EVENT_LOG_ROLLING_MAX_FILE_SIZE.key, "10m") + + val writer = createWriter(appId, attemptId, testDirPath.toUri, conf, + SparkHadoopUtil.get.newConfiguration(conf)) + + 
writer.start() + + // write log more than 20m (intended to roll over to 3 files) + val dummyStr = "dummy" * 1024 + writeTestEvents(writer, dummyStr, 1024 * 1024 * 20) + + val logPathIncompleted = getCurrentLogPath(writer.logPath, isCompleted = false) + val readerOpt = EventLogFileReader(fileSystem, + new Path(logPathIncompleted)) + verifyReader(readerOpt.get, new Path(logPathIncompleted), codecShortName, isCompleted = false) + assert(readerOpt.get.listEventLogFiles.length === 3) + + writer.stop() + + val logPathCompleted = getCurrentLogPath(writer.logPath, isCompleted = true) + val readerOpt2 = EventLogFileReader(fileSystem, new Path(logPathCompleted)) + verifyReader(readerOpt2.get, new Path(logPathCompleted), codecShortName, isCompleted = true) + assert(readerOpt2.get.listEventLogFiles.length === 3) + } + } + + override protected def createWriter( + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration): EventLogFileWriter = { + new RollingEventLogFilesWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) + } + + override protected def assertAppropriateReader(actualReader: Option[EventLogFileReader]): Unit = { + assert(actualReader.isDefined, s"Expected an EventLogReader instance but was empty") + assert(actualReader.get.isInstanceOf[RollingEventLogFilesFileReader], + s"Expected RollingEventLogFilesReader but was ${actualReader.get.getClass}") + } + + override protected def getCurrentLogPath(logPath: String, isCompleted: Boolean): String = logPath + + override protected def verifyReader( + reader: EventLogFileReader, + logPath: Path, + compressionCodecShortName: Option[String], + isCompleted: Boolean): Unit = { + import RollingEventLogFilesWriter._ + + val status = fileSystem.getFileStatus(logPath) + assert(status.isDirectory) + + val statusInDir = fileSystem.listStatus(logPath) + val eventFiles = statusInDir.filter(isEventLogFile).sortBy { s => + getEventLogFileIndex(s.getPath.getName) + } + 
assert(eventFiles.nonEmpty) + val lastEventFile = eventFiles.last + val allLen = eventFiles.map(_.getLen).sum + + assert(reader.rootPath === fileSystem.makeQualified(logPath)) + assert(reader.lastIndex === Some(getEventLogFileIndex(lastEventFile.getPath.getName))) + assert(reader.fileSizeForLastIndex === lastEventFile.getLen) + assert(reader.completed === isCompleted) + assert(reader.modificationTime === lastEventFile.getModificationTime) + assert(reader.listEventLogFiles.length === eventFiles.length) + assert(reader.listEventLogFiles.map(_.getPath) === eventFiles.map(_.getPath)) + assert(reader.compressionCodec === compressionCodecShortName) + assert(reader.totalSize === allLen) + + val underlyingStream = new ByteArrayOutputStream() + Utils.tryWithResource(new ZipOutputStream(underlyingStream)) { os => + reader.zipEventLogFiles(os) + } + + Utils.tryWithResource(new ZipInputStream( + new ByteArrayInputStream(underlyingStream.toByteArray))) { is => + + val entry = is.getNextEntry + assert(entry != null) + + // directory + assert(entry.getName === logPath.getName + "/") + + val allFileNames = fileSystem.listStatus(logPath).map(_.getPath.getName).toSet + + var count = 0 + var noMoreEntry = false + while (!noMoreEntry) { + val entry = is.getNextEntry + if (entry == null) { + noMoreEntry = true + } else { + count += 1 + + assert(entry.getName.startsWith(logPath.getName + "/")) + val fileName = entry.getName.stripPrefix(logPath.getName + "/") + assert(allFileNames.contains(fileName)) + + val actual = new String(ByteStreams.toByteArray(is), StandardCharsets.UTF_8) + val expected = Files.toString(new File(logPath.toString, fileName), + StandardCharsets.UTF_8) + assert(actual === expected) + } + } + + assert(count === allFileNames.size) + } + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala new file mode 100644 index 
0000000000000..060b878fb8ef2 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import java.io.{File, FileOutputStream, IOException} +import java.net.URI + +import scala.collection.mutable +import scala.io.Source + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.EventLogTestHelper._ +import org.apache.spark.internal.config._ +import org.apache.spark.io.CompressionCodec +import org.apache.spark.util.Utils + + +abstract class EventLogFileWritersSuite extends SparkFunSuite with LocalSparkContext + with BeforeAndAfter { + + protected val fileSystem = Utils.getHadoopFileSystem("/", + SparkHadoopUtil.get.newConfiguration(new SparkConf())) + protected var testDir: File = _ + protected var testDirPath: Path = _ + + before { + testDir = Utils.createTempDir(namePrefix = s"event log") + testDirPath = new 
Path(testDir.getAbsolutePath()) + } + + after { + Utils.deleteRecursively(testDir) + } + + test("create EventLogFileWriter with enable/disable rolling") { + def buildWriterAndVerify(conf: SparkConf, expectedClazz: Class[_]): Unit = { + val writer = EventLogFileWriter( + getUniqueApplicationId, None, testDirPath.toUri, conf, + SparkHadoopUtil.get.newConfiguration(conf)) + val writerClazz = writer.getClass + assert(expectedClazz === writerClazz) + } + + val conf = new SparkConf + conf.set(EVENT_LOG_ENABLED, true) + conf.set(EVENT_LOG_DIR, testDir.toString) + + // default config + buildWriterAndVerify(conf, classOf[SingleEventLogFileWriter]) + + conf.set(EVENT_LOG_ENABLE_ROLLING, true) + buildWriterAndVerify(conf, classOf[RollingEventLogFilesWriter]) + + conf.set(EVENT_LOG_ENABLE_ROLLING, false) + buildWriterAndVerify(conf, classOf[SingleEventLogFileWriter]) + } + + val allCodecs = Seq(None) ++ + CompressionCodec.ALL_COMPRESSION_CODECS.map(c => Some(CompressionCodec.getShortName(c))) + + allCodecs.foreach { codecShortName => + test(s"initialize, write, stop - with codec $codecShortName") { + val appId = getUniqueApplicationId + val attemptId = None + + val conf = getLoggingConf(testDirPath, codecShortName) + val writer = createWriter(appId, attemptId, testDirPath.toUri, conf, + SparkHadoopUtil.get.newConfiguration(conf)) + + writer.start() + + // snappy stream throws exception on empty stream, so we should provide some data to test. 
+ val dummyData = Seq("dummy1", "dummy2", "dummy3") + dummyData.foreach(writer.writeEvent(_, flushLogger = true)) + + writer.stop() + + verifyWriteEventLogFile(appId, attemptId, testDirPath.toUri, codecShortName, dummyData) + } + } + + test("spark.eventLog.compression.codec overrides spark.io.compression.codec") { + val conf = new SparkConf + conf.set(EVENT_LOG_COMPRESS, true) + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + + val appId = "test" + val appAttemptId = None + + // The default value is `spark.io.compression.codec`. + val writer = createWriter(appId, appAttemptId, testDirPath.toUri, conf, hadoopConf) + assert(writer.compressionCodecName.contains("lz4")) + + // `spark.eventLog.compression.codec` overrides `spark.io.compression.codec`. + conf.set(EVENT_LOG_COMPRESSION_CODEC, "zstd") + val writer2 = createWriter(appId, appAttemptId, testDirPath.toUri, conf, hadoopConf) + assert(writer2.compressionCodecName.contains("zstd")) + } + + protected def readLinesFromEventLogFile(log: Path, fs: FileSystem): List[String] = { + val logDataStream = EventLogFileReader.openEventLog(log, fs) + try { + Source.fromInputStream(logDataStream).getLines().toList + } finally { + logDataStream.close() + } + } + + protected def createWriter( + appId: String, + appAttemptId : Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration): EventLogFileWriter + + /** + * This should be called with "closed" event log file; No guarantee on reading event log file + * which is being written, especially the file is compressed. SHS also does the best it can. 
+ */ + protected def verifyWriteEventLogFile( + appId: String, + appAttemptId : Option[String], + logBaseDir: URI, + compressionCodecShortName: Option[String], + expectedLines: Seq[String] = Seq.empty): Unit +} + +class SingleEventLogFileWriterSuite extends EventLogFileWritersSuite { + + test("Log overwriting") { + val appId = "test" + val appAttemptId = None + val logUri = SingleEventLogFileWriter.getLogPath(testDir.toURI, appId, appAttemptId) + + val conf = getLoggingConf(testDirPath) + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + val writer = createWriter(appId, appAttemptId, testDir.toURI, conf, hadoopConf) + + val logPath = new Path(logUri).toUri.getPath + writer.start() + + val dummyData = Seq("dummy1", "dummy2", "dummy3") + dummyData.foreach(writer.writeEvent(_, flushLogger = true)) + + // Create file before writing the event log + new FileOutputStream(new File(logPath)).close() + // Expected IOException, since we haven't enabled log overwrite. + intercept[IOException] { writer.stop() } + + // Try again, but enable overwriting. 
+ conf.set(EVENT_LOG_OVERWRITE, true) + val writer2 = createWriter(appId, appAttemptId, testDir.toURI, conf, hadoopConf) + writer2.start() + dummyData.foreach(writer2.writeEvent(_, flushLogger = true)) + writer2.stop() + } + + test("Event log name") { + val baseDirUri = Utils.resolveURI("/base-dir") + // without compression + assert(s"${baseDirUri.toString}/app1" === SingleEventLogFileWriter.getLogPath( + baseDirUri, "app1", None, None)) + // with compression + assert(s"${baseDirUri.toString}/app1.lzf" === + SingleEventLogFileWriter.getLogPath(baseDirUri, "app1", None, Some("lzf"))) + // illegal characters in app ID + assert(s"${baseDirUri.toString}/a-fine-mind_dollar_bills__1" === + SingleEventLogFileWriter.getLogPath(baseDirUri, + "a fine:mind$dollar{bills}.1", None, None)) + // illegal characters in app ID with compression + assert(s"${baseDirUri.toString}/a-fine-mind_dollar_bills__1.lz4" === + SingleEventLogFileWriter.getLogPath(baseDirUri, + "a fine:mind$dollar{bills}.1", None, Some("lz4"))) + } + + override protected def createWriter( + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration): EventLogFileWriter = { + new SingleEventLogFileWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) + } + + override protected def verifyWriteEventLogFile( + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + compressionCodecShortName: Option[String], + expectedLines: Seq[String]): Unit = { + // read single event log file + val logPath = SingleEventLogFileWriter.getLogPath(logBaseDir, appId, appAttemptId, + compressionCodecShortName) + + val finalLogPath = new Path(logPath) + assert(fileSystem.exists(finalLogPath) && fileSystem.isFile(finalLogPath)) + assert(expectedLines === readLinesFromEventLogFile(finalLogPath, fileSystem)) + } +} + +class RollingEventLogFilesWriterSuite extends EventLogFileWritersSuite { + import RollingEventLogFilesWriter._ + + test("Event log names") { + val 
baseDirUri = Utils.resolveURI("/base-dir") + val appId = "app1" + val appAttemptId = None + + // happy case with app ID + val logDir = RollingEventLogFilesWriter.getAppEventLogDirPath(baseDirUri, appId, None) + assert(s"${baseDirUri.toString}/${EVENT_LOG_DIR_NAME_PREFIX}${appId}" === logDir.toString) + + // appstatus: inprogress or completed + assert(s"$logDir/${APPSTATUS_FILE_NAME_PREFIX}${appId}${EventLogFileWriter.IN_PROGRESS}" === + RollingEventLogFilesWriter.getAppStatusFilePath(logDir, appId, appAttemptId, + inProgress = true).toString) + assert(s"$logDir/${APPSTATUS_FILE_NAME_PREFIX}${appId}" === + RollingEventLogFilesWriter.getAppStatusFilePath(logDir, appId, appAttemptId, + inProgress = false).toString) + + // without compression + assert(s"$logDir/${EVENT_LOG_FILE_NAME_PREFIX}1_${appId}" === + RollingEventLogFilesWriter.getEventLogFilePath(logDir, appId, appAttemptId, 1, None).toString) + + // with compression + assert(s"$logDir/${EVENT_LOG_FILE_NAME_PREFIX}1_${appId}.lzf" === + RollingEventLogFilesWriter.getEventLogFilePath(logDir, appId, appAttemptId, + 1, Some("lzf")).toString) + + // illegal characters in app ID + assert(s"${baseDirUri.toString}/${EVENT_LOG_DIR_NAME_PREFIX}a-fine-mind_dollar_bills__1" === + RollingEventLogFilesWriter.getAppEventLogDirPath(baseDirUri, + "a fine:mind$dollar{bills}.1", None).toString) + } + + test("Log overwriting") { + val appId = "test" + val appAttemptId = None + val logDirPath = RollingEventLogFilesWriter.getAppEventLogDirPath(testDir.toURI, appId, + appAttemptId) + + val conf = getLoggingConf(testDirPath) + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + val writer = createWriter(appId, appAttemptId, testDir.toURI, conf, hadoopConf) + + val logPath = logDirPath.toUri.getPath + + // Create file before writing the event log directory + // it doesn't matter whether the existing one is file or directory + new FileOutputStream(new File(logPath)).close() + + // Expected IOException, since we haven't enabled 
log overwrite. + // Note that the place IOException is thrown is different from single event log file. + intercept[IOException] { writer.start() } + + // Try again, but enable overwriting. + conf.set(EVENT_LOG_OVERWRITE, true) + + val writer2 = createWriter(appId, appAttemptId, testDir.toURI, conf, hadoopConf) + writer2.start() + val dummyData = Seq("dummy1", "dummy2", "dummy3") + dummyData.foreach(writer2.writeEvent(_, flushLogger = true)) + writer2.stop() + } + + allCodecs.foreach { codecShortName => + test(s"rolling event log files - codec $codecShortName") { + def assertEventLogFilesIndex( + eventLogFiles: Seq[FileStatus], + expectedLastIndex: Int, + expectedMaxSizeBytes: Long): Unit = { + assert(eventLogFiles.forall(f => f.getLen <= expectedMaxSizeBytes)) + assert((1 to expectedLastIndex) === + eventLogFiles.map(f => getEventLogFileIndex(f.getPath.getName))) + } + + val appId = getUniqueApplicationId + val attemptId = None + + val conf = getLoggingConf(testDirPath, codecShortName) + conf.set(EVENT_LOG_ENABLE_ROLLING, true) + conf.set(EVENT_LOG_ROLLING_MAX_FILE_SIZE.key, "10m") + + val writer = createWriter(appId, attemptId, testDirPath.toUri, conf, + SparkHadoopUtil.get.newConfiguration(conf)) + + writer.start() + + // write log more than 20m (intended to roll over to 3 files) + val dummyStr = "dummy" * 1024 + val expectedLines = writeTestEvents(writer, dummyStr, 1024 * 1024 * 21) + + val logDirPath = getAppEventLogDirPath(testDirPath.toUri, appId, attemptId) + + val eventLogFiles = listEventLogFiles(logDirPath) + assertEventLogFilesIndex(eventLogFiles, 3, 1024 * 1024 * 10) + + writer.stop() + + val eventLogFiles2 = listEventLogFiles(logDirPath) + assertEventLogFilesIndex(eventLogFiles2, 3, 1024 * 1024 * 10) + + verifyWriteEventLogFile(appId, attemptId, testDirPath.toUri, + codecShortName, expectedLines) + } + } + + test(s"rolling event log files - the max size of event log file size less than lower limit") { + val appId = getUniqueApplicationId + val 
attemptId = None + + val conf = getLoggingConf(testDirPath, None) + conf.set(EVENT_LOG_ENABLE_ROLLING, true) + conf.set(EVENT_LOG_ROLLING_MAX_FILE_SIZE.key, "9m") + + val e = intercept[IllegalArgumentException] { + createWriter(appId, attemptId, testDirPath.toUri, conf, + SparkHadoopUtil.get.newConfiguration(conf)) + } + assert(e.getMessage.contains("should be configured to be at least")) + } + + override protected def createWriter( + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + sparkConf: SparkConf, + hadoopConf: Configuration): EventLogFileWriter = { + new RollingEventLogFilesWriter(appId, appAttemptId, logBaseDir, sparkConf, hadoopConf) + } + + override protected def verifyWriteEventLogFile( + appId: String, + appAttemptId: Option[String], + logBaseDir: URI, + compressionCodecShortName: Option[String], + expectedLines: Seq[String]): Unit = { + val logDirPath = getAppEventLogDirPath(logBaseDir, appId, appAttemptId) + + assert(fileSystem.exists(logDirPath) && fileSystem.isDirectory(logDirPath)) + + val appStatusFile = getAppStatusFilePath(logDirPath, appId, appAttemptId, inProgress = false) + assert(fileSystem.exists(appStatusFile) && fileSystem.isFile(appStatusFile)) + + val eventLogFiles = listEventLogFiles(logDirPath) + val allLines = mutable.ArrayBuffer[String]() + eventLogFiles.foreach { file => + allLines.appendAll(readLinesFromEventLogFile(file.getPath, fileSystem)) + } + + assert(expectedLines === allLines) + } + + private def listEventLogFiles(logDirPath: Path): Seq[FileStatus] = { + fileSystem.listStatus(logDirPath).filter(isEventLogFile) + .sortBy { fs => getEventLogFileIndex(fs.getPath.getName) } + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala new file mode 100644 index 0000000000000..298fd65f293cb --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogTestHelper.scala @@ -0,0 
+1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.history + +import java.io.File +import java.nio.charset.StandardCharsets + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.json4s.jackson.JsonMethods.{compact, render} + +import org.apache.spark.SparkConf +import org.apache.spark.internal.config._ +import org.apache.spark.scheduler._ +import org.apache.spark.util.JsonProtocol + +object EventLogTestHelper { + def getUniqueApplicationId: String = "test-" + System.currentTimeMillis + + /** + * Get a SparkConf with event logging enabled. It doesn't enable rolling event logs, so caller + * should set it manually. 
+ */ + def getLoggingConf(logDir: Path, compressionCodec: Option[String] = None): SparkConf = { + val conf = new SparkConf + conf.set(EVENT_LOG_ENABLED, true) + conf.set(EVENT_LOG_BLOCK_UPDATES, true) + conf.set(EVENT_LOG_TESTING, true) + conf.set(EVENT_LOG_DIR, logDir.toString) + compressionCodec.foreach { codec => + conf.set(EVENT_LOG_COMPRESS, true) + conf.set(EVENT_LOG_COMPRESSION_CODEC, codec) + } + conf.set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true) + conf + } + + def writeTestEvents( + writer: EventLogFileWriter, + eventStr: String, + desiredSize: Long): Seq[String] = { + val stringLen = eventStr.getBytes(StandardCharsets.UTF_8).length + val repeatCount = Math.floor(desiredSize / stringLen).toInt + (0 until repeatCount).map { _ => + writer.writeEvent(eventStr, flushLogger = true) + eventStr + } + } + + def writeEventLogFile( + sparkConf: SparkConf, + hadoopConf: Configuration, + dir: File, + idx: Int, + events: Seq[SparkListenerEvent]): String = { + // to simplify the code, we don't concern about file name being matched with the naming rule + // of event log file + val writer = new SingleEventLogFileWriter(s"app$idx", None, dir.toURI, sparkConf, hadoopConf) + writer.start() + events.foreach { event => writer.writeEvent(convertEvent(event), flushLogger = true) } + writer.stop() + writer.logPath + } + + def writeEventsToRollingWriter( + fs: FileSystem, + appId: String, + dir: File, + sparkConf: SparkConf, + hadoopConf: Configuration, + eventsFiles: Seq[SparkListenerEvent]*): Seq[FileStatus] = { + val writer = new RollingEventLogFilesWriter(appId, None, dir.toURI, sparkConf, hadoopConf) + writer.start() + + eventsFiles.dropRight(1).foreach { events => + writeEventsToRollingWriter(writer, events, rollFile = true) + } + eventsFiles.lastOption.foreach { events => + writeEventsToRollingWriter(writer, events, rollFile = false) + } + + writer.stop() + EventLogFileReader(fs, new Path(writer.logPath)).get.listEventLogFiles + } + + def writeEventsToRollingWriter( + writer: 
RollingEventLogFilesWriter, + events: Seq[SparkListenerEvent], + rollFile: Boolean): Unit = { + events.foreach { event => writer.writeEvent(convertEvent(event), flushLogger = true) } + if (rollFile) writer.rollEventLogFile() + } + + def convertEvent(event: SparkListenerEvent): String = { + compact(render(JsonProtocol.sparkEventToJson(event))) + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 30261dde678f1..c2f34fc3a95ed 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -37,7 +37,9 @@ import org.mockito.Mockito.{doThrow, mock, spy, verify, when} import org.scalatest.Matchers import org.scalatest.concurrent.Eventually._ -import org.apache.spark.{SecurityManager, SPARK_VERSION, SparkConf, SparkFunSuite} +import org.apache.spark.{JobExecutionStatus, SecurityManager, SPARK_VERSION, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.EventLogTestHelper._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.DRIVER_LOG_DFS_DIR import org.apache.spark.internal.config.History._ @@ -47,12 +49,13 @@ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.security.GroupMappingServiceProvider import org.apache.spark.status.AppStatusStore +import org.apache.spark.status.KVUtils.KVStoreScalaSerializer import org.apache.spark.status.api.v1.{ApplicationAttemptInfo, ApplicationInfo} import org.apache.spark.util.{Clock, JsonProtocol, ManualClock, Utils} +import org.apache.spark.util.kvstore.InMemoryStore import org.apache.spark.util.logging.DriverLogger class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { - private var testDir: File = null 
override def beforeEach(): Unit = { @@ -74,8 +77,8 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { appAttemptId: Option[String], inProgress: Boolean, codec: Option[String] = None): File = { - val ip = if (inProgress) EventLoggingListener.IN_PROGRESS else "" - val logUri = EventLoggingListener.getLogPath(testDir.toURI, appId, appAttemptId, codec) + val ip = if (inProgress) EventLogFileWriter.IN_PROGRESS else "" + val logUri = SingleEventLogFileWriter.getLogPath(testDir.toURI, appId, appAttemptId, codec) val logPath = new Path(logUri).toUri.getPath + ip new File(logPath) } @@ -86,7 +89,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { } } - private def testAppLogParsing(inMemory: Boolean) { + private def testAppLogParsing(inMemory: Boolean): Unit = { val clock = new ManualClock(12345678) val conf = createTestConf(inMemory = inMemory) val provider = new FsHistoryProvider(conf, clock) @@ -159,13 +162,14 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { assume(!Utils.isWindows) class TestFsHistoryProvider extends FsHistoryProvider(createTestConf()) { - var mergeApplicationListingCall = 0 - override protected def mergeApplicationListing( - fileStatus: FileStatus, + var doMergeApplicationListingCall = 0 + override private[history] def doMergeApplicationListing( + reader: EventLogFileReader, lastSeen: Long, - enableSkipToEnd: Boolean): Unit = { - super.mergeApplicationListing(fileStatus, lastSeen, enableSkipToEnd) - mergeApplicationListingCall += 1 + enableSkipToEnd: Boolean, + lastCompactionIndex: Option[Long]): Unit = { + super.doMergeApplicationListing(reader, lastSeen, enableSkipToEnd, lastCompactionIndex) + doMergeApplicationListingCall += 1 } } val provider = new TestFsHistoryProvider @@ -186,7 +190,7 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { list.size should be (1) } - provider.mergeApplicationListingCall should be (1) + 
provider.doMergeApplicationListingCall should be (1) } test("history file is renamed from inprogress to completed") { @@ -199,13 +203,13 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { ) updateAndCheck(provider) { list => list.size should be (1) - provider.getAttempt("app1", None).logPath should endWith(EventLoggingListener.IN_PROGRESS) + provider.getAttempt("app1", None).logPath should endWith(EventLogFileWriter.IN_PROGRESS) } logFile1.renameTo(newLogFile("app1", None, inProgress = false)) updateAndCheck(provider) { list => list.size should be (1) - provider.getAttempt("app1", None).logPath should not endWith(EventLoggingListener.IN_PROGRESS) + provider.getAttempt("app1", None).logPath should not endWith(EventLogFileWriter.IN_PROGRESS) } } @@ -1161,29 +1165,45 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { when(mockedFs.open(path)).thenReturn(in) when(in.getWrappedStream).thenReturn(dfsIn) when(dfsIn.getFileLength).thenReturn(200) + // FileStatus.getLen is more than logInfo fileSize var fileStatus = new FileStatus(200, false, 0, 0, 0, path) + when(mockedFs.getFileStatus(path)).thenReturn(fileStatus) var logInfo = new LogInfo(path.toString, 0, LogType.EventLogs, Some("appId"), - Some("attemptId"), 100) - assert(mockedProvider.shouldReloadLog(logInfo, fileStatus)) + Some("attemptId"), 100, None, None, false) + var reader = EventLogFileReader(mockedFs, path) + assert(reader.isDefined) + assert(mockedProvider.shouldReloadLog(logInfo, reader.get)) fileStatus = new FileStatus() fileStatus.setPath(path) + when(mockedFs.getFileStatus(path)).thenReturn(fileStatus) // DFSInputStream.getFileLength is more than logInfo fileSize logInfo = new LogInfo(path.toString, 0, LogType.EventLogs, Some("appId"), - Some("attemptId"), 100) - assert(mockedProvider.shouldReloadLog(logInfo, fileStatus)) + Some("attemptId"), 100, None, None, false) + reader = EventLogFileReader(mockedFs, path) + assert(reader.isDefined) + 
assert(mockedProvider.shouldReloadLog(logInfo, reader.get)) + // DFSInputStream.getFileLength is equal to logInfo fileSize logInfo = new LogInfo(path.toString, 0, LogType.EventLogs, Some("appId"), - Some("attemptId"), 200) - assert(!mockedProvider.shouldReloadLog(logInfo, fileStatus)) + Some("attemptId"), 200, None, None, false) + reader = EventLogFileReader(mockedFs, path) + assert(reader.isDefined) + assert(!mockedProvider.shouldReloadLog(logInfo, reader.get)) + // in.getWrappedStream returns other than DFSInputStream val bin = mock(classOf[BufferedInputStream]) when(in.getWrappedStream).thenReturn(bin) - assert(!mockedProvider.shouldReloadLog(logInfo, fileStatus)) + reader = EventLogFileReader(mockedFs, path) + assert(reader.isDefined) + assert(!mockedProvider.shouldReloadLog(logInfo, reader.get)) + // fs.open throws exception when(mockedFs.open(path)).thenThrow(new IOException("Throwing intentionally")) - assert(!mockedProvider.shouldReloadLog(logInfo, fileStatus)) + reader = EventLogFileReader(mockedFs, path) + assert(reader.isDefined) + assert(!mockedProvider.shouldReloadLog(logInfo, reader.get)) } test("log cleaner with the maximum number of log files") { @@ -1236,6 +1256,220 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { } } + test("backwards compatibility with LogInfo from Spark 2.4") { + case class LogInfoV24( + logPath: String, + lastProcessed: Long, + appId: Option[String], + attemptId: Option[String], + fileSize: Long) + + val oldObj = LogInfoV24("dummy", System.currentTimeMillis(), Some("hello"), + Some("attempt1"), 100) + + val serializer = new KVStoreScalaSerializer() + val serializedOldObj = serializer.serialize(oldObj) + val deserializedOldObj = serializer.deserialize(serializedOldObj, classOf[LogInfo]) + assert(deserializedOldObj.logPath === oldObj.logPath) + assert(deserializedOldObj.lastProcessed === oldObj.lastProcessed) + assert(deserializedOldObj.appId === oldObj.appId) + 
assert(deserializedOldObj.attemptId === oldObj.attemptId) + assert(deserializedOldObj.fileSize === oldObj.fileSize) + + // SPARK-25118: added logType: LogType.Value - expected 'null' on old format + assert(deserializedOldObj.logType === null) + + // SPARK-28869: added lastIndex: Option[Long], isComplete: Boolean - expected 'None' and + // 'false' on old format. The default value for isComplete is wrong value for completed app, + // but the value will be corrected once checkForLogs is called. + assert(deserializedOldObj.lastIndex === None) + assert(deserializedOldObj.isComplete === false) + } + + test("SPARK-29755 LogInfo should be serialized/deserialized by jackson properly") { + def assertSerDe(serializer: KVStoreScalaSerializer, info: LogInfo): Unit = { + val infoAfterSerDe = serializer.deserialize(serializer.serialize(info), classOf[LogInfo]) + assert(infoAfterSerDe === info) + assertOptionAfterSerde(infoAfterSerDe.lastIndex, info.lastIndex) + } + + val serializer = new KVStoreScalaSerializer() + val logInfoWithIndexAsNone = LogInfo("dummy", 0, LogType.EventLogs, Some("appId"), + Some("attemptId"), 100, None, None, false) + assertSerDe(serializer, logInfoWithIndexAsNone) + + val logInfoWithIndex = LogInfo("dummy", 0, LogType.EventLogs, Some("appId"), + Some("attemptId"), 100, Some(3), None, false) + assertSerDe(serializer, logInfoWithIndex) + } + + test("SPARK-29755 AttemptInfoWrapper should be serialized/deserialized by jackson properly") { + def assertSerDe(serializer: KVStoreScalaSerializer, attempt: AttemptInfoWrapper): Unit = { + val attemptAfterSerDe = serializer.deserialize(serializer.serialize(attempt), + classOf[AttemptInfoWrapper]) + assert(attemptAfterSerDe.info === attempt.info) + // skip comparing some fields, as they've not triggered SPARK-29755 + assertOptionAfterSerde(attemptAfterSerDe.lastIndex, attempt.lastIndex) + } + + val serializer = new KVStoreScalaSerializer() + val appInfo = new ApplicationAttemptInfo(None, new Date(1), new Date(1), new 
Date(1), + 10, "spark", false, "dummy") + val attemptInfoWithIndexAsNone = new AttemptInfoWrapper(appInfo, "dummyPath", 10, None, + None, None, None, None) + assertSerDe(serializer, attemptInfoWithIndexAsNone) + + val attemptInfoWithIndex = new AttemptInfoWrapper(appInfo, "dummyPath", 10, Some(1), + None, None, None, None) + assertSerDe(serializer, attemptInfoWithIndex) + } + + test("SPARK-29043: clean up specified event log") { + val clock = new ManualClock() + val conf = createTestConf().set(MAX_LOG_AGE_S, 0L).set(CLEANER_ENABLED, true) + val provider = new FsHistoryProvider(conf, clock) + + // create an invalid application log file + val inValidLogFile = newLogFile("inValidLogFile", None, inProgress = true) + inValidLogFile.createNewFile() + writeFile(inValidLogFile, None, + SparkListenerApplicationStart(inValidLogFile.getName, None, 1L, "test", None)) + inValidLogFile.setLastModified(clock.getTimeMillis()) + + // create a valid application log file + val validLogFile = newLogFile("validLogFile", None, inProgress = true) + validLogFile.createNewFile() + writeFile(validLogFile, None, + SparkListenerApplicationStart(validLogFile.getName, Some("local_123"), 1L, "test", None)) + validLogFile.setLastModified(clock.getTimeMillis()) + + provider.checkForLogs() + // The invalid application log file would be cleaned by checkAndCleanLog(). + assert(new File(testDir.toURI).listFiles().size === 1) + + clock.advance(1) + // cleanLogs() would clean the valid application log file. + provider.cleanLogs() + assert(new File(testDir.toURI).listFiles().size === 0) + } + + private def assertOptionAfterSerde(opt: Option[Long], expected: Option[Long]): Unit = { + if (expected.isEmpty) { + assert(opt.isEmpty) + } else { + // The issue happens only when the value in Option is being unboxed. 
Here we ensure unboxing + // to Long succeeds: even though IDE suggests `.toLong` is redundant, direct comparison + // doesn't trigger unboxing and passes even without SPARK-29755, so don't remove + // `.toLong` below. Please refer SPARK-29755 for more details. + assert(opt.get.toLong === expected.get.toLong) + } + } + + test("compact event log files") { + def verifyEventLogFiles( + fs: FileSystem, + rootPath: String, + expectedIndexForCompact: Option[Long], + expectedIndicesForNonCompact: Seq[Long]): Unit = { + val reader = EventLogFileReader(fs, new Path(rootPath)).get + var logFiles = reader.listEventLogFiles + + expectedIndexForCompact.foreach { idx => + val headFile = logFiles.head + assert(EventLogFileWriter.isCompacted(headFile.getPath)) + assert(idx == RollingEventLogFilesWriter.getEventLogFileIndex(headFile.getPath.getName)) + logFiles = logFiles.drop(1) + } + + assert(logFiles.size === expectedIndicesForNonCompact.size) + + logFiles.foreach { logFile => + assert(RollingEventLogFilesWriter.isEventLogFile(logFile)) + assert(!EventLogFileWriter.isCompacted(logFile.getPath)) + } + + val indices = logFiles.map { logFile => + RollingEventLogFilesWriter.getEventLogFileIndex(logFile.getPath.getName) + } + assert(expectedIndicesForNonCompact === indices) + } + + withTempDir { dir => + val conf = createTestConf() + conf.set(HISTORY_LOG_DIR, dir.getAbsolutePath) + conf.set(EVENT_LOG_ROLLING_MAX_FILES_TO_RETAIN, 1) + conf.set(EVENT_LOG_COMPACTION_SCORE_THRESHOLD, 0.0d) + val hadoopConf = SparkHadoopUtil.newConfiguration(conf) + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val provider = new FsHistoryProvider(conf) + + val writer = new RollingEventLogFilesWriter("app", None, dir.toURI, conf, hadoopConf) + writer.start() + + // writing event log file 1 - don't compact for now + writeEventsToRollingWriter(writer, Seq( + SparkListenerApplicationStart("app", Some("app"), 0, "user", None), + SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false) 
+ + updateAndCheck(provider) { _ => + verifyEventLogFiles(fs, writer.logPath, None, Seq(1)) + val info = provider.listing.read(classOf[LogInfo], writer.logPath) + assert(info.lastEvaluatedForCompaction === Some(1)) + } + + // writing event log file 2 - compact the event log file 1 into 1.compact + writeEventsToRollingWriter(writer, Seq.empty, rollFile = true) + writeEventsToRollingWriter(writer, Seq(SparkListenerUnpersistRDD(1), + SparkListenerJobEnd(1, 1, JobSucceeded)), rollFile = false) + + updateAndCheck(provider) { _ => + verifyEventLogFiles(fs, writer.logPath, Some(1), Seq(2)) + val info = provider.listing.read(classOf[LogInfo], writer.logPath) + assert(info.lastEvaluatedForCompaction === Some(2)) + } + + // writing event log file 3 - compact two files - 1.compact & 2 into one, 2.compact + writeEventsToRollingWriter(writer, Seq.empty, rollFile = true) + writeEventsToRollingWriter(writer, Seq( + SparkListenerExecutorAdded(3, "exec1", new ExecutorInfo("host1", 1, Map.empty)), + SparkListenerJobStart(2, 4, Seq.empty), + SparkListenerJobEnd(2, 5, JobSucceeded)), rollFile = false) + + writer.stop() + + updateAndCheck(provider) { _ => + verifyEventLogFiles(fs, writer.logPath, Some(2), Seq(3)) + + val info = provider.listing.read(classOf[LogInfo], writer.logPath) + assert(info.lastEvaluatedForCompaction === Some(3)) + + val store = new InMemoryStore + val appStore = new AppStatusStore(store) + + val reader = EventLogFileReader(fs, new Path(writer.logPath)).get + provider.rebuildAppStore(store, reader, 0L) + + // replayed store doesn't have any job, as events for job are removed while compacting + intercept[NoSuchElementException] { + appStore.job(1) + } + + // but other events should be available even they were in original files to compact + val appInfo = appStore.applicationInfo() + assert(appInfo.id === "app") + assert(appInfo.name === "app") + + // All events in retained file(s) should be available, including events which would have + // been filtered out if 
compaction is applied. e.g. finished jobs, removed executors, etc. + val exec1 = appStore.executorSummary("exec1") + assert(exec1.hostPort === "host1") + val job2 = appStore.job(2) + assert(job2.status === JobExecutionStatus.SUCCEEDED) + } + } + } + /** * Asks the provider to check for logs and calls a function to perform checks on the updated * app list. Example: @@ -1254,9 +1488,13 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { private def writeFile(file: File, codec: Option[CompressionCodec], events: SparkListenerEvent*) = { val fstream = new FileOutputStream(file) - val cstream = codec.map(_.compressedOutputStream(fstream)).getOrElse(fstream) + val cstream = codec.map(_.compressedContinuousOutputStream(fstream)).getOrElse(fstream) val bstream = new BufferedOutputStream(cstream) - EventLoggingListener.initEventLog(bstream, false, null) + + val metadata = SparkListenerLogStart(org.apache.spark.SPARK_VERSION) + val eventJson = JsonProtocol.logStartToJson(metadata) + val metadataJson = compact(eventJson) + "\n" + bstream.write(metadataJson.getBytes(StandardCharsets.UTF_8)) val writer = new OutputStreamWriter(bstream, StandardCharsets.UTF_8) Utils.tryWithSafeFinally { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index dbc1938ed469a..206db0feb5716 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -26,7 +26,6 @@ import javax.servlet.http.{HttpServletRequest, HttpServletRequestWrapper, HttpSe import scala.collection.JavaConverters._ import scala.concurrent.duration._ -import com.gargoylesoftware.htmlunit.BrowserVersion import com.google.common.io.{ByteStreams, Files} import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} @@ -40,8 +39,8 
@@ import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfter, Matchers} import org.scalatest.concurrent.Eventually -import org.scalatest.mockito.MockitoSugar -import org.scalatest.selenium.WebBrowser +import org.scalatestplus.mockito.MockitoSugar +import org.scalatestplus.selenium.WebBrowser import org.apache.spark._ import org.apache.spark.internal.config._ @@ -85,7 +84,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers .set(IS_TESTING, true) .set(LOCAL_STORE_DIR, storeDir.getAbsolutePath()) .set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true) - .set(EVENT_LOG_PROCESS_TREE_METRICS, true) + .set(EXECUTOR_PROCESS_TREE_METRICS_ENABLED, true) conf.setAll(extraConf) provider = new FsHistoryProvider(conf) provider.checkForLogs() @@ -94,6 +93,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers server = new HistoryServer(conf, provider, securityManager, 18080) server.initialize() server.bind() + provider.start() port = server.boundPort } @@ -185,7 +185,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers test(name) { val (code, jsonOpt, errOpt) = getContentAndCode(path) code should be (HttpServletResponse.SC_OK) - jsonOpt should be ('defined) + jsonOpt should be (Symbol("defined")) errOpt should be (None) val exp = IOUtils.toString(new FileInputStream( @@ -364,8 +364,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers contextHandler.addServlet(holder, "/") server.attachHandler(contextHandler) - implicit val webDriver: WebDriver = - new HtmlUnitDriver(BrowserVersion.INTERNET_EXPLORER_11, true) + implicit val webDriver: WebDriver = new HtmlUnitDriver(true) try { val url = s"http://localhost:$port" @@ -451,6 +450,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers server = new HistoryServer(myConf, provider, securityManager, 0) server.initialize() 
server.bind() + provider.start() val port = server.boundPort val metrics = server.cacheMetrics diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 9ce046a2e2f50..0cf573c2490b3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -18,12 +18,12 @@ package org.apache.spark.deploy.master import java.util.Date -import java.util.concurrent.ConcurrentLinkedQueue +import java.util.concurrent.{ConcurrentLinkedQueue, CountDownLatch, TimeUnit} import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.collection.mutable.{HashMap, HashSet} +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.concurrent.duration._ import scala.io.Source import scala.reflect.ClassTag @@ -97,13 +97,40 @@ class MockWorker(master: RpcEndpointRef, conf: SparkConf = new SparkConf) extend } } -class MockExecutorLaunchFailWorker(master: RpcEndpointRef, conf: SparkConf = new SparkConf) - extends MockWorker(master, conf) { +// This class is designed to handle the lifecycle of only one application. +class MockExecutorLaunchFailWorker(master: Master, conf: SparkConf = new SparkConf) + extends MockWorker(master.self, conf) with Eventually { + + val appRegistered = new CountDownLatch(1) + val launchExecutorReceived = new CountDownLatch(1) + val appIdsToLaunchExecutor = new mutable.HashSet[String] var failedCnt = 0 + override def receive: PartialFunction[Any, Unit] = { + case LaunchDriver(driverId, _, _) => + master.self.send(RegisterApplication(appDesc, newDriver(driverId))) + + // Below code doesn't make driver stuck, as newDriver opens another rpc endpoint for + // handling driver related messages. 
To simplify logic, we will block handling + // LaunchExecutor message until we validate registering app succeeds. + eventually(timeout(5.seconds)) { + // an app would be registered with Master once Driver set up + assert(apps.nonEmpty) + assert(master.idToApp.keySet.intersect(apps.keySet) == apps.keySet) + } + + appRegistered.countDown() case LaunchExecutor(_, appId, execId, _, _, _, _) => + assert(appRegistered.await(10, TimeUnit.SECONDS)) + + if (failedCnt == 0) { + launchExecutorReceived.countDown() + } + assert(master.idToApp.contains(appId)) + appIdsToLaunchExecutor += appId failedCnt += 1 - master.send(ExecutorStateChanged(appId, execId, ExecutorState.FAILED, None, None)) + master.self.send(ExecutorStateChanged(appId, execId, ExecutorState.FAILED, None, None)) + case otherMsg => super.receive(otherMsg) } } @@ -542,9 +569,10 @@ class MasterSuite extends SparkFunSuite // | Utility methods and fields for testing | // ========================================== - private val _scheduleExecutorsOnWorkers = PrivateMethod[Array[Int]]('scheduleExecutorsOnWorkers) - private val _drivers = PrivateMethod[HashSet[DriverInfo]]('drivers) - private val _state = PrivateMethod[RecoveryState.Value]('state) + private val _scheduleExecutorsOnWorkers = + PrivateMethod[Array[Int]](Symbol("scheduleExecutorsOnWorkers")) + private val _drivers = PrivateMethod[HashSet[DriverInfo]](Symbol("drivers")) + private val _state = PrivateMethod[RecoveryState.Value](Symbol("state")) private val workerInfo = makeWorkerInfo(4096, 10) private val workerInfos = Array(workerInfo, workerInfo, workerInfo) @@ -661,7 +689,7 @@ class MasterSuite extends SparkFunSuite val master = makeAliveMaster() var worker: MockExecutorLaunchFailWorker = null try { - worker = new MockExecutorLaunchFailWorker(master.self) + worker = new MockExecutorLaunchFailWorker(master) worker.rpcEnv.setupEndpoint("worker", worker) val workerRegMsg = RegisterWorker( worker.id, @@ -676,19 +704,16 @@ class MasterSuite extends 
SparkFunSuite val driver = DeployTestUtils.createDriverDesc() // mimic DriverClient to send RequestSubmitDriver to master master.self.askSync[SubmitDriverResponse](RequestSubmitDriver(driver)) - var appId: String = null - eventually(timeout(10.seconds)) { - // an app would be registered with Master once Driver set up - assert(worker.apps.nonEmpty) - appId = worker.apps.head._1 - assert(master.idToApp.contains(appId)) - } + + // LaunchExecutor message should have been received in worker side + assert(worker.launchExecutorReceived.await(10, TimeUnit.SECONDS)) eventually(timeout(10.seconds)) { + val appIds = worker.appIdsToLaunchExecutor // Master would continually launch executors until reach MAX_EXECUTOR_RETRIES assert(worker.failedCnt == master.conf.get(MAX_EXECUTOR_RETRIES)) // Master would remove the app if no executor could be launched for it - assert(!master.idToApp.contains(appId)) + assert(master.idToApp.keySet.intersect(appIds).isEmpty) } } finally { if (worker != null) { diff --git a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala index f4558aa3eb893..e2d7facdd77e0 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala @@ -47,12 +47,12 @@ class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll { when(master.self).thenReturn(masterEndpointRef) val masterWebUI = new MasterWebUI(master, 0) - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() masterWebUI.bind() } - override def afterAll() { + override def afterAll(): Unit = { try { masterWebUI.stop() } finally { diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala index 89b8bb4ff7d03..d5312845a3b50 100644 --- 
a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala @@ -42,7 +42,7 @@ class StandaloneRestSubmitSuite extends SparkFunSuite with BeforeAndAfterEach { private var rpcEnv: Option[RpcEnv] = None private var server: Option[RestSubmissionServer] = None - override def afterEach() { + override def afterEach(): Unit = { try { rpcEnv.foreach(_.shutdown()) server.foreach(_.stop()) diff --git a/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala index 70174f7ff939a..275bca3459855 100644 --- a/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManagerSuite.scala @@ -17,11 +17,17 @@ package org.apache.spark.deploy.security +import java.security.PrivilegedExceptionAction + import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.security.Credentials +import org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION +import org.apache.hadoop.minikdc.MiniKdc +import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.security.HadoopDelegationTokenProvider +import org.apache.spark.util.Utils private class ExceptionThrowingDelegationTokenProvider extends HadoopDelegationTokenProvider { ExceptionThrowingDelegationTokenProvider.constructed = true @@ -69,4 +75,48 @@ class HadoopDelegationTokenManagerSuite extends SparkFunSuite { assert(!manager.isProviderLoaded("hadoopfs")) assert(manager.isProviderLoaded("hbase")) } + + test("SPARK-29082: do not fail if current user does not have credentials") { + // SparkHadoopUtil overrides the UGI configuration 
during initialization. That normally + // happens early in the Spark application, but here it may affect the test depending on + // how it's run, so force its initialization. + SparkHadoopUtil.get + + var kdc: MiniKdc = null + try { + // UserGroupInformation.setConfiguration needs default kerberos realm which can be set in + // krb5.conf. MiniKdc sets "java.security.krb5.conf" in start and removes it when stop called. + val kdcDir = Utils.createTempDir() + val kdcConf = MiniKdc.createConf() + kdc = new MiniKdc(kdcConf, kdcDir) + kdc.start() + + val krbConf = new Configuration() + krbConf.set(HADOOP_SECURITY_AUTHENTICATION, "kerberos") + + UserGroupInformation.setConfiguration(krbConf) + val manager = new HadoopDelegationTokenManager(new SparkConf(false), krbConf, null) + val testImpl = new PrivilegedExceptionAction[Unit] { + override def run(): Unit = { + assert(UserGroupInformation.isSecurityEnabled()) + val creds = new Credentials() + manager.obtainDelegationTokens(creds) + assert(creds.numberOfTokens() === 0) + assert(creds.numberOfSecretKeys() === 0) + } + } + + val realUser = UserGroupInformation.createUserForTesting("realUser", Array.empty) + realUser.doAs(testImpl) + + val proxyUser = UserGroupInformation.createProxyUserForTesting("proxyUser", realUser, + Array.empty) + proxyUser.doAs(testImpl) + } finally { + if (kdc != null) { + kdc.stop() + } + UserGroupInformation.reset() + } + } } diff --git a/core/src/test/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProviderSuite.scala index 1f19884bc24d3..44f38e7043dcd 100644 --- a/core/src/test/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProviderSuite.scala @@ -22,14 +22,15 @@ import org.apache.hadoop.fs.Path import org.scalatest.Matchers import org.apache.spark.{SparkConf, 
SparkFunSuite} -import org.apache.spark.internal.config.STAGING_DIR +import org.apache.spark.internal.config.{STAGING_DIR, SUBMIT_DEPLOY_MODE} class HadoopFSDelegationTokenProviderSuite extends SparkFunSuite with Matchers { test("hadoopFSsToAccess should return defaultFS even if not configured") { val sparkConf = new SparkConf() val defaultFS = "hdfs://localhost:8020" val statingDir = "hdfs://localhost:8021" - sparkConf.set("spark.master", "yarn-client") + sparkConf.setMaster("yarn") + sparkConf.set(SUBMIT_DEPLOY_MODE, "client") sparkConf.set(STAGING_DIR, statingDir) val hadoopConf = new Configuration() hadoopConf.set("fs.defaultFS", defaultFS) diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/CommandUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/CommandUtilsSuite.scala index 607c0a4fac46b..2d3cc5d3abd65 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/CommandUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/CommandUtilsSuite.scala @@ -38,7 +38,7 @@ class CommandUtilsSuite extends SparkFunSuite with Matchers with PrivateMethodTe } test("auth secret shouldn't appear in java opts") { - val buildLocalCommand = PrivateMethod[Command]('buildLocalCommand) + val buildLocalCommand = PrivateMethod[Command](Symbol("buildLocalCommand")) val conf = new SparkConf val secret = "This is the secret sauce" // set auth secret diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ui/LogPageSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ui/LogPageSuite.scala index 4c3e96777940d..c8b4e3372386b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/ui/LogPageSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/ui/LogPageSuite.scala @@ -53,7 +53,7 @@ class LogPageSuite extends SparkFunSuite with PrivateMethodTester { write(tmpRand, "1 6 4 5 2 7 8") // Get the logs. 
All log types other than "stderr" or "stdout" will be rejected - val getLog = PrivateMethod[(String, Long, Long, Long)]('getLog) + val getLog = PrivateMethod[(String, Long, Long, Long)](Symbol("getLog")) val (stdout, _, _, _) = logPage invokePrivate getLog(workDir.getAbsolutePath, "stdout", None, 100) val (stderr, _, _, _) = diff --git a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala index 64d99a59b9192..3134a738b33fa 100644 --- a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala @@ -30,11 +30,11 @@ import org.json4s.JsonAST.{JArray, JObject} import org.json4s.JsonDSL._ import org.mockito.Mockito.when import org.scalatest.concurrent.Eventually.{eventually, timeout} -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.TestUtils._ -import org.apache.spark.resource.{ResourceAllocation, ResourceInformation} +import org.apache.spark.resource._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ import org.apache.spark.rpc.RpcEnv @@ -50,13 +50,13 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite test("parsing no resources") { val conf = new SparkConf - conf.set(TASK_GPU_ID.amountConf, "2") + val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(conf) val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function - val backend = new CoarseGrainedExecutorBackend( env.rpcEnv, "driverurl", "1", "host1", - 4, Seq.empty[URL], env, None) + val backend = new CoarseGrainedExecutorBackend( env.rpcEnv, "driverurl", "1", "host1", "host1", + 4, Seq.empty[URL], env, None, 
resourceProfile) withTempDir { tmpDir => val testResourceArgs: JObject = ("" -> "") val ja = JArray(List(testResourceArgs)) @@ -73,12 +73,11 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite test("parsing one resource") { val conf = new SparkConf conf.set(EXECUTOR_GPU_ID.amountConf, "2") - conf.set(TASK_GPU_ID.amountConf, "2") val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function - val backend = new CoarseGrainedExecutorBackend( env.rpcEnv, "driverurl", "1", "host1", - 4, Seq.empty[URL], env, None) + val backend = new CoarseGrainedExecutorBackend( env.rpcEnv, "driverurl", "1", "host1", "host1", + 4, Seq.empty[URL], env, None, ResourceProfile.getOrCreateDefaultProfile(conf)) withTempDir { tmpDir => val ra = ResourceAllocation(EXECUTOR_GPU_ID, Seq("0", "1")) val ja = Extraction.decompose(Seq(ra)) @@ -88,22 +87,31 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite assert(parsedResources.size === 1) assert(parsedResources.get(GPU).nonEmpty) assert(parsedResources.get(GPU).get.name === GPU) - assert(parsedResources.get(GPU).get.addresses.deep === Array("0", "1").deep) + assert(parsedResources.get(GPU).get.addresses.sameElements(Array("0", "1"))) } } + test("parsing multiple resources resource profile") { + val rpBuilder = new ResourceProfileBuilder + val ereqs = new ExecutorResourceRequests().resource(GPU, 2) + ereqs.resource(FPGA, 3) + val rp = rpBuilder.require(ereqs).build + testParsingMultipleResources(new SparkConf, rp) + } + test("parsing multiple resources") { val conf = new SparkConf conf.set(EXECUTOR_GPU_ID.amountConf, "2") - conf.set(TASK_GPU_ID.amountConf, "2") conf.set(EXECUTOR_FPGA_ID.amountConf, "3") - conf.set(TASK_FPGA_ID.amountConf, "3") + testParsingMultipleResources(conf, ResourceProfile.getOrCreateDefaultProfile(conf)) + } + def testParsingMultipleResources(conf: SparkConf, resourceProfile: ResourceProfile) { val 
serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function - val backend = new CoarseGrainedExecutorBackend( env.rpcEnv, "driverurl", "1", "host1", - 4, Seq.empty[URL], env, None) + val backend = new CoarseGrainedExecutorBackend( env.rpcEnv, "driverurl", "1", "host1", "host1", + 4, Seq.empty[URL], env, None, resourceProfile) withTempDir { tmpDir => val gpuArgs = ResourceAllocation(EXECUTOR_GPU_ID, Seq("0", "1")) @@ -116,27 +124,26 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite assert(parsedResources.size === 2) assert(parsedResources.get(GPU).nonEmpty) assert(parsedResources.get(GPU).get.name === GPU) - assert(parsedResources.get(GPU).get.addresses.deep === Array("0", "1").deep) + assert(parsedResources.get(GPU).get.addresses.sameElements(Array("0", "1"))) assert(parsedResources.get(FPGA).nonEmpty) assert(parsedResources.get(FPGA).get.name === FPGA) - assert(parsedResources.get(FPGA).get.addresses.deep === Array("f1", "f2", "f3").deep) + assert(parsedResources.get(FPGA).get.addresses.sameElements(Array("f1", "f2", "f3"))) } } test("error checking parsing resources and executor and task configs") { val conf = new SparkConf conf.set(EXECUTOR_GPU_ID.amountConf, "2") - conf.set(TASK_GPU_ID.amountConf, "2") val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function - val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", - 4, Seq.empty[URL], env, None) + val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", "host1", + 4, Seq.empty[URL], env, None, ResourceProfile.getOrCreateDefaultProfile(conf)) // not enough gpu's on the executor withTempDir { tmpDir => val gpuArgs = ResourceAllocation(EXECUTOR_GPU_ID, Seq("0")) - val ja = Extraction.decompose(Seq(gpuArgs)) + val ja = Extraction.decompose(Seq(gpuArgs)) 
val f1 = createTempJsonFile(tmpDir, "resources", ja) var error = intercept[IllegalArgumentException] { @@ -157,20 +164,34 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite val parsedResources = backend.parseOrFindResources(Some(f1)) }.getMessage() - assert(error.contains("User is expecting to use resource: gpu, but didn't specify a " + - "discovery script!")) + assert(error.contains("User is expecting to use resource: gpu, but didn't " + + "specify a discovery script!")) } } + test("executor resource found less than required resource profile") { + val rpBuilder = new ResourceProfileBuilder + val ereqs = new ExecutorResourceRequests().resource(GPU, 4) + val treqs = new TaskResourceRequests().resource(GPU, 1) + val rp = rpBuilder.require(ereqs).require(treqs).build + testExecutorResourceFoundLessThanRequired(new SparkConf, rp) + } + test("executor resource found less than required") { - val conf = new SparkConf + val conf = new SparkConf() conf.set(EXECUTOR_GPU_ID.amountConf, "4") conf.set(TASK_GPU_ID.amountConf, "1") + testExecutorResourceFoundLessThanRequired(conf, ResourceProfile.getOrCreateDefaultProfile(conf)) + } + + private def testExecutorResourceFoundLessThanRequired( + conf: SparkConf, + resourceProfile: ResourceProfile) = { val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function - val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", - 4, Seq.empty[URL], env, None) + val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", "host1", + 4, Seq.empty[URL], env, None, resourceProfile) // executor resources < required withTempDir { tmpDir => @@ -190,7 +211,6 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite test("use resource discovery") { val conf = new SparkConf conf.set(EXECUTOR_FPGA_ID.amountConf, "3") - conf.set(TASK_FPGA_ID.amountConf, "3") assume(!(Utils.isWindows)) 
withTempDir { dir => val scriptPath = createTempScriptWithExpectedOutput(dir, "fpgaDiscoverScript", @@ -201,49 +221,68 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function - val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", - 4, Seq.empty[URL], env, None) + val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", "host1", + 4, Seq.empty[URL], env, None, ResourceProfile.getOrCreateDefaultProfile(conf)) val parsedResources = backend.parseOrFindResources(None) assert(parsedResources.size === 1) assert(parsedResources.get(FPGA).nonEmpty) assert(parsedResources.get(FPGA).get.name === FPGA) - assert(parsedResources.get(FPGA).get.addresses.deep === Array("f1", "f2", "f3").deep) + assert(parsedResources.get(FPGA).get.addresses.sameElements(Array("f1", "f2", "f3"))) + } + } + + test("use resource discovery and allocated file option with resource profile") { + assume(!(Utils.isWindows)) + withTempDir { dir => + val scriptPath = createTempScriptWithExpectedOutput(dir, "fpgaDiscoverScript", + """{"name": "fpga","addresses":["f1", "f2", "f3"]}""") + val rpBuilder = new ResourceProfileBuilder + val ereqs = new ExecutorResourceRequests().resource(FPGA, 3, scriptPath) + ereqs.resource(GPU, 2) + val rp = rpBuilder.require(ereqs).build + allocatedFileAndConfigsResourceDiscoveryTestFpga(dir, new SparkConf, rp) } } test("use resource discovery and allocated file option") { - val conf = new SparkConf - conf.set(EXECUTOR_FPGA_ID.amountConf, "3") - conf.set(TASK_FPGA_ID.amountConf, "3") assume(!(Utils.isWindows)) withTempDir { dir => val scriptPath = createTempScriptWithExpectedOutput(dir, "fpgaDiscoverScript", """{"name": "fpga","addresses":["f1", "f2", "f3"]}""") + val conf = new SparkConf + conf.set(EXECUTOR_FPGA_ID.amountConf, "3") conf.set(EXECUTOR_FPGA_ID.discoveryScriptConf, scriptPath) - - 
val serializer = new JavaSerializer(conf) - val env = createMockEnv(conf, serializer) - - // we don't really use this, just need it to get at the parser function - val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", - 4, Seq.empty[URL], env, None) - val gpuArgs = ResourceAllocation(EXECUTOR_GPU_ID, Seq("0", "1")) - val ja = Extraction.decompose(Seq(gpuArgs)) - val f1 = createTempJsonFile(dir, "resources", ja) - val parsedResources = backend.parseOrFindResources(Some(f1)) - - assert(parsedResources.size === 2) - assert(parsedResources.get(GPU).nonEmpty) - assert(parsedResources.get(GPU).get.name === GPU) - assert(parsedResources.get(GPU).get.addresses.deep === Array("0", "1").deep) - assert(parsedResources.get(FPGA).nonEmpty) - assert(parsedResources.get(FPGA).get.name === FPGA) - assert(parsedResources.get(FPGA).get.addresses.deep === Array("f1", "f2", "f3").deep) + conf.set(EXECUTOR_GPU_ID.amountConf, "2") + val rp = ResourceProfile.getOrCreateDefaultProfile(conf) + allocatedFileAndConfigsResourceDiscoveryTestFpga(dir, conf, rp) } } + private def allocatedFileAndConfigsResourceDiscoveryTestFpga( + dir: File, + conf: SparkConf, + resourceProfile: ResourceProfile) = { + val serializer = new JavaSerializer(conf) + val env = createMockEnv(conf, serializer) + + // we don't really use this, just need it to get at the parser function + val backend = new CoarseGrainedExecutorBackend(env.rpcEnv, "driverurl", "1", "host1", "host1", + 4, Seq.empty[URL], env, None, resourceProfile) + val gpuArgs = ResourceAllocation(EXECUTOR_GPU_ID, Seq("0", "1")) + val ja = Extraction.decompose(Seq(gpuArgs)) + val f1 = createTempJsonFile(dir, "resources", ja) + val parsedResources = backend.parseOrFindResources(Some(f1)) + + assert(parsedResources.size === 2) + assert(parsedResources.get(GPU).nonEmpty) + assert(parsedResources.get(GPU).get.name === GPU) + assert(parsedResources.get(GPU).get.addresses.sameElements(Array("0", "1"))) + 
assert(parsedResources.get(FPGA).nonEmpty) + assert(parsedResources.get(FPGA).get.name === FPGA) + assert(parsedResources.get(FPGA).get.addresses.sameElements(Array("f1", "f2", "f3"))) + } test("track allocated resources by taskId") { val conf = new SparkConf @@ -254,15 +293,16 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite try { val rpcEnv = RpcEnv.create("1", "localhost", 0, conf, securityMgr) val env = createMockEnv(conf, serializer, Some(rpcEnv)) - backend = new CoarseGrainedExecutorBackend(env.rpcEnv, rpcEnv.address.hostPort, "1", - "host1", 4, Seq.empty[URL], env, None) + backend = new CoarseGrainedExecutorBackend(env.rpcEnv, rpcEnv.address.hostPort, "1", + "host1", "host1", 4, Seq.empty[URL], env, None, + resourceProfile = ResourceProfile.getOrCreateDefaultProfile(conf)) assert(backend.taskResources.isEmpty) val taskId = 1000000 // We don't really verify the data, just pass it around. val data = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) - val taskDescription = new TaskDescription(taskId, 2, "1", "TASK 1000000", 19, 1, - mutable.Map.empty, mutable.Map.empty, new Properties, + val taskDescription = new TaskDescription(taskId, 2, "1", "TASK 1000000", + 19, 1, mutable.Map.empty, mutable.Map.empty, new Properties, Map(GPU -> new ResourceInformation(GPU, Array("0", "1"))), data) val serializedTaskDescription = TaskDescription.encode(taskDescription) backend.executor = mock[Executor] @@ -272,13 +312,15 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite backend.self.send(LaunchTask(new SerializableBuffer(serializedTaskDescription))) eventually(timeout(10.seconds)) { assert(backend.taskResources.size == 1) - assert(backend.taskResources(taskId)(GPU).addresses sameElements Array("0", "1")) + val resources = backend.taskResources(taskId) + assert(resources(GPU).addresses sameElements Array("0", "1")) } // Update the status of a running task shall not affect `taskResources` map. 
backend.statusUpdate(taskId, TaskState.RUNNING, data) assert(backend.taskResources.size == 1) - assert(backend.taskResources(taskId)(GPU).addresses sameElements Array("0", "1")) + val resources = backend.taskResources(taskId) + assert(resources(GPU).addresses sameElements Array("0", "1")) // Update the status of a finished task shall remove the entry from `taskResources` map. backend.statusUpdate(taskId, TaskState.FINISHED, data) @@ -290,6 +332,31 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite } } + test("SPARK-24203 when bindAddress is not set, it defaults to hostname") { + val args1 = Array( + "--driver-url", "driverurl", + "--executor-id", "1", + "--hostname", "host1", + "--cores", "1", + "--app-id", "app1") + + val arg = CoarseGrainedExecutorBackend.parseArguments(args1, "") + assert(arg.bindAddress == "host1") + } + + test("SPARK-24203 when bindAddress is different, it does not default to hostname") { + val args1 = Array( + "--driver-url", "driverurl", + "--executor-id", "1", + "--hostname", "host1", + "--bind-address", "bindaddress1", + "--cores", "1", + "--app-id", "app1") + + val arg = CoarseGrainedExecutorBackend.parseArguments(args1, "") + assert(arg.bindAddress == "bindaddress1") + } + private def createMockEnv(conf: SparkConf, serializer: JavaSerializer, rpcEnv: Option[RpcEnv] = None): SparkEnv = { val mockEnv = mock[SparkEnv] diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index ac7e4b51ebc2b..31049d104e63d 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -33,9 +33,10 @@ import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{inOrder, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer +import org.scalatest.Assertions._ import org.scalatest.PrivateMethodTester import 
org.scalatest.concurrent.Eventually -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.TaskState.TaskState @@ -56,7 +57,7 @@ import org.apache.spark.util.{LongAccumulator, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with Eventually with PrivateMethodTester { - override def afterEach() { + override def afterEach(): Unit = { // Unset any latches after each test; each test that needs them initializes new ones. ExecutorSuiteHelper.latches = null super.afterEach() @@ -116,7 +117,8 @@ class ExecutorSuite extends SparkFunSuite var executor: Executor = null try { - executor = new Executor("id", "localhost", env, userClassPath = Nil, isLocal = true) + executor = new Executor("id", "localhost", env, userClassPath = Nil, isLocal = true, + resources = immutable.Map.empty[String, ResourceInformation]) // the task will be launched in a dedicated worker thread executor.launchTask(mockExecutorBackend, taskDescription) @@ -253,7 +255,8 @@ class ExecutorSuite extends SparkFunSuite val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) val executor = - new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true) + new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, + resources = immutable.Map.empty[String, ResourceInformation]) val executorClass = classOf[Executor] // Save all heartbeats sent into an ArrayBuffer for verification @@ -275,7 +278,7 @@ class ExecutorSuite extends SparkFunSuite private def heartbeatZeroAccumulatorUpdateTest(dropZeroMetrics: Boolean): Unit = { val c = EXECUTOR_HEARTBEAT_DROP_ZERO_ACCUMULATOR_UPDATES.key -> dropZeroMetrics.toString withHeartbeatExecutor(c) { (executor, heartbeats) => - val reportHeartbeat = PrivateMethod[Unit]('reportHeartBeat) + val reportHeartbeat = PrivateMethod[Unit](Symbol("reportHeartBeat")) // When no 
tasks are running, there should be no accumulators sent in heartbeat executor.invokePrivate(reportHeartbeat()) @@ -352,7 +355,8 @@ class ExecutorSuite extends SparkFunSuite val mockBackend = mock[ExecutorBackend] var executor: Executor = null try { - executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true) + executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, + resources = immutable.Map.empty[String, ResourceInformation]) executor.launchTask(mockBackend, taskDescription) // Ensure that the executor's metricsPoller is polled so that values are recorded for @@ -465,7 +469,8 @@ class ExecutorSuite extends SparkFunSuite val timedOut = new AtomicBoolean(false) try { executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, - uncaughtExceptionHandler = mockUncaughtExceptionHandler) + uncaughtExceptionHandler = mockUncaughtExceptionHandler, + resources = immutable.Map.empty[String, ResourceInformation]) // the task will be launched in a dedicated worker thread executor.launchTask(mockBackend, taskDescription) if (killTask) { @@ -528,7 +533,8 @@ class FetchFailureThrowingRDD(sc: SparkContext) extends RDD[Int](sc, Nil) { throw new FetchFailedException( bmAddress = BlockManagerId("1", "hostA", 1234), shuffleId = 0, - mapId = 0, + mapId = 0L, + mapIndex = 0, reduceId = 0, message = "fake fetch failure" ) diff --git a/core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala b/core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala index 9ed1497db5e1d..9836697e1647c 100644 --- a/core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ProcfsMetricsGetterSuite.scala @@ -22,9 +22,9 @@ import org.apache.spark.SparkFunSuite class ProcfsMetricsGetterSuite extends SparkFunSuite { - val p = new ProcfsMetricsGetter(getTestResourcePath("ProcfsMetrics")) 
test("testGetProcessInfo") { + val p = new ProcfsMetricsGetter(getTestResourcePath("ProcfsMetrics")) var r = ProcfsMetrics(0, 0, 0, 0, 0, 0) r = p.addProcfsMetricsFromOneProcess(r, 26109) assert(r.jvmVmemTotal == 4769947648L) diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala index 576ca1613f75e..9a21ea6dafcac 100644 --- a/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala @@ -25,7 +25,6 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils /** * Tests the correctness of @@ -35,13 +34,13 @@ import org.apache.spark.util.Utils class WholeTextFileInputFormatSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { private var sc: SparkContext = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val conf = new SparkConf() sc = new SparkContext("local", "test", conf) } - override def afterAll() { + override def afterAll(): Unit = { try { sc.stop() } finally { diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index 47552916adb22..fab7aea6c47aa 100644 --- a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -40,7 +40,7 @@ class WholeTextFileRecordReaderSuite extends SparkFunSuite with BeforeAndAfterAl private var sc: SparkContext = _ private var factory: CompressionCodecFactory = _ - override def beforeAll() { + override def beforeAll(): Unit = { // Hadoop's FileSystem caching does not use the Configuration as part of its cache key, 
which // can cause Filesystem.get(Configuration) to return a cached instance created with a different // configuration than the one passed to get() (see HADOOP-8490 for more details). This caused @@ -59,7 +59,7 @@ class WholeTextFileRecordReaderSuite extends SparkFunSuite with BeforeAndAfterAl factory = new CompressionCodecFactory(sc.hadoopConfiguration) } - override def afterAll() { + override def afterAll(): Unit = { try { sc.stop() } finally { diff --git a/core/src/test/scala/org/apache/spark/internal/LoggingSuite.scala b/core/src/test/scala/org/apache/spark/internal/LoggingSuite.scala index 250ac3dafcabc..6b7cc304a1baa 100644 --- a/core/src/test/scala/org/apache/spark/internal/LoggingSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/LoggingSuite.scala @@ -33,18 +33,14 @@ class LoggingSuite extends SparkFunSuite { val originalThreshold = Logging.sparkShellThresholdLevel Logging.sparkShellThresholdLevel = Level.WARN try { - val logger = Logger.getLogger("a.b.c.D") - val logEvent = new LoggingEvent(logger.getName(), logger, Level.INFO, "Test", null) - assert(ssf.decide(logEvent) === Filter.DENY) - - // log level is less than threshold level but different from root level - val logEvent1 = new LoggingEvent(logger.getName(), logger, Level.DEBUG, "Test", null) - assert(ssf.decide(logEvent1) != Filter.DENY) + val logger1 = Logger.getLogger("a.b.c.D") + val logEvent1 = new LoggingEvent(logger1.getName(), logger1, Level.INFO, "Test", null) + assert(ssf.decide(logEvent1) == Filter.DENY) // custom log level configured val parentLogger = Logger.getLogger("a.b.c") parentLogger.setLevel(Level.INFO) - assert(ssf.decide(logEvent) != Filter.DENY) + assert(ssf.decide(logEvent1) != Filter.DENY) // log level is greater than or equal to threshold level val logger2 = Logger.getLogger("a.b.E") diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala new 
file mode 100644 index 0000000000000..cf2d9293ef822 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -0,0 +1,337 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal.plugin + +import java.io.File +import java.nio.charset.StandardCharsets +import java.util.{Map => JMap} + +import scala.collection.JavaConverters._ +import scala.concurrent.duration._ + +import com.codahale.metrics.Gauge +import com.google.common.io.Files +import org.mockito.ArgumentMatchers.{any, eq => meq} +import org.mockito.Mockito.{mock, spy, verify, when} +import org.scalatest.BeforeAndAfterEach +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} + +import org.apache.spark._ +import org.apache.spark.TestUtils._ +import org.apache.spark.api.plugin._ +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.resource.ResourceInformation +import org.apache.spark.resource.ResourceUtils.GPU +import org.apache.spark.resource.TestResourceIDs.{DRIVER_GPU_ID, EXECUTOR_GPU_ID, WORKER_GPU_ID} +import org.apache.spark.util.Utils + +class PluginContainerSuite extends SparkFunSuite with 
BeforeAndAfterEach with LocalSparkContext { + + override def afterEach(): Unit = { + TestSparkPlugin.reset() + NonLocalModeSparkPlugin.reset() + super.afterEach() + } + + test("plugin initialization and communication") { + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) + + TestSparkPlugin.extraConf = Map("foo" -> "bar", "bar" -> "baz").asJava + + sc = new SparkContext(conf) + + assert(TestSparkPlugin.driverPlugin != null) + verify(TestSparkPlugin.driverPlugin).init(meq(sc), any()) + + assert(TestSparkPlugin.executorPlugin != null) + verify(TestSparkPlugin.executorPlugin).init(any(), meq(TestSparkPlugin.extraConf)) + + assert(TestSparkPlugin.executorContext != null) + assert(TestSparkPlugin.executorContext.resources.isEmpty) + + // One way messages don't block, so need to loop checking whether it arrives. + TestSparkPlugin.executorContext.send("oneway") + eventually(timeout(10.seconds), interval(10.millis)) { + verify(TestSparkPlugin.driverPlugin).receive("oneway") + } + + assert(TestSparkPlugin.executorContext.ask("ask") === "reply") + + val err = intercept[Exception] { + TestSparkPlugin.executorContext.ask("unknown message") + } + assert(err.getMessage().contains("unknown message")) + + // It should be possible for the driver plugin to send a message to itself, even if that doesn't + // make a whole lot of sense. It at least allows the same context class to be used on both + // sides. 
+ assert(TestSparkPlugin.driverContext != null) + assert(TestSparkPlugin.driverContext.ask("ask") === "reply") + + val metricSources = sc.env.metricsSystem + .getSourcesByName(s"plugin.${classOf[TestSparkPlugin].getName()}") + assert(metricSources.size === 2) + + def findMetric(name: String): Int = { + val allFound = metricSources.filter(_.metricRegistry.getGauges().containsKey(name)) + assert(allFound.size === 1) + allFound.head.metricRegistry.getGauges().get(name).asInstanceOf[Gauge[Int]].getValue() + } + + assert(findMetric("driverMetric") === 42) + assert(findMetric("executorMetric") === 84) + + sc.stop() + sc = null + + verify(TestSparkPlugin.driverPlugin).shutdown() + verify(TestSparkPlugin.executorPlugin).shutdown() + } + + test("do nothing if plugins are not configured") { + val conf = new SparkConf() + val env = mock(classOf[SparkEnv]) + when(env.conf).thenReturn(conf) + val container = PluginContainer(env, Map.empty[String, ResourceInformation].asJava) + assert(container === None) + } + + test("merging of config options") { + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) + .set(DEFAULT_PLUGINS_LIST, classOf[TestSparkPlugin].getName()) + + assert(conf.get(PLUGINS).size === 2) + + sc = new SparkContext(conf) + // Just check plugin is loaded. The plugin code below checks whether a single copy was loaded. 
+ assert(TestSparkPlugin.driverPlugin != null) + } + + test("plugin initialization in non-local mode") { + val path = Utils.createTempDir() + + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local-cluster[2,1,1024]") + .set(PLUGINS, Seq(classOf[NonLocalModeSparkPlugin].getName())) + .set(NonLocalModeSparkPlugin.TEST_PATH_CONF, path.getAbsolutePath()) + + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 10000) + + eventually(timeout(10.seconds), interval(100.millis)) { + val children = path.listFiles() + assert(children != null) + assert(children.length >= 3) + } + } + + test("plugin initialization in non-local mode with resources") { + withTempDir { dir => + val scriptPath = createTempScriptWithExpectedOutput(dir, "gpuDiscoveryScript", + """{"name": "gpu","addresses":["5", "6"]}""") + + val workerScript = createTempScriptWithExpectedOutput(dir, "resourceDiscoveryScript", + """{"name": "gpu","addresses":["3", "4"]}""") + + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local-cluster[1,1,1024]") + .set(PLUGINS, Seq(classOf[NonLocalModeSparkPlugin].getName())) + .set(NonLocalModeSparkPlugin.TEST_PATH_CONF, dir.getAbsolutePath()) + .set(DRIVER_GPU_ID.amountConf, "2") + .set(DRIVER_GPU_ID.discoveryScriptConf, scriptPath) + .set(WORKER_GPU_ID.amountConf, "2") + .set(WORKER_GPU_ID.discoveryScriptConf, workerScript) + .set(EXECUTOR_GPU_ID.amountConf, "2") + sc = new SparkContext(conf) + + // Ensure all executors has started + TestUtils.waitUntilExecutorsUp(sc, 1, 10000) + + var children = Array.empty[File] + eventually(timeout(10.seconds), interval(100.millis)) { + children = dir.listFiles() + assert(children != null) + // we have 2 discovery scripts and then expect 1 driver and 1 executor file + assert(children.length >= 4) + } + val execFiles = + children.filter(_.getName.startsWith(NonLocalModeSparkPlugin.executorFileStr)) + assert(execFiles.size 
=== 1) + val allLines = Files.readLines(execFiles(0), StandardCharsets.UTF_8) + assert(allLines.size === 1) + val addrs = NonLocalModeSparkPlugin.extractGpuAddrs(allLines.get(0)) + assert(addrs.size === 2) + assert(addrs.sorted === Array("3", "4")) + + assert(NonLocalModeSparkPlugin.driverContext != null) + val driverResources = NonLocalModeSparkPlugin.driverContext.resources() + assert(driverResources.size === 1) + assert(driverResources.get(GPU).addresses === Array("5", "6")) + assert(driverResources.get(GPU).name === GPU) + } + } +} + +class NonLocalModeSparkPlugin extends SparkPlugin { + + override def driverPlugin(): DriverPlugin = { + new DriverPlugin() { + override def init(sc: SparkContext, ctx: PluginContext): JMap[String, String] = { + NonLocalModeSparkPlugin.writeDriverFile(NonLocalModeSparkPlugin.driverFileStr, ctx.conf(), + ctx.executorID()) + NonLocalModeSparkPlugin.driverContext = ctx + Map.empty[String, String].asJava + } + } + } + + override def executorPlugin(): ExecutorPlugin = { + new ExecutorPlugin() { + override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = { + NonLocalModeSparkPlugin.writeFile(NonLocalModeSparkPlugin.executorFileStr, ctx.conf(), + ctx.executorID(), ctx.resources().asScala.toMap) + } + } + } +} + +object NonLocalModeSparkPlugin { + val TEST_PATH_CONF = "spark.nonLocalPlugin.path" + var driverContext: PluginContext = _ + val executorFileStr = "EXECUTOR_FILE_" + val driverFileStr = "DRIVER_FILE_" + + private def createFileStringWithGpuAddrs( + id: String, + resources: Map[String, ResourceInformation]): String = { + // try to keep this simple and only write the gpus addresses, if we add more resources need to + // make more complex + val resourcesString = resources.filterKeys(_.equals(GPU)).map { + case (_, ri) => + s"${ri.addresses.mkString(",")}" + }.mkString(",") + s"$id&$resourcesString" + } + + def extractGpuAddrs(str: String): Array[String] = { + val idAndAddrs = str.split("&") + if (idAndAddrs.size 
> 1) { + idAndAddrs(1).split(",") + } else { + Array.empty[String] + } + } + + def writeDriverFile( + filePrefix: String, + conf: SparkConf, + id: String): Unit = { + writeFile(filePrefix, conf, id, Map.empty) + } + + def writeFile( + filePrefix: String, + conf: SparkConf, + id: String, + resources: Map[String, ResourceInformation]): Unit = { + val path = conf.get(TEST_PATH_CONF) + val strToWrite = createFileStringWithGpuAddrs(id, resources) + Files.write(strToWrite, new File(path, s"$filePrefix$id"), StandardCharsets.UTF_8) + } + + def reset(): Unit = { + driverContext = null + } +} + +class TestSparkPlugin extends SparkPlugin { + + override def driverPlugin(): DriverPlugin = { + val p = new TestDriverPlugin() + require(TestSparkPlugin.driverPlugin == null, "Driver plugin already initialized.") + TestSparkPlugin.driverPlugin = spy(p) + TestSparkPlugin.driverPlugin + } + + override def executorPlugin(): ExecutorPlugin = { + val p = new TestExecutorPlugin() + require(TestSparkPlugin.executorPlugin == null, "Executor plugin already initialized.") + TestSparkPlugin.executorPlugin = spy(p) + TestSparkPlugin.executorPlugin + } + +} + +private class TestDriverPlugin extends DriverPlugin { + + override def init(sc: SparkContext, ctx: PluginContext): JMap[String, String] = { + TestSparkPlugin.driverContext = ctx + TestSparkPlugin.extraConf + } + + override def registerMetrics(appId: String, ctx: PluginContext): Unit = { + ctx.metricRegistry().register("driverMetric", new Gauge[Int] { + override def getValue(): Int = 42 + }) + } + + override def receive(msg: AnyRef): AnyRef = msg match { + case "oneway" => null + case "ask" => "reply" + case other => throw new IllegalArgumentException(s"unknown: $other") + } + +} + +private class TestExecutorPlugin extends ExecutorPlugin { + + override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = { + ctx.metricRegistry().register("executorMetric", new Gauge[Int] { + override def getValue(): Int = 84 + }) + 
TestSparkPlugin.executorContext = ctx + } + +} + +private object TestSparkPlugin { + var driverPlugin: TestDriverPlugin = _ + var driverContext: PluginContext = _ + + var executorPlugin: TestExecutorPlugin = _ + var executorContext: PluginContext = _ + + var extraConf: JMap[String, String] = _ + + def reset(): Unit = { + driverPlugin = null + driverContext = null + executorPlugin = null + executorContext = null + extraConf = null + } +} diff --git a/core/src/test/scala/org/apache/spark/io/ChunkedByteBufferFileRegionSuite.scala b/core/src/test/scala/org/apache/spark/io/ChunkedByteBufferFileRegionSuite.scala index a6b0654204f34..551c0f1a73241 100644 --- a/core/src/test/scala/org/apache/spark/io/ChunkedByteBufferFileRegionSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/ChunkedByteBufferFileRegionSuite.scala @@ -23,7 +23,7 @@ import scala.util.Random import org.mockito.Mockito.when import org.scalatest.BeforeAndAfterEach -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} import org.apache.spark.internal.config diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index 7b40e3e58216d..4b27396e6ae05 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.{SparkConf, SparkFunSuite} class CompressionCodecSuite extends SparkFunSuite { val conf = new SparkConf(false) - def testCodec(codec: CompressionCodec) { + def testCodec(codec: CompressionCodec): Unit = { // Write 1000 integers to the output stream, compressed. 
val outputStream = new ByteArrayOutputStream() val out = codec.compressedOutputStream(outputStream) diff --git a/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala b/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala index c26945fa5fa31..60f67699f81be 100644 --- a/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala +++ b/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala @@ -17,60 +17,110 @@ package org.apache.spark.memory +import javax.annotation.concurrent.GuardedBy + +import scala.collection.mutable + import org.apache.spark.SparkConf import org.apache.spark.storage.BlockId class TestMemoryManager(conf: SparkConf) extends MemoryManager(conf, numCores = 1, Long.MaxValue, Long.MaxValue) { + @GuardedBy("this") + private var consequentOOM = 0 + @GuardedBy("this") + private var available = Long.MaxValue + @GuardedBy("this") + private val memoryForTask = mutable.HashMap[Long, Long]().withDefaultValue(0L) + override private[memory] def acquireExecutionMemory( numBytes: Long, taskAttemptId: Long, - memoryMode: MemoryMode): Long = { - if (consequentOOM > 0) { - consequentOOM -= 1 - 0 - } else if (available >= numBytes) { - available -= numBytes - numBytes - } else { - val grant = available - available = 0 - grant + memoryMode: MemoryMode): Long = synchronized { + require(numBytes >= 0) + val acquired = { + if (consequentOOM > 0) { + consequentOOM -= 1 + 0 + } else if (available >= numBytes) { + available -= numBytes + numBytes + } else { + val grant = available + available = 0 + grant + } } + memoryForTask(taskAttemptId) = memoryForTask.getOrElse(taskAttemptId, 0L) + acquired + acquired + } + + override private[memory] def releaseExecutionMemory( + numBytes: Long, + taskAttemptId: Long, + memoryMode: MemoryMode): Unit = synchronized { + require(numBytes >= 0) + available += numBytes + val existingMemoryUsage = memoryForTask.getOrElse(taskAttemptId, 0L) + val newMemoryUsage = existingMemoryUsage - numBytes 
+ require( + newMemoryUsage >= 0, + s"Attempting to free $numBytes of memory for task attempt $taskAttemptId, but it only " + + s"allocated $existingMemoryUsage bytes of memory") + memoryForTask(taskAttemptId) = newMemoryUsage + } + + override private[memory] def releaseAllExecutionMemoryForTask(taskAttemptId: Long): Long = { + memoryForTask.remove(taskAttemptId).getOrElse(0L) + } + + override private[memory] def getExecutionMemoryUsageForTask(taskAttemptId: Long): Long = { + memoryForTask.getOrElse(taskAttemptId, 0L) } + override def acquireStorageMemory( blockId: BlockId, numBytes: Long, - memoryMode: MemoryMode): Boolean = true + memoryMode: MemoryMode): Boolean = { + require(numBytes >= 0) + true + } + override def acquireUnrollMemory( blockId: BlockId, numBytes: Long, - memoryMode: MemoryMode): Boolean = true - override def releaseStorageMemory(numBytes: Long, memoryMode: MemoryMode): Unit = {} - override private[memory] def releaseExecutionMemory( - numBytes: Long, - taskAttemptId: Long, - memoryMode: MemoryMode): Unit = { - available += numBytes + memoryMode: MemoryMode): Boolean = { + require(numBytes >= 0) + true } + + override def releaseStorageMemory(numBytes: Long, memoryMode: MemoryMode): Unit = { + require(numBytes >= 0) + } + override def maxOnHeapStorageMemory: Long = Long.MaxValue override def maxOffHeapStorageMemory: Long = 0L - private var consequentOOM = 0 - private var available = Long.MaxValue - + /** + * Causes the next call to [[acquireExecutionMemory()]] to fail to allocate + * memory (returning `0`), simulating low-on-memory / out-of-memory conditions. + */ def markExecutionAsOutOfMemoryOnce(): Unit = { markconsequentOOM(1) } - def markconsequentOOM(n : Int) : Unit = { + /** + * Causes the next `n` calls to [[acquireExecutionMemory()]] to fail to allocate + * memory (returning `0`), simulating low-on-memory / out-of-memory conditions. 
+ */ + def markconsequentOOM(n: Int): Unit = synchronized { consequentOOM += n } - def limit(avail: Long): Unit = { + def limit(avail: Long): Unit = synchronized { + require(avail >= 0) available = avail } - } diff --git a/core/src/test/scala/org/apache/spark/memory/TestMemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/TestMemoryManagerSuite.scala new file mode 100644 index 0000000000000..043f341074b88 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/memory/TestMemoryManagerSuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.memory + +import org.apache.spark.{SparkConf, SparkFunSuite} + +/** + * Tests of [[TestMemoryManager]] itself. 
+ */ +class TestMemoryManagerSuite extends SparkFunSuite { + test("tracks allocated execution memory by task") { + val testMemoryManager = new TestMemoryManager(new SparkConf()) + + assert(testMemoryManager.getExecutionMemoryUsageForTask(0) == 0) + assert(testMemoryManager.getExecutionMemoryUsageForTask(1) == 0) + + testMemoryManager.acquireExecutionMemory(10, 0, MemoryMode.ON_HEAP) + testMemoryManager.acquireExecutionMemory(5, 1, MemoryMode.ON_HEAP) + testMemoryManager.acquireExecutionMemory(5, 0, MemoryMode.ON_HEAP) + assert(testMemoryManager.getExecutionMemoryUsageForTask(0) == 15) + assert(testMemoryManager.getExecutionMemoryUsageForTask(1) == 5) + + testMemoryManager.releaseExecutionMemory(10, 0, MemoryMode.ON_HEAP) + assert(testMemoryManager.getExecutionMemoryUsageForTask(0) == 5) + + testMemoryManager.releaseAllExecutionMemoryForTask(0) + testMemoryManager.releaseAllExecutionMemoryForTask(1) + assert(testMemoryManager.getExecutionMemoryUsageForTask(0) == 0) + assert(testMemoryManager.getExecutionMemoryUsageForTask(1) == 0) + } + + test("markconsequentOOM") { + val testMemoryManager = new TestMemoryManager(new SparkConf()) + assert(testMemoryManager.acquireExecutionMemory(1, 0, MemoryMode.ON_HEAP) == 1) + testMemoryManager.markconsequentOOM(2) + assert(testMemoryManager.acquireExecutionMemory(1, 0, MemoryMode.ON_HEAP) == 0) + assert(testMemoryManager.acquireExecutionMemory(1, 0, MemoryMode.ON_HEAP) == 0) + assert(testMemoryManager.acquireExecutionMemory(1, 0, MemoryMode.ON_HEAP) == 1) + } +} diff --git a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala index 0a689f81a5761..0cafe6891c7d1 100644 --- a/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/memory/UnifiedMemoryManagerSuite.scala @@ -305,7 +305,7 @@ class UnifiedMemoryManagerSuite extends MemoryManagerSuite with PrivateMethodTes 
intercept[RuntimeException] { mm.acquireExecutionMemory(1000L, 0, memoryMode) } - val assertInvariants = PrivateMethod[Unit]('assertInvariants) + val assertInvariants = PrivateMethod[Unit](Symbol("assertInvariants")) mm.invokePrivate[Unit](assertInvariants()) } diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index c7bd0c905d027..330347299ab56 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.metrics -import java.io.{File, FileWriter, PrintWriter} +import java.io.{File, PrintWriter} import scala.collection.mutable.ArrayBuffer @@ -166,7 +166,7 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext var shuffleRead = 0L var shuffleWritten = 0L sc.addSparkListener(new SparkListener() { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { val metrics = taskEnd.taskMetrics inputRead += metrics.inputMetrics.recordsRead outputWritten += metrics.outputMetrics.recordsWritten @@ -182,7 +182,7 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext .reduceByKey(_ + _) .saveAsTextFile(tmpFile.toURI.toString) - sc.listenerBus.waitUntilEmpty(500) + sc.listenerBus.waitUntilEmpty() assert(inputRead == numRecords) assert(outputWritten == numBuckets) @@ -243,17 +243,17 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext val taskMetrics = new ArrayBuffer[Long]() // Avoid receiving earlier taskEnd events - sc.listenerBus.waitUntilEmpty(500) + sc.listenerBus.waitUntilEmpty() sc.addSparkListener(new SparkListener() { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { taskMetrics += collector(taskEnd) 
} }) job - sc.listenerBus.waitUntilEmpty(500) + sc.listenerBus.waitUntilEmpty() taskMetrics.sum } @@ -284,16 +284,16 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext val taskBytesWritten = new ArrayBuffer[Long]() sc.addSparkListener(new SparkListener() { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { taskBytesWritten += taskEnd.taskMetrics.outputMetrics.bytesWritten } }) - val rdd = sc.parallelize(Array("a", "b", "c", "d"), 2) + val rdd = sc.parallelize(Seq("a", "b", "c", "d"), 2) try { rdd.saveAsTextFile(outPath.toString) - sc.listenerBus.waitUntilEmpty(500) + sc.listenerBus.waitUntilEmpty() assert(taskBytesWritten.length == 2) val outFiles = fs.listStatus(outPath).filter(_.getPath.getName != "_SUCCESS") taskBytesWritten.zip(outFiles).foreach { case (bytes, fileStatus) => diff --git a/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala index 99c9dde1cf23c..70b6c9a112142 100644 --- a/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/MetricsSystemSuite.scala @@ -42,8 +42,8 @@ class MetricsSystemSuite extends SparkFunSuite with BeforeAndAfter with PrivateM test("MetricsSystem with default config") { val metricsSystem = MetricsSystem.createMetricsSystem("default", conf, securityMgr) metricsSystem.start() - val sources = PrivateMethod[ArrayBuffer[Source]]('sources) - val sinks = PrivateMethod[ArrayBuffer[Sink]]('sinks) + val sources = PrivateMethod[ArrayBuffer[Source]](Symbol("sources")) + val sinks = PrivateMethod[ArrayBuffer[Sink]](Symbol("sinks")) assert(metricsSystem.invokePrivate(sources()).length === StaticSources.allSources.length) assert(metricsSystem.invokePrivate(sinks()).length === 0) @@ -53,8 +53,8 @@ class MetricsSystemSuite extends SparkFunSuite with BeforeAndAfter with PrivateM 
test("MetricsSystem with sources add") { val metricsSystem = MetricsSystem.createMetricsSystem("test", conf, securityMgr) metricsSystem.start() - val sources = PrivateMethod[ArrayBuffer[Source]]('sources) - val sinks = PrivateMethod[ArrayBuffer[Sink]]('sinks) + val sources = PrivateMethod[ArrayBuffer[Source]](Symbol("sources")) + val sinks = PrivateMethod[ArrayBuffer[Sink]](Symbol("sinks")) assert(metricsSystem.invokePrivate(sources()).length === StaticSources.allSources.length) assert(metricsSystem.invokePrivate(sinks()).length === 1) diff --git a/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala b/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala new file mode 100644 index 0000000000000..8f5ab7419d4f7 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.metrics.source + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.internal.config.{METRICS_EXECUTORMETRICS_SOURCE_ENABLED, METRICS_STATIC_SOURCES_ENABLED} + +class SourceConfigSuite extends SparkFunSuite with LocalSparkContext { + + test("Test configuration for adding static sources registration") { + val conf = new SparkConf() + conf.set(METRICS_STATIC_SOURCES_ENABLED, true) + val sc = new SparkContext("local", "test", conf) + try { + val metricsSystem = sc.env.metricsSystem + + // Static sources should be registered + assert (metricsSystem.getSourcesByName("CodeGenerator").nonEmpty) + assert (metricsSystem.getSourcesByName("HiveExternalCatalog").nonEmpty) + } finally { + sc.stop() + } + } + + test("Test configuration for skipping static sources registration") { + val conf = new SparkConf() + conf.set(METRICS_STATIC_SOURCES_ENABLED, false) + val sc = new SparkContext("local", "test", conf) + try { + val metricsSystem = sc.env.metricsSystem + + // Static sources should not be registered + assert (metricsSystem.getSourcesByName("CodeGenerator").isEmpty) + assert (metricsSystem.getSourcesByName("HiveExternalCatalog").isEmpty) + } finally { + sc.stop() + } + } + + test("Test configuration for adding ExecutorMetrics source registration") { + val conf = new SparkConf() + conf.set(METRICS_EXECUTORMETRICS_SOURCE_ENABLED, true) + val sc = new SparkContext("local", "test", conf) + try { + val metricsSystem = sc.env.metricsSystem + + // ExecutorMetrics source should be registered + assert (metricsSystem.getSourcesByName("ExecutorMetrics").nonEmpty) + } finally { + sc.stop() + } + } + + test("Test configuration for skipping ExecutorMetrics source registration") { + val conf = new SparkConf() + conf.set(METRICS_EXECUTORMETRICS_SOURCE_ENABLED, false) + val sc = new SparkContext("local", "test", conf) + try { + val metricsSystem = sc.env.metricsSystem + + // ExecutorMetrics source 
should not be registered + assert (metricsSystem.getSourcesByName("ExecutorMetrics").isEmpty) + } finally { + sc.stop() + } + } + +} diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala index 544d52d48b385..c726329ce8a84 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferSecuritySuite.scala @@ -29,7 +29,7 @@ import scala.util.{Failure, Success, Try} import com.google.common.io.CharStreams import org.mockito.Mockito._ import org.scalatest.Matchers -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.internal.config._ @@ -122,7 +122,7 @@ class NettyBlockTransferSecuritySuite extends SparkFunSuite with MockitoSugar wi val blockString = "Hello, world!" 
val blockBuffer = new NioManagedBuffer(ByteBuffer.wrap( blockString.getBytes(StandardCharsets.UTF_8))) - when(blockManager.getBlockData(blockId)).thenReturn(blockBuffer) + when(blockManager.getLocalBlockData(blockId)).thenReturn(blockBuffer) val securityManager0 = new SecurityManager(conf0) val exec0 = new NettyBlockTransferService(conf0, securityManager0, "localhost", "localhost", 0, diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index 5d67d3358a9ca..edddf88a28f85 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -41,7 +41,7 @@ class NettyBlockTransferServiceSuite private var service0: NettyBlockTransferService = _ private var service1: NettyBlockTransferService = _ - override def afterEach() { + override def afterEach(): Unit = { try { if (service0 != null) { service0.close() diff --git a/core/src/test/scala/org/apache/spark/network/netty/SparkTransportConfSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/SparkTransportConfSuite.scala index d7265b6c24fe7..55cd1a4bfe7dd 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/SparkTransportConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/SparkTransportConfSuite.scala @@ -17,8 +17,7 @@ package org.apache.spark.network.netty -import org.scalatest.Matchers -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.network.util.NettyUtils diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala index a7eb0eca72e56..a5bc557eef5ad 100644 --- 
a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala @@ -37,12 +37,12 @@ class AsyncRDDActionsSuite extends SparkFunSuite with BeforeAndAfterAll with Tim // Necessary to make ScalaTest 3.x interrupt a thread on the JVM like ScalaTest 2.2.x implicit val defaultSignaler: Signaler = ThreadSignaler - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() sc = new SparkContext("local[2]", "test") } - override def afterAll() { + override def afterAll(): Unit = { try { LocalSparkContext.stop(sc) sc = null @@ -66,7 +66,7 @@ class AsyncRDDActionsSuite extends SparkFunSuite with BeforeAndAfterAll with Tim } test("foreachAsync") { - zeroPartRdd.foreachAsync(i => Unit).get() + zeroPartRdd.foreachAsync(i => ()).get() val accum = sc.longAccumulator sc.parallelize(1 to 1000, 3).foreachAsync { i => @@ -76,7 +76,7 @@ class AsyncRDDActionsSuite extends SparkFunSuite with BeforeAndAfterAll with Tim } test("foreachPartitionAsync") { - zeroPartRdd.foreachPartitionAsync(iter => Unit).get() + zeroPartRdd.foreachPartitionAsync(iter => ()).get() val accum = sc.longAccumulator sc.parallelize(1 to 1000, 9).foreachPartitionAsync { iter => @@ -86,7 +86,7 @@ class AsyncRDDActionsSuite extends SparkFunSuite with BeforeAndAfterAll with Tim } test("takeAsync") { - def testTake(rdd: RDD[Int], input: Seq[Int], num: Int) { + def testTake(rdd: RDD[Int], input: Seq[Int], num: Int): Unit = { val expected = input.take(num) val saw = rdd.takeAsync(num).get() assert(saw == expected, "incorrect result for rdd with %d partitions (expected %s, saw %s)" diff --git a/core/src/test/scala/org/apache/spark/rdd/CoalescedRDDBenchmark.scala b/core/src/test/scala/org/apache/spark/rdd/CoalescedRDDBenchmark.scala index 42b30707f2624..617ca5a1a8bc4 100644 --- a/core/src/test/scala/org/apache/spark/rdd/CoalescedRDDBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/rdd/CoalescedRDDBenchmark.scala @@ 
-67,7 +67,8 @@ object CoalescedRDDBenchmark extends BenchmarkBase { benchmark.run() } - private def performCoalesce(blocks: immutable.Seq[(Int, Seq[String])], numPartitions: Int) { + private def performCoalesce(blocks: immutable.Seq[(Int, Seq[String])], + numPartitions: Int): Unit = { sc.makeRDD(blocks).coalesce(numPartitions).partitions } diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 1564435a0bbae..2de4b109e40e9 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -34,11 +34,10 @@ import org.scalatest.Assertions import org.apache.spark._ import org.apache.spark.Partitioner -import org.apache.spark.util.Utils class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("aggregateByKey") { - val pairs = sc.parallelize(Array((1, 1), (1, 1), (3, 2), (5, 1), (5, 3)), 2) + val pairs = sc.parallelize(Seq((1, 1), (1, 1), (3, 2), (5, 1), (5, 3)), 2) val sets = pairs.aggregateByKey(new HashSet[Int]())(_ += _, _ ++= _).collect() assert(sets.size === 3) @@ -51,7 +50,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("groupByKey") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1))) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1))) val groups = pairs.groupByKey().collect() assert(groups.size === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 @@ -61,7 +60,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("groupByKey with duplicates") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) val groups = pairs.groupByKey().collect() assert(groups.size === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 @@ -71,7 +70,7 @@ class 
PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("groupByKey with negative key hash codes") { - val pairs = sc.parallelize(Array((-1, 1), (-1, 2), (-1, 3), (2, 1))) + val pairs = sc.parallelize(Seq((-1, 1), (-1, 2), (-1, 3), (2, 1))) val groups = pairs.groupByKey().collect() assert(groups.size === 2) val valuesForMinus1 = groups.find(_._1 == -1).get._2 @@ -81,7 +80,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("groupByKey with many output partitions") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (2, 1))) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (2, 1))) val groups = pairs.groupByKey(10).collect() assert(groups.size === 2) val valuesFor1 = groups.find(_._1 == 1).get._2 @@ -170,13 +169,13 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("reduceByKey") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) val sums = pairs.reduceByKey(_ + _).collect() assert(sums.toSet === Set((1, 7), (2, 1))) } test("reduceByKey with collectAsMap") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) val sums = pairs.reduceByKey(_ + _).collectAsMap() assert(sums.size === 2) assert(sums(1) === 7) @@ -184,7 +183,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("reduceByKey with many output partitions") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) val sums = pairs.reduceByKey(_ + _, 10).collect() assert(sums.toSet === Set((1, 7), (2, 1))) } @@ -194,13 +193,13 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { def numPartitions = 2 def getPartition(key: Any) = key.asInstanceOf[Int] } - 
val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 1), (0, 1))).partitionBy(p) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 1), (0, 1))).partitionBy(p) val sums = pairs.reduceByKey(_ + _) assert(sums.collect().toSet === Set((1, 4), (0, 1))) assert(sums.partitioner === Some(p)) // count the dependencies to make sure there is only 1 ShuffledRDD val deps = new HashSet[RDD[_]]() - def visit(r: RDD[_]) { + def visit(r: RDD[_]): Unit = { for (dep <- r.dependencies) { deps += dep.rdd visit(dep.rdd) @@ -246,8 +245,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("join") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.join(rdd2).collect() assert(joined.size === 4) assert(joined.toSet === Set( @@ -259,8 +258,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("join all-to-all") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (1, 3))) - val rdd2 = sc.parallelize(Array((1, 'x'), (1, 'y'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (1, 3))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (1, 'y'))) val joined = rdd1.join(rdd2).collect() assert(joined.size === 6) assert(joined.toSet === Set( @@ -274,8 +273,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("leftOuterJoin") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.leftOuterJoin(rdd2).collect() assert(joined.size === 5) assert(joined.toSet === Set( @@ -292,7 +291,7 @@ class PairRDDFunctionsSuite extends 
SparkFunSuite with SharedSparkContext { import scala.reflect.classTag val intPairCT = classTag[(Int, Int)] - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.emptyRDD[(Int, Int)](intPairCT) val joined = rdd1.cogroup(rdd2).collect() @@ -304,7 +303,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { import scala.reflect.classTag val intCT = classTag[Int] - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) val rdd2 = sc.emptyRDD[Int](intCT).groupBy((x) => 5) val joined = rdd1.cogroup(rdd2).collect() assert(joined.size > 0) @@ -315,7 +314,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { "with an order of magnitude difference in number of partitions") { val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 1000) val rdd2 = sc - .parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) + .parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) .partitionBy(new HashPartitioner(10)) val joined = rdd1.cogroup(rdd2) assert(joined.getNumPartitions == rdd1.getNumPartitions) @@ -325,7 +324,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("cogroup between multiple RDD with number of partitions similar in order of magnitude") { val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 20) val rdd2 = sc - .parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) + .parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) .partitionBy(new HashPartitioner(10)) val joined = rdd1.cogroup(rdd2) assert(joined.getNumPartitions == rdd2.getNumPartitions) @@ -336,7 +335,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { try { sc.conf.set("spark.default.parallelism", "4") val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 20) - val rdd2 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1)), 10) + val rdd2 = 
sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1)), 10) val joined = rdd1.cogroup(rdd2) assert(joined.getNumPartitions == sc.defaultParallelism) } finally { @@ -349,7 +348,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { try { sc.conf.set("spark.default.parallelism", "4") val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 20) - val rdd2 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) .partitionBy(new HashPartitioner(10)) val joined = rdd1.cogroup(rdd2) assert(joined.getNumPartitions == rdd2.getNumPartitions) @@ -364,7 +363,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { try { sc.conf.set("spark.default.parallelism", "4") val rdd1 = sc.parallelize((1 to 1000).map(x => (x, x)), 1000) - val rdd2 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) .partitionBy(new HashPartitioner(10)) val joined = rdd1.cogroup(rdd2) assert(joined.getNumPartitions == rdd2.getNumPartitions) @@ -374,8 +373,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("rightOuterJoin") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.rightOuterJoin(rdd2).collect() assert(joined.size === 5) assert(joined.toSet === Set( @@ -388,8 +387,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("fullOuterJoin") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 
'w'))) val joined = rdd1.fullOuterJoin(rdd2).collect() assert(joined.size === 6) assert(joined.toSet === Set( @@ -403,15 +402,15 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("join with no matches") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((4, 'x'), (5, 'y'), (5, 'z'), (6, 'w'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((4, 'x'), (5, 'y'), (5, 'z'), (6, 'w'))) val joined = rdd1.join(rdd2).collect() assert(joined.size === 0) } test("join with many output partitions") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.join(rdd2, 10).collect() assert(joined.size === 4) assert(joined.toSet === Set( @@ -423,8 +422,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("groupWith") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) val joined = rdd1.groupWith(rdd2).collect() assert(joined.size === 4) val joinedSet = joined.map(x => (x._1, (x._2._1.toList, x._2._2.toList))).toSet @@ -437,9 +436,9 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("groupWith3") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) - val rdd3 = sc.parallelize(Array((1, 'a'), (3, 'b'), (4, 'c'), (4, 'd'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), 
(2, 'y'), (2, 'z'), (4, 'w'))) + val rdd3 = sc.parallelize(Seq((1, 'a'), (3, 'b'), (4, 'c'), (4, 'd'))) val joined = rdd1.groupWith(rdd2, rdd3).collect() assert(joined.size === 4) val joinedSet = joined.map(x => (x._1, @@ -453,10 +452,10 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("groupWith4") { - val rdd1 = sc.parallelize(Array((1, 1), (1, 2), (2, 1), (3, 1))) - val rdd2 = sc.parallelize(Array((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) - val rdd3 = sc.parallelize(Array((1, 'a'), (3, 'b'), (4, 'c'), (4, 'd'))) - val rdd4 = sc.parallelize(Array((2, '@'))) + val rdd1 = sc.parallelize(Seq((1, 1), (1, 2), (2, 1), (3, 1))) + val rdd2 = sc.parallelize(Seq((1, 'x'), (2, 'y'), (2, 'z'), (4, 'w'))) + val rdd3 = sc.parallelize(Seq((1, 'a'), (3, 'b'), (4, 'c'), (4, 'd'))) + val rdd4 = sc.parallelize(Seq((2, '@'))) val joined = rdd1.groupWith(rdd2, rdd3, rdd4).collect() assert(joined.size === 4) val joinedSet = joined.map(x => (x._1, @@ -480,7 +479,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("keys and values") { - val rdd = sc.parallelize(Array((1, "a"), (2, "b"))) + val rdd = sc.parallelize(Seq((1, "a"), (2, "b"))) assert(rdd.keys.collect().toList === List(1, 2)) assert(rdd.values.collect().toList === List("a", "b")) } @@ -496,8 +495,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("default partitioner uses largest partitioner") { - val a = sc.makeRDD(Array((1, "a"), (2, "b")), 2) - val b = sc.makeRDD(Array((1, "a"), (2, "b")), 2000) + val a = sc.makeRDD(Seq((1, "a"), (2, "b")), 2) + val b = sc.makeRDD(Seq((1, "a"), (2, "b")), 2000) val c = a.join(b) assert(c.partitions.size === 2000) } @@ -517,9 +516,9 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { def getPartition(key: Any) = key.asInstanceOf[Int] } // partitionBy so we have a narrow dependency - val a = sc.parallelize(Array((1, "a"), (2, "b"), (3, "c"))).partitionBy(p) 
+ val a = sc.parallelize(Seq((1, "a"), (2, "b"), (3, "c"))).partitionBy(p) // more partitions/no partitioner so a shuffle dependency - val b = sc.parallelize(Array((2, "b"), (3, "cc"), (4, "d")), 4) + val b = sc.parallelize(Seq((2, "b"), (3, "cc"), (4, "d")), 4) val c = a.subtract(b) assert(c.collect().toSet === Set((1, "a"), (3, "c"))) // Ideally we could keep the original partitioner... @@ -527,8 +526,8 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("subtractByKey") { - val a = sc.parallelize(Array((1, "a"), (1, "a"), (2, "b"), (3, "c")), 2) - val b = sc.parallelize(Array((2, 20), (3, 30), (4, 40)), 4) + val a = sc.parallelize(Seq((1, "a"), (1, "a"), (2, "b"), (3, "c")), 2) + val b = sc.parallelize(Seq((2, 20), (3, 30), (4, 40)), 4) val c = a.subtractByKey(b) assert(c.collect().toSet === Set((1, "a"), (1, "a"))) assert(c.partitions.size === a.partitions.size) @@ -541,22 +540,22 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { def getPartition(key: Any) = key.asInstanceOf[Int] } // partitionBy so we have a narrow dependency - val a = sc.parallelize(Array((1, "a"), (1, "a"), (2, "b"), (3, "c"))).partitionBy(p) + val a = sc.parallelize(Seq((1, "a"), (1, "a"), (2, "b"), (3, "c"))).partitionBy(p) // more partitions/no partitioner so a shuffle dependency - val b = sc.parallelize(Array((2, "b"), (3, "cc"), (4, "d")), 4) + val b = sc.parallelize(Seq((2, "b"), (3, "cc"), (4, "d")), 4) val c = a.subtractByKey(b) assert(c.collect().toSet === Set((1, "a"), (1, "a"))) assert(c.partitioner.get === p) } test("foldByKey") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) + val pairs = sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) val sums = pairs.foldByKey(0)(_ + _).collect() assert(sums.toSet === Set((1, 7), (2, 1))) } test("foldByKey with mutable result type") { - val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) + val pairs = 
sc.parallelize(Seq((1, 1), (1, 2), (1, 3), (1, 1), (2, 1))) val bufs = pairs.mapValues(v => ArrayBuffer(v)).cache() // Fold the values using in-place mutation val sums = bufs.foldByKey(new ArrayBuffer[Int])(_ ++= _).collect() @@ -571,7 +570,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("saveNewAPIHadoopFile should call setConf if format is configurable") { - val pairs = sc.parallelize(Array((Integer.valueOf(1), Integer.valueOf(1)))) + val pairs = sc.parallelize(Seq((Integer.valueOf(1), Integer.valueOf(1)))) // No error, non-configurable formats still work pairs.saveAsNewAPIHadoopFile[NewFakeFormat]("ignored") @@ -587,7 +586,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("The JobId on the driver and executors should be the same during the commit") { // Create more than one rdd to mimic stageId not equal to rddId - val pairs = sc.parallelize(Array((1, 2), (2, 3)), 2) + val pairs = sc.parallelize(Seq((1, 2), (2, 3)), 2) .map { p => (Integer.valueOf(p._1 + 1), Integer.valueOf(p._2 + 1)) } .filter { p => p._1 > 0 } pairs.saveAsNewAPIHadoopFile[YetAnotherFakeFormat]("ignored") @@ -595,7 +594,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("saveAsHadoopFile should respect configured output committers") { - val pairs = sc.parallelize(Array((Integer.valueOf(1), Integer.valueOf(1)))) + val pairs = sc.parallelize(Seq((Integer.valueOf(1), Integer.valueOf(1)))) val conf = new JobConf() conf.setOutputCommitter(classOf[FakeOutputCommitter]) @@ -607,7 +606,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("failure callbacks should be called before calling writer.close() in saveNewAPIHadoopFile") { - val pairs = sc.parallelize(Array((Integer.valueOf(1), Integer.valueOf(2))), 1) + val pairs = sc.parallelize(Seq((Integer.valueOf(1), Integer.valueOf(2))), 1) FakeWriterWithCallback.calledBy = "" FakeWriterWithCallback.exception = 
null @@ -622,7 +621,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("failure callbacks should be called before calling writer.close() in saveAsHadoopFile") { - val pairs = sc.parallelize(Array((Integer.valueOf(1), Integer.valueOf(2))), 1) + val pairs = sc.parallelize(Seq((Integer.valueOf(1), Integer.valueOf(2))), 1) val conf = new JobConf() FakeWriterWithCallback.calledBy = "" @@ -640,7 +639,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { test("saveAsNewAPIHadoopDataset should support invalid output paths when " + "there are no files to be committed to an absolute output location") { - val pairs = sc.parallelize(Array((Integer.valueOf(1), Integer.valueOf(2))), 1) + val pairs = sc.parallelize(Seq((Integer.valueOf(1), Integer.valueOf(2))), 1) def saveRddWithPath(path: String): Unit = { val job = NewJob.getInstance(new Configuration(sc.hadoopConfiguration)) @@ -668,7 +667,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { // for non-null invalid paths. 
test("saveAsHadoopDataset should respect empty output directory when " + "there are no files to be committed to an absolute output location") { - val pairs = sc.parallelize(Array((Integer.valueOf(1), Integer.valueOf(2))), 1) + val pairs = sc.parallelize(Seq((Integer.valueOf(1), Integer.valueOf(2))), 1) val conf = new JobConf() conf.setOutputKeyClass(classOf[Integer]) @@ -683,7 +682,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("lookup") { - val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) + val pairs = sc.parallelize(Seq((1, 2), (3, 4), (5, 6), (5, 7))) assert(pairs.partitioner === None) assert(pairs.lookup(1) === Seq(2)) @@ -693,7 +692,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("lookup with partitioner") { - val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) + val pairs = sc.parallelize(Seq((1, 2), (3, 4), (5, 6), (5, 7))) val p = new Partitioner { def numPartitions: Int = 2 @@ -709,7 +708,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext { } test("lookup with bad partitioner") { - val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) + val pairs = sc.parallelize(Seq((1, 2), (3, 4), (5, 6), (5, 7))) val p = new Partitioner { def numPartitions: Int = 2 diff --git a/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala b/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala index 424d9f825c465..10f4bbcf7f48b 100644 --- a/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/ParallelCollectionSplitSuite.scala @@ -22,7 +22,7 @@ import scala.collection.immutable.NumericRange import org.scalacheck.Arbitrary._ import org.scalacheck.Gen import org.scalacheck.Prop._ -import org.scalatest.prop.Checkers +import org.scalatestplus.scalacheck.Checkers import org.apache.spark.SparkFunSuite diff --git 
a/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala index cb0de1c6beb6b..da2ccbfae181f 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PartitionwiseSampledRDDSuite.scala @@ -25,7 +25,7 @@ class MockSampler extends RandomSampler[Long, Long] { private var s: Long = _ - override def setSeed(seed: Long) { + override def setSeed(seed: Long): Unit = { s = seed } diff --git a/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala index 69739a2e58481..2da2854dfbcb9 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PipedRDDSuite.scala @@ -21,16 +21,18 @@ import java.io.File import scala.collection.JavaConverters._ import scala.collection.Map +import scala.concurrent.duration._ import scala.io.Codec import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapred.{FileSplit, JobConf, TextInputFormat} +import org.scalatest.concurrent.Eventually import org.apache.spark._ import org.apache.spark.util.Utils -class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { +class PipedRDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val envCommand = if (Utils.isWindows) { "cmd.exe /C set" } else { @@ -100,11 +102,16 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { assert(result.collect().length === 0) - // collect stderr writer threads - val stderrWriterThread = Thread.getAllStackTraces.keySet().asScala - .find { _.getName.startsWith(PipedRDD.STDIN_WRITER_THREAD_PREFIX) } - - assert(stderrWriterThread.isEmpty) + // SPARK-29104 PipedRDD will invoke `stdinWriterThread.interrupt()` at task completion, + // and `obj.wait` will get InterruptedException. 
However, there exists a possibility + // which the thread termination gets delayed because the thread starts from `obj.wait()` + // with that exception. To prevent test flakiness, we need to use `eventually`. + eventually(timeout(10.seconds), interval(1.second)) { + // collect stdin writer threads + val stdinWriterThread = Thread.getAllStackTraces.keySet().asScala + .find { _.getName.startsWith(PipedRDD.STDIN_WRITER_THREAD_PREFIX) } + assert(stdinWriterThread.isEmpty) + } } test("advanced pipe") { @@ -131,7 +138,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { assert(c(6) === "3_") assert(c(7) === "4_") - val nums1 = sc.makeRDD(Array("a\t1", "b\t2", "a\t3", "b\t4"), 2) + val nums1 = sc.makeRDD(Seq("a\t1", "b\t2", "a\t3", "b\t4"), 2) val d = nums1.groupBy(str => str.split("\t")(0)). pipe(Seq("cat"), Map[String, String](), @@ -230,7 +237,7 @@ class PipedRDDSuite extends SparkFunSuite with SharedSparkContext { testExportInputFile("mapreduce_map_input_file") } - def testExportInputFile(varName: String) { + def testExportInputFile(varName: String): Unit = { assume(TestUtils.testCommandAvailable(envCommand)) val nums = new HadoopRDD(sc, new JobConf(), classOf[TextInputFormat], classOf[LongWritable], classOf[Text], 2) { diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDBarrierSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDBarrierSuite.scala index 2f6c4d6a42ea3..f048f95430138 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDBarrierSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDBarrierSuite.scala @@ -29,6 +29,15 @@ class RDDBarrierSuite extends SparkFunSuite with SharedSparkContext { assert(rdd2.isBarrier()) } + test("RDDBarrier mapPartitionsWithIndex") { + val rdd = sc.parallelize(1 to 12, 4) + assert(rdd.isBarrier() === false) + + val rdd2 = rdd.barrier().mapPartitionsWithIndex((index, iter) => Iterator(index)) + assert(rdd2.isBarrier()) + assert(rdd2.collect().toList === List(0, 1, 2, 3)) + } + test("create 
an RDDBarrier in the middle of a chain of RDDs") { val rdd = sc.parallelize(1 to 10, 4).map(x => x * 2) val rdd2 = rdd.barrier().mapPartitions(iter => iter).map(x => (x, x + 1)) diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 60e63bfd68625..18154d861a731 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -236,7 +236,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { } test("aggregate") { - val pairs = sc.makeRDD(Array(("a", 1), ("b", 2), ("a", 2), ("c", 5), ("a", 3))) + val pairs = sc.makeRDD(Seq(("a", 1), ("b", 2), ("a", 2), ("c", 5), ("a", 3))) type StringMap = HashMap[String, Int] val emptyMap = new StringMap { override def default(key: String): Int = 0 @@ -366,7 +366,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { assert(math.abs(partitions1(1).length - 500) < initialPartitions) assert(repartitioned1.collect() === input) - def testSplitPartitions(input: Seq[Int], initialPartitions: Int, finalPartitions: Int) { + def testSplitPartitions(input: Seq[Int], initialPartitions: Int, finalPartitions: Int): Unit = { val data = sc.parallelize(input, initialPartitions) val repartitioned = data.repartition(finalPartitions) assert(repartitioned.partitions.size === finalPartitions) @@ -1099,7 +1099,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { override def index: Int = 0 }) override def getDependencies: Seq[Dependency[_]] = mutableDependencies - def addDependency(dep: Dependency[_]) { + def addDependency(dep: Dependency[_]): Unit = { mutableDependencies += dep } } diff --git a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala index 7f20206202cb9..d5f7d30a253fe 100644 --- a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala 
+++ b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.internal.Logging class SortingSuite extends SparkFunSuite with SharedSparkContext with Matchers with Logging { test("sortByKey") { - val pairs = sc.parallelize(Array((1, 0), (2, 0), (0, 0), (3, 0)), 2) + val pairs = sc.parallelize(Seq((1, 0), (2, 0), (0, 0), (3, 0)), 2) assert(pairs.sortByKey().collect() === Array((0, 0), (1, 0), (2, 0), (3, 0))) } diff --git a/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala index 5d7b973fbd9ac..7079b9ea8eadc 100644 --- a/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/ZippedPartitionsSuite.scala @@ -27,9 +27,9 @@ object ZippedPartitionsSuite { class ZippedPartitionsSuite extends SparkFunSuite with SharedSparkContext { test("print sizes") { - val data1 = sc.makeRDD(Array(1, 2, 3, 4), 2) - val data2 = sc.makeRDD(Array("1", "2", "3", "4", "5", "6"), 2) - val data3 = sc.makeRDD(Array(1.0, 2.0), 2) + val data1 = sc.makeRDD(Seq(1, 2, 3, 4), 2) + val data2 = sc.makeRDD(Seq("1", "2", "3", "4", "5", "6"), 2) + val data3 = sc.makeRDD(Seq(1.0, 2.0), 2) val zippedRDD = data1.zipPartitions(data2, data3)(ZippedPartitionsSuite.procZippedData) diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceDiscoveryPluginSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceDiscoveryPluginSuite.scala new file mode 100644 index 0000000000000..7a05daa2ad715 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/resource/ResourceDiscoveryPluginSuite.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +import java.io.File +import java.nio.charset.StandardCharsets +import java.util.Optional +import java.util.UUID + +import scala.concurrent.duration._ + +import com.google.common.io.Files +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} + +import org.apache.spark._ +import org.apache.spark.TestUtils.createTempScriptWithExpectedOutput +import org.apache.spark.api.resource.ResourceDiscoveryPlugin +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} +import org.apache.spark.resource.TestResourceIDs._ +import org.apache.spark.util.Utils + +class ResourceDiscoveryPluginSuite extends SparkFunSuite with LocalSparkContext { + + test("plugin initialization in non-local mode fpga and gpu") { + assume(!(Utils.isWindows)) + withTempDir { dir => + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local-cluster[2,1,1024]") + .set(RESOURCES_DISCOVERY_PLUGIN, Seq(classOf[TestResourceDiscoveryPluginGPU].getName(), + classOf[TestResourceDiscoveryPluginFPGA].getName())) + .set(TestResourceDiscoveryPlugin.TEST_PATH_CONF, dir.getAbsolutePath()) + .set(WORKER_GPU_ID.amountConf, "2") + .set(TASK_GPU_ID.amountConf, "1") + .set(EXECUTOR_GPU_ID.amountConf, "1") + .set(SPARK_RESOURCES_DIR, 
dir.getName()) + .set(WORKER_FPGA_ID.amountConf, "2") + .set(TASK_FPGA_ID.amountConf, "1") + .set(EXECUTOR_FPGA_ID.amountConf, "1") + + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 10000) + + eventually(timeout(10.seconds), interval(100.millis)) { + val children = dir.listFiles() + assert(children != null) + assert(children.length >= 4) + val gpuFiles = children.filter(f => f.getName().contains(GPU)) + val fpgaFiles = children.filter(f => f.getName().contains(FPGA)) + assert(gpuFiles.length == 2) + assert(fpgaFiles.length == 2) + } + } + } + + test("single plugin gpu") { + assume(!(Utils.isWindows)) + withTempDir { dir => + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local-cluster[2,1,1024]") + .set(RESOURCES_DISCOVERY_PLUGIN, Seq(classOf[TestResourceDiscoveryPluginGPU].getName())) + .set(TestResourceDiscoveryPlugin.TEST_PATH_CONF, dir.getAbsolutePath()) + .set(WORKER_GPU_ID.amountConf, "2") + .set(TASK_GPU_ID.amountConf, "1") + .set(EXECUTOR_GPU_ID.amountConf, "1") + .set(SPARK_RESOURCES_DIR, dir.getName()) + + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 10000) + + eventually(timeout(10.seconds), interval(100.millis)) { + val children = dir.listFiles() + assert(children != null) + assert(children.length >= 2) + val gpuFiles = children.filter(f => f.getName().contains(GPU)) + assert(gpuFiles.length == 2) + } + } + } + + test("multiple plugins with one empty") { + assume(!(Utils.isWindows)) + withTempDir { dir => + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local-cluster[2,1,1024]") + .set(RESOURCES_DISCOVERY_PLUGIN, Seq(classOf[TestResourceDiscoveryPluginEmpty].getName(), + classOf[TestResourceDiscoveryPluginGPU].getName())) + .set(TestResourceDiscoveryPlugin.TEST_PATH_CONF, dir.getAbsolutePath()) + .set(WORKER_GPU_ID.amountConf, "2") + .set(TASK_GPU_ID.amountConf, "1") + .set(EXECUTOR_GPU_ID.amountConf, "1") 
+ .set(SPARK_RESOURCES_DIR, dir.getName()) + + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 10000) + + eventually(timeout(10.seconds), interval(100.millis)) { + val children = dir.listFiles() + assert(children != null) + assert(children.length >= 2) + val gpuFiles = children.filter(f => f.getName().contains(GPU)) + assert(gpuFiles.length == 2) + } + } + } + + test("empty plugin fallback to discovery script") { + assume(!(Utils.isWindows)) + withTempDir { dir => + val scriptPath = createTempScriptWithExpectedOutput(dir, "gpuDiscoveryScript", + """{"name": "gpu","addresses":["5", "6"]}""") + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local-cluster[2,1,1024]") + .set(RESOURCES_DISCOVERY_PLUGIN, Seq(classOf[TestResourceDiscoveryPluginEmpty].getName())) + .set(DRIVER_GPU_ID.discoveryScriptConf, scriptPath) + .set(DRIVER_GPU_ID.amountConf, "2") + .set(SPARK_RESOURCES_DIR, dir.getName()) + + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 10000) + + assert(sc.resources.size === 1) + assert(sc.resources.get(GPU).get.addresses === Array("5", "6")) + assert(sc.resources.get(GPU).get.name === "gpu") + } + } +} + +object TestResourceDiscoveryPlugin { + val TEST_PATH_CONF = "spark.nonLocalDiscoveryPlugin.path" + + def writeFile(conf: SparkConf, id: String): Unit = { + val path = conf.get(TEST_PATH_CONF) + val fileName = s"$id - ${UUID.randomUUID.toString}" + Files.write(id, new File(path, fileName), StandardCharsets.UTF_8) + } +} + +private class TestResourceDiscoveryPluginGPU extends ResourceDiscoveryPlugin { + + override def discoverResource( + request: ResourceRequest, + conf: SparkConf): Optional[ResourceInformation] = { + if (request.id.resourceName.equals(GPU)) { + TestResourceDiscoveryPlugin.writeFile(conf, request.id.resourceName) + Optional.of(new ResourceInformation(GPU, Array("0", "1", "2", "3"))) + } else { + Optional.empty() + } + } +} + +private class 
TestResourceDiscoveryPluginEmpty extends ResourceDiscoveryPlugin { + + override def discoverResource( + request: ResourceRequest, + conf: SparkConf): Optional[ResourceInformation] = { + Optional.empty() + } +} + +private class TestResourceDiscoveryPluginFPGA extends ResourceDiscoveryPlugin { + + override def discoverResource( + request: ResourceRequest, + conf: SparkConf): Optional[ResourceInformation] = { + if (request.id.resourceName.equals(FPGA)) { + TestResourceDiscoveryPlugin.writeFile(conf, request.id.resourceName) + Optional.of(new ResourceInformation(FPGA, Array("0", "1", "2", "3"))) + } else { + Optional.empty() + } + } +} diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala new file mode 100644 index 0000000000000..075260317284d --- /dev/null +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.resource + +import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests._ + +class ResourceProfileManagerSuite extends SparkFunSuite { + + override def beforeAll() { + try { + ResourceProfile.clearDefaultProfile() + } finally { + super.beforeAll() + } + } + + override def afterEach() { + try { + ResourceProfile.clearDefaultProfile() + } finally { + super.afterEach() + } + } + + test("ResourceProfileManager") { + val conf = new SparkConf().set(EXECUTOR_CORES, 4) + val rpmanager = new ResourceProfileManager(conf) + val defaultProf = rpmanager.defaultResourceProfile + assert(defaultProf.id === ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + assert(defaultProf.executorResources.size === 2, + "Executor resources should contain cores and memory by default") + assert(defaultProf.executorResources(ResourceProfile.CORES).amount === 4, + s"Executor resources should have 4 cores") + } + + test("isSupported yarn no dynamic allocation") { + val conf = new SparkConf().setMaster("yarn").set(EXECUTOR_CORES, 4) + conf.set(RESOURCE_PROFILE_MANAGER_TESTING.key, "true") + val rpmanager = new ResourceProfileManager(conf) + // default profile should always work + val defaultProf = rpmanager.defaultResourceProfile + val rprof = new ResourceProfileBuilder() + val gpuExecReq = + new ExecutorResourceRequests().resource("gpu", 2, "someScript") + val immrprof = rprof.require(gpuExecReq).build + val error = intercept[SparkException] { + rpmanager.isSupported(immrprof) + }.getMessage() + + assert(error.contains("ResourceProfiles are only supported on YARN with dynamic allocation")) + } + + test("isSupported yarn with dynamic allocation") { + val conf = new SparkConf().setMaster("yarn").set(EXECUTOR_CORES, 4) + conf.set(DYN_ALLOCATION_ENABLED, true) + conf.set(RESOURCE_PROFILE_MANAGER_TESTING.key, "true") + val rpmanager = new ResourceProfileManager(conf) + // default 
profile should always work + val defaultProf = rpmanager.defaultResourceProfile + val rprof = new ResourceProfileBuilder() + val gpuExecReq = + new ExecutorResourceRequests().resource("gpu", 2, "someScript") + val immrprof = rprof.require(gpuExecReq).build + assert(rpmanager.isSupported(immrprof) == true) + } + + test("isSupported yarn with local mode") { + val conf = new SparkConf().setMaster("local").set(EXECUTOR_CORES, 4) + conf.set(RESOURCE_PROFILE_MANAGER_TESTING.key, "true") + val rpmanager = new ResourceProfileManager(conf) + // default profile should always work + val defaultProf = rpmanager.defaultResourceProfile + val rprof = new ResourceProfileBuilder() + val gpuExecReq = + new ExecutorResourceRequests().resource("gpu", 2, "someScript") + val immrprof = rprof.require(gpuExecReq).build + var error = intercept[SparkException] { + rpmanager.isSupported(immrprof) + }.getMessage() + + assert(error.contains("ResourceProfiles are only supported on YARN with dynamic allocation")) + } + + + +} diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala new file mode 100644 index 0000000000000..b2f2c3632e454 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.resource + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.internal.config.{EXECUTOR_CORES, EXECUTOR_MEMORY, EXECUTOR_MEMORY_OVERHEAD} +import org.apache.spark.internal.config.Python.PYSPARK_EXECUTOR_MEMORY +import org.apache.spark.resource.TestResourceIDs._ + +class ResourceProfileSuite extends SparkFunSuite { + + override def beforeAll() { + try { + ResourceProfile.clearDefaultProfile() + } finally { + super.beforeAll() + } + } + + override def afterEach() { + try { + ResourceProfile.clearDefaultProfile() + } finally { + super.afterEach() + } + } + + test("Default ResourceProfile") { + val rprof = ResourceProfile.getOrCreateDefaultProfile(new SparkConf) + assert(rprof.id === ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + assert(rprof.executorResources.size === 2, + "Executor resources should contain cores and memory by default") + assert(rprof.executorResources(ResourceProfile.CORES).amount === 1, + "Executor resources should have 1 core") + assert(rprof.getExecutorCores.get === 1, + "Executor resources should have 1 core") + assert(rprof.executorResources(ResourceProfile.MEMORY).amount === 1024, + "Executor resources should have 1024 memory") + assert(rprof.executorResources.get(ResourceProfile.PYSPARK_MEM) == None, + "pyspark memory empty if not specified") + assert(rprof.executorResources.get(ResourceProfile.OVERHEAD_MEM) == None, + "overhead memory empty if not specified") + assert(rprof.taskResources.size === 1, + "Task resources should just contain cpus by default") + 
assert(rprof.taskResources(ResourceProfile.CPUS).amount === 1, + "Task resources should have 1 cpu") + assert(rprof.getTaskCpus.get === 1, + "Task resources should have 1 cpu") + } + + test("Default ResourceProfile with app level resources specified") { + val conf = new SparkConf + conf.set(PYSPARK_EXECUTOR_MEMORY.key, "2g") + conf.set(EXECUTOR_MEMORY_OVERHEAD.key, "1g") + conf.set(EXECUTOR_MEMORY.key, "4g") + conf.set(EXECUTOR_CORES.key, "4") + conf.set(TASK_GPU_ID.amountConf, "1") + conf.set(EXECUTOR_GPU_ID.amountConf, "1") + conf.set(EXECUTOR_GPU_ID.discoveryScriptConf, "nameOfScript") + val rprof = ResourceProfile.getOrCreateDefaultProfile(conf) + assert(rprof.id === ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val execResources = rprof.executorResources + assert(execResources.size === 5, s"Executor resources should contain cores, pyspark " + + s"memory, memory overhead, memory, and gpu $execResources") + assert(execResources.contains("gpu"), "Executor resources should have gpu") + assert(rprof.executorResources(ResourceProfile.CORES).amount === 4, + "Executor resources should have 4 core") + assert(rprof.getExecutorCores.get === 4, + "Executor resources should have 4 core") + assert(rprof.executorResources(ResourceProfile.MEMORY).amount === 4096, + "Executor resources should have 1024 memory") + assert(rprof.executorResources(ResourceProfile.PYSPARK_MEM).amount == 2048, + "pyspark memory empty if not specified") + assert(rprof.executorResources(ResourceProfile.OVERHEAD_MEM).amount == 1024, + "overhead memory empty if not specified") + assert(rprof.taskResources.size === 2, + "Task resources should just contain cpus and gpu") + assert(rprof.taskResources.contains("gpu"), "Task resources should have gpu") + } + + test("test default profile task gpus fractional") { + val sparkConf = new SparkConf() + .set(EXECUTOR_GPU_ID.amountConf, "2") + .set(TASK_GPU_ID.amountConf, "0.33") + val immrprof = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + 
assert(immrprof.taskResources.get("gpu").get.amount == 0.33) + } + + test("maxTasksPerExecutor cpus") { + val sparkConf = new SparkConf() + .set(EXECUTOR_CORES, 1) + val rprof = new ResourceProfileBuilder() + val taskReq = new TaskResourceRequests().resource("gpu", 1) + val execReq = + new ExecutorResourceRequests().resource("gpu", 2, "myscript", "nvidia") + rprof.require(taskReq).require(execReq) + val immrprof = new ResourceProfile(rprof.executorResources, rprof.taskResources) + assert(immrprof.limitingResource(sparkConf) == "cpus") + assert(immrprof.maxTasksPerExecutor(sparkConf) == 1) + } + + test("maxTasksPerExecutor/limiting no executor cores") { + val sparkConf = new SparkConf().setMaster("spark://testing") + val rprof = new ResourceProfileBuilder() + val taskReq = new TaskResourceRequests().resource("gpu", 1) + val execReq = + new ExecutorResourceRequests().resource("gpu", 2, "myscript", "nvidia") + rprof.require(taskReq).require(execReq) + val immrprof = new ResourceProfile(rprof.executorResources, rprof.taskResources) + assert(immrprof.limitingResource(sparkConf) == "gpu") + assert(immrprof.maxTasksPerExecutor(sparkConf) == 2) + assert(immrprof.isCoresLimitKnown == false) + } + + test("maxTasksPerExecutor/limiting no other resource no executor cores") { + val sparkConf = new SparkConf().setMaster("spark://testing") + val immrprof = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + assert(immrprof.limitingResource(sparkConf) == "") + assert(immrprof.maxTasksPerExecutor(sparkConf) == 1) + assert(immrprof.isCoresLimitKnown == false) + } + + test("maxTasksPerExecutor/limiting executor cores") { + val sparkConf = new SparkConf().setMaster("spark://testing").set(EXECUTOR_CORES, 2) + val rprof = new ResourceProfileBuilder() + val taskReq = new TaskResourceRequests().resource("gpu", 1) + val execReq = + new ExecutorResourceRequests().resource("gpu", 2, "myscript", "nvidia") + rprof.require(taskReq).require(execReq) + val immrprof = new 
ResourceProfile(rprof.executorResources, rprof.taskResources) + assert(immrprof.limitingResource(sparkConf) == ResourceProfile.CPUS) + assert(immrprof.maxTasksPerExecutor(sparkConf) == 2) + assert(immrprof.isCoresLimitKnown == true) + } + + + test("Create ResourceProfile") { + val rprof = new ResourceProfileBuilder() + val taskReq = new TaskResourceRequests().resource("gpu", 1) + val eReq = new ExecutorResourceRequests().resource("gpu", 2, "myscript", "nvidia") + rprof.require(taskReq).require(eReq) + + assert(rprof.executorResources.size === 1) + assert(rprof.executorResources.contains("gpu"), + "Executor resources should have gpu") + assert(rprof.executorResources.get("gpu").get.vendor === "nvidia", + "gpu vendor should be nvidia") + assert(rprof.executorResources.get("gpu").get.discoveryScript === "myscript", + "discoveryScript should be myscript") + assert(rprof.executorResources.get("gpu").get.amount === 2, + "gpu amount should be 2") + + assert(rprof.taskResources.size === 1, "Should have 1 task resource") + assert(rprof.taskResources.contains("gpu"), "Task resources should have gpu") + assert(rprof.taskResources.get("gpu").get.amount === 1, + "Task resources should have 1 gpu") + + val ereqs = new ExecutorResourceRequests() + ereqs.cores(2).memory("4096") + ereqs.memoryOverhead("2048").pysparkMemory("1024") + val treqs = new TaskResourceRequests() + treqs.cpus(1) + + rprof.require(treqs) + rprof.require(ereqs) + + assert(rprof.executorResources.size === 5) + assert(rprof.executorResources(ResourceProfile.CORES).amount === 2, + "Executor resources should have 2 cores") + assert(rprof.executorResources(ResourceProfile.MEMORY).amount === 4096, + "Executor resources should have 4096 memory") + assert(rprof.executorResources(ResourceProfile.OVERHEAD_MEM).amount === 2048, + "Executor resources should have 2048 overhead memory") + assert(rprof.executorResources(ResourceProfile.PYSPARK_MEM).amount === 1024, + "Executor resources should have 1024 pyspark memory") + + 
assert(rprof.taskResources.size === 2) + assert(rprof.taskResources("cpus").amount === 1, "Task resources should have cpu") + } + + test("Test ExecutorResourceRequests memory helpers") { + val rprof = new ResourceProfileBuilder() + val ereqs = new ExecutorResourceRequests() + ereqs.memory("4g") + ereqs.memoryOverhead("2000m").pysparkMemory("512000k") + rprof.require(ereqs) + + assert(rprof.executorResources(ResourceProfile.MEMORY).amount === 4096, + "Executor resources should have 4096 memory") + assert(rprof.executorResources(ResourceProfile.OVERHEAD_MEM).amount === 2000, + "Executor resources should have 2000 overhead memory") + assert(rprof.executorResources(ResourceProfile.PYSPARK_MEM).amount === 500, + "Executor resources should have 512 pyspark memory") + } + + test("Test TaskResourceRequest fractional") { + val rprof = new ResourceProfileBuilder() + val treqs = new TaskResourceRequests().resource("gpu", 0.33) + rprof.require(treqs) + + assert(rprof.taskResources.size === 1, "Should have 1 task resource") + assert(rprof.taskResources.contains("gpu"), "Task resources should have gpu") + assert(rprof.taskResources.get("gpu").get.amount === 0.33, + "Task resources should have 0.33 gpu") + + val fpgaReqs = new TaskResourceRequests().resource("fpga", 4.0) + rprof.require(fpgaReqs) + + assert(rprof.taskResources.size === 2, "Should have 2 task resource") + assert(rprof.taskResources.contains("fpga"), "Task resources should have gpu") + assert(rprof.taskResources.get("fpga").get.amount === 4.0, + "Task resources should have 4.0 gpu") + + var taskError = intercept[AssertionError] { + rprof.require(new TaskResourceRequests().resource("gpu", 1.5)) + }.getMessage() + assert(taskError.contains("The resource amount 1.5 must be either <= 0.5, or a whole number.")) + + taskError = intercept[AssertionError] { + rprof.require(new TaskResourceRequests().resource("gpu", 0.7)) + }.getMessage() + assert(taskError.contains("The resource amount 0.7 must be either <= 0.5, or a whole 
number.")) + } +} diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala index c2ecc96db906b..278a72a7192d8 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala @@ -19,14 +19,17 @@ package org.apache.spark.resource import java.io.File import java.nio.file.{Files => JavaFiles} +import java.util.Optional import org.json4s.{DefaultFormats, Extraction} import org.apache.spark.{LocalSparkContext, SparkConf, SparkException, SparkFunSuite} import org.apache.spark.TestUtils._ import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ +import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.util.Utils class ResourceUtilsSuite extends SparkFunSuite @@ -35,7 +38,7 @@ class ResourceUtilsSuite extends SparkFunSuite test("ResourceID") { val componentName = "spark.test" val resourceName = "p100" - val id = ResourceID(componentName, resourceName) + val id = new ResourceID(componentName, resourceName) val confPrefix = s"$componentName.resource.$resourceName." 
assert(id.confPrefix === confPrefix) assert(id.amountConf === s"${confPrefix}amount") @@ -60,6 +63,20 @@ class ResourceUtilsSuite extends SparkFunSuite } } + test("Resource discoverer amount 0") { + val conf = new SparkConf + assume(!(Utils.isWindows)) + withTempDir { dir => + val scriptPath = createTempScriptWithExpectedOutput(dir, "gpuDiscoverScript", + """{"name": "gpu"}""") + conf.set(EXECUTOR_GPU_ID.amountConf, "0") + conf.set(EXECUTOR_GPU_ID.discoveryScriptConf, scriptPath) + + val res = getOrDiscoverAllResources(conf, SPARK_EXECUTOR_PREFIX, None) + assert(res.isEmpty) + } + } + test("Resource discoverer multiple resource types") { val conf = new SparkConf assume(!(Utils.isWindows)) @@ -74,19 +91,26 @@ class ResourceUtilsSuite extends SparkFunSuite conf.set(EXECUTOR_FPGA_ID.amountConf, "2") conf.set(EXECUTOR_FPGA_ID.discoveryScriptConf, fpgaDiscovery) + // test one with amount 0 to make sure ignored + val fooDiscovery = createTempScriptWithExpectedOutput(dir, "fooDiscoverScript", + """{"name": "foo", "addresses": ["f1", "f2", "f3"]}""") + val fooId = new ResourceID(SPARK_EXECUTOR_PREFIX, "foo") + conf.set(fooId.amountConf, "0") + conf.set(fooId.discoveryScriptConf, fooDiscovery) + val resources = getOrDiscoverAllResources(conf, SPARK_EXECUTOR_PREFIX, None) assert(resources.size === 2) val gpuValue = resources.get(GPU) assert(gpuValue.nonEmpty, "Should have a gpu entry") assert(gpuValue.get.name == "gpu", "name should be gpu") assert(gpuValue.get.addresses.size == 2, "Should have 2 indexes") - assert(gpuValue.get.addresses.deep == Array("0", "1").deep, "should have 0,1 entries") + assert(gpuValue.get.addresses.sameElements(Array("0", "1")), "should have 0,1 entries") val fpgaValue = resources.get(FPGA) assert(fpgaValue.nonEmpty, "Should have a gpu entry") assert(fpgaValue.get.name == "fpga", "name should be fpga") assert(fpgaValue.get.addresses.size == 3, "Should have 3 indexes") - assert(fpgaValue.get.addresses.deep == Array("f1", "f2", "f3").deep, + 
assert(fpgaValue.get.addresses.sameElements(Array("f1", "f2", "f3")), "should have f1,f2,f3 entries") } } @@ -107,7 +131,8 @@ class ResourceUtilsSuite extends SparkFunSuite assert(resourcesFromFileOnly(FPGA) === expectedFpgaInfo) val gpuDiscovery = createTempScriptWithExpectedOutput( - dir, "gpuDiscoveryScript", """{"name": "gpu", "addresses": ["0", "1"]}""") + dir, "gpuDiscoveryScript", + """{"name": "gpu", "addresses": ["0", "1"]}""") conf.set(EXECUTOR_GPU_ID.amountConf, "2") conf.set(EXECUTOR_GPU_ID.discoveryScriptConf, gpuDiscovery) val resourcesFromBoth = getOrDiscoverAllResources( @@ -118,6 +143,40 @@ class ResourceUtilsSuite extends SparkFunSuite } } + test("get from resources file and discover resource profile remaining") { + val conf = new SparkConf + val rpId = 1 + assume(!(Utils.isWindows)) + withTempDir { dir => + implicit val formats = DefaultFormats + val fpgaAddrs = Seq("f1", "f2", "f3") + val fpgaAllocation = ResourceAllocation(EXECUTOR_FPGA_ID, fpgaAddrs) + val resourcesFile = createTempJsonFile( + dir, "resources", Extraction.decompose(Seq(fpgaAllocation))) + val resourcesFromFileOnly = getOrDiscoverAllResourcesForResourceProfile( + Some(resourcesFile), + SPARK_EXECUTOR_PREFIX, + ResourceProfile.getOrCreateDefaultProfile(conf), + conf) + val expectedFpgaInfo = new ResourceInformation(FPGA, fpgaAddrs.toArray) + assert(resourcesFromFileOnly(FPGA) === expectedFpgaInfo) + + val gpuDiscovery = createTempScriptWithExpectedOutput( + dir, "gpuDiscoveryScript", + """{"name": "gpu", "addresses": ["0", "1"]}""") + val rpBuilder = new ResourceProfileBuilder() + val ereqs = new ExecutorResourceRequests().resource(GPU, 2, gpuDiscovery) + val treqs = new TaskResourceRequests().resource(GPU, 1) + + val rp = rpBuilder.require(ereqs).require(treqs).build + val resourcesFromBoth = getOrDiscoverAllResourcesForResourceProfile( + Some(resourcesFile), SPARK_EXECUTOR_PREFIX, rp, conf) + val expectedGpuInfo = new ResourceInformation(GPU, Array("0", "1")) + 
assert(resourcesFromBoth(FPGA) === expectedFpgaInfo) + assert(resourcesFromBoth(GPU) === expectedGpuInfo) + } + } + test("list resource ids") { val conf = new SparkConf conf.set(DRIVER_GPU_ID.amountConf, "2") @@ -127,7 +186,7 @@ class ResourceUtilsSuite extends SparkFunSuite conf.set(DRIVER_FPGA_ID.amountConf, "2") val resourcesMap = listResourceIds(conf, SPARK_DRIVER_PREFIX) - .map{ rId => (rId.resourceName, 1)}.toMap + .map { rId => (rId.resourceName, 1) }.toMap assert(resourcesMap.size === 2, "should only have GPU for resource") assert(resourcesMap.get(GPU).nonEmpty, "should have GPU") assert(resourcesMap.get(FPGA).nonEmpty, "should have FPGA") @@ -139,8 +198,8 @@ class ResourceUtilsSuite extends SparkFunSuite var request = parseResourceRequest(conf, DRIVER_GPU_ID) assert(request.id.resourceName === GPU, "should only have GPU for resource") assert(request.amount === 2, "GPU count should be 2") - assert(request.discoveryScript === None, "discovery script should be empty") - assert(request.vendor === None, "vendor should be empty") + assert(request.discoveryScript === Optional.empty(), "discovery script should be empty") + assert(request.vendor === Optional.empty(), "vendor should be empty") val vendor = "nvidia.com" val discoveryScript = "discoveryScriptGPU" @@ -175,7 +234,7 @@ class ResourceUtilsSuite extends SparkFunSuite assert(gpuValue.nonEmpty, "Should have a gpu entry") assert(gpuValue.get.name == "gpu", "name should be gpu") assert(gpuValue.get.addresses.size == 2, "Should have 2 indexes") - assert(gpuValue.get.addresses.deep == Array("0", "1").deep, "should have 0,1 entries") + assert(gpuValue.get.addresses.sameElements(Array("0", "1")), "should have 0,1 entries") } } @@ -186,14 +245,14 @@ class ResourceUtilsSuite extends SparkFunSuite val gpuDiscovery = createTempScriptWithExpectedOutput(dir, "gpuDiscoveryScript", """{"name": "fpga", "addresses": ["0", "1"]}""") val request = - ResourceRequest( + new ResourceRequest( DRIVER_GPU_ID, 2, - 
Some(gpuDiscovery), - None) + Optional.of(gpuDiscovery), + Optional.empty[String]) val error = intercept[SparkException] { - discoverResource(request) + discoverResource(conf, request) }.getMessage() assert(error.contains(s"Error running the resource discovery script $gpuDiscovery: " + @@ -201,6 +260,28 @@ class ResourceUtilsSuite extends SparkFunSuite } } + test("Resource discoverer with invalid class") { + val conf = new SparkConf() + .set(RESOURCES_DISCOVERY_PLUGIN, Seq("someinvalidclass")) + assume(!(Utils.isWindows)) + withTempDir { dir => + val gpuDiscovery = createTempScriptWithExpectedOutput(dir, "gpuDiscoveryScript", + """{"name": "fpga", "addresses": ["0", "1"]}""") + val request = + new ResourceRequest( + DRIVER_GPU_ID, + 2, + Optional.of(gpuDiscovery), + Optional.empty[String]) + + val error = intercept[ClassNotFoundException] { + discoverResource(conf, request) + }.getMessage() + + assert(error.contains(s"someinvalidclass")) + } + } + test("Resource discoverer script returns invalid format") { val conf = new SparkConf assume(!(Utils.isWindows)) @@ -209,14 +290,14 @@ class ResourceUtilsSuite extends SparkFunSuite """{"addresses": ["0", "1"]}""") val request = - ResourceRequest( + new ResourceRequest( EXECUTOR_GPU_ID, 2, - Some(gpuDiscovery), - None) + Optional.of(gpuDiscovery), + Optional.empty[String]) val error = intercept[SparkException] { - discoverResource(request) + discoverResource(conf, request) }.getMessage() assert(error.contains("Error parsing JSON into ResourceInformation")) @@ -229,14 +310,14 @@ class ResourceUtilsSuite extends SparkFunSuite val file1 = new File(dir, "bogusfilepath") try { val request = - ResourceRequest( + new ResourceRequest( EXECUTOR_GPU_ID, 2, - Some(file1.getPath()), - None) + Optional.of(file1.getPath()), + Optional.empty[String]) val error = intercept[SparkException] { - discoverResource(request) + discoverResource(conf, request) }.getMessage() assert(error.contains("doesn't exist")) @@ -247,10 +328,11 @@ class 
ResourceUtilsSuite extends SparkFunSuite } test("gpu's specified but not a discovery script") { - val request = ResourceRequest(EXECUTOR_GPU_ID, 2, None, None) + val request = new ResourceRequest(EXECUTOR_GPU_ID, 2, Optional.empty[String], + Optional.empty[String]) val error = intercept[SparkException] { - discoverResource(request) + discoverResource(new SparkConf(), request) }.getMessage() assert(error.contains("User is expecting to use resource: gpu, but " + diff --git a/core/src/test/scala/org/apache/spark/resource/TestResourceIDs.scala b/core/src/test/scala/org/apache/spark/resource/TestResourceIDs.scala index c4509e93104d5..60246f5fad9a8 100644 --- a/core/src/test/scala/org/apache/spark/resource/TestResourceIDs.scala +++ b/core/src/test/scala/org/apache/spark/resource/TestResourceIDs.scala @@ -22,14 +22,14 @@ import org.apache.spark.internal.config.Worker.SPARK_WORKER_PREFIX import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} object TestResourceIDs { - val DRIVER_GPU_ID = ResourceID(SPARK_DRIVER_PREFIX, GPU) - val EXECUTOR_GPU_ID = ResourceID(SPARK_EXECUTOR_PREFIX, GPU) - val TASK_GPU_ID = ResourceID(SPARK_TASK_PREFIX, GPU) - val WORKER_GPU_ID = ResourceID(SPARK_WORKER_PREFIX, GPU) + val DRIVER_GPU_ID = new ResourceID(SPARK_DRIVER_PREFIX, GPU) + val EXECUTOR_GPU_ID = new ResourceID(SPARK_EXECUTOR_PREFIX, GPU) + val TASK_GPU_ID = new ResourceID(SPARK_TASK_PREFIX, GPU) + val WORKER_GPU_ID = new ResourceID(SPARK_WORKER_PREFIX, GPU) - val DRIVER_FPGA_ID = ResourceID(SPARK_DRIVER_PREFIX, FPGA) - val EXECUTOR_FPGA_ID = ResourceID(SPARK_EXECUTOR_PREFIX, FPGA) - val TASK_FPGA_ID = ResourceID(SPARK_TASK_PREFIX, FPGA) - val WORKER_FPGA_ID = ResourceID(SPARK_WORKER_PREFIX, FPGA) + val DRIVER_FPGA_ID = new ResourceID(SPARK_DRIVER_PREFIX, FPGA) + val EXECUTOR_FPGA_ID = new ResourceID(SPARK_EXECUTOR_PREFIX, FPGA) + val TASK_FPGA_ID = new ResourceID(SPARK_TASK_PREFIX, FPGA) + val WORKER_FPGA_ID = new ResourceID(SPARK_WORKER_PREFIX, FPGA) } diff --git 
a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index 5bdf71be35b3b..c10f2c244e133 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -36,7 +36,6 @@ import org.scalatest.concurrent.Eventually._ import org.apache.spark.{SecurityManager, SparkConf, SparkEnv, SparkException, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.config._ -import org.apache.spark.internal.config.Network import org.apache.spark.util.{ThreadUtils, Utils} /** @@ -409,7 +408,7 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { (0 until 10) foreach { _ => new Thread { - override def run() { + override def run(): Unit = { (0 until 100) foreach { _ => endpointRef.send("Hello") } @@ -954,6 +953,40 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { verify(endpoint, never()).onDisconnected(any()) verify(endpoint, never()).onNetworkError(any(), any()) } + + test("isolated endpoints") { + val latch = new CountDownLatch(1) + val singleThreadedEnv = createRpcEnv( + new SparkConf().set(Network.RPC_NETTY_DISPATCHER_NUM_THREADS, 1), "singleThread", 0) + try { + val blockingEndpoint = singleThreadedEnv.setupEndpoint("blocking", new IsolatedRpcEndpoint { + override val rpcEnv: RpcEnv = singleThreadedEnv + + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case m => + latch.await() + context.reply(m) + } + }) + + val nonBlockingEndpoint = singleThreadedEnv.setupEndpoint("non-blocking", new RpcEndpoint { + override val rpcEnv: RpcEnv = singleThreadedEnv + + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case m => context.reply(m) + } + }) + + val to = new RpcTimeout(5.seconds, "test-timeout") + val blockingFuture = blockingEndpoint.ask[String]("hi", to) + 
assert(nonBlockingEndpoint.askSync[String]("hello", to) === "hello") + latch.countDown() + assert(ThreadUtils.awaitResult(blockingFuture, 5.seconds) === "hi") + } finally { + latch.countDown() + singleThreadedEnv.shutdown() + } + } } class UnserializableClass diff --git a/core/src/test/scala/org/apache/spark/rpc/TestRpcEndpoint.scala b/core/src/test/scala/org/apache/spark/rpc/TestRpcEndpoint.scala index 5e8da3e205ab0..7c65f3b126e3d 100644 --- a/core/src/test/scala/org/apache/spark/rpc/TestRpcEndpoint.scala +++ b/core/src/test/scala/org/apache/spark/rpc/TestRpcEndpoint.scala @@ -20,6 +20,7 @@ package org.apache.spark.rpc import scala.collection.mutable.ArrayBuffer import org.scalactic.TripleEquals +import org.scalatest.Assertions._ class TestRpcEndpoint extends ThreadSafeRpcEndpoint with TripleEquals { diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala index e5539566e4b6f..c74c728b3e3f3 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala @@ -29,12 +29,9 @@ class InboxSuite extends SparkFunSuite { test("post") { val endpoint = new TestRpcEndpoint - val endpointRef = mock(classOf[NettyRpcEndpointRef]) - when(endpointRef.name).thenReturn("hello") - val dispatcher = mock(classOf[Dispatcher]) - val inbox = new Inbox(endpointRef, endpoint) + val inbox = new Inbox("name", endpoint) val message = OneWayMessage(null, "hi") inbox.post(message) inbox.process(dispatcher) @@ -51,10 +48,9 @@ class InboxSuite extends SparkFunSuite { test("post: with reply") { val endpoint = new TestRpcEndpoint - val endpointRef = mock(classOf[NettyRpcEndpointRef]) val dispatcher = mock(classOf[Dispatcher]) - val inbox = new Inbox(endpointRef, endpoint) + val inbox = new Inbox("name", endpoint) val message = RpcMessage(null, "hi", null) inbox.post(message) inbox.process(dispatcher) @@ -65,13 +61,10 @@ class 
InboxSuite extends SparkFunSuite { test("post: multiple threads") { val endpoint = new TestRpcEndpoint - val endpointRef = mock(classOf[NettyRpcEndpointRef]) - when(endpointRef.name).thenReturn("hello") - val dispatcher = mock(classOf[Dispatcher]) val numDroppedMessages = new AtomicInteger(0) - val inbox = new Inbox(endpointRef, endpoint) { + val inbox = new Inbox("name", endpoint) { override def onDrop(message: InboxMessage): Unit = { numDroppedMessages.incrementAndGet() } @@ -107,12 +100,10 @@ class InboxSuite extends SparkFunSuite { test("post: Associated") { val endpoint = new TestRpcEndpoint - val endpointRef = mock(classOf[NettyRpcEndpointRef]) val dispatcher = mock(classOf[Dispatcher]) - val remoteAddress = RpcAddress("localhost", 11111) - val inbox = new Inbox(endpointRef, endpoint) + val inbox = new Inbox("name", endpoint) inbox.post(RemoteProcessConnected(remoteAddress)) inbox.process(dispatcher) @@ -121,12 +112,11 @@ class InboxSuite extends SparkFunSuite { test("post: Disassociated") { val endpoint = new TestRpcEndpoint - val endpointRef = mock(classOf[NettyRpcEndpointRef]) val dispatcher = mock(classOf[Dispatcher]) val remoteAddress = RpcAddress("localhost", 11111) - val inbox = new Inbox(endpointRef, endpoint) + val inbox = new Inbox("name", endpoint) inbox.post(RemoteProcessDisconnected(remoteAddress)) inbox.process(dispatcher) @@ -135,13 +125,12 @@ class InboxSuite extends SparkFunSuite { test("post: AssociationError") { val endpoint = new TestRpcEndpoint - val endpointRef = mock(classOf[NettyRpcEndpointRef]) val dispatcher = mock(classOf[Dispatcher]) val remoteAddress = RpcAddress("localhost", 11111) val cause = new RuntimeException("Oops") - val inbox = new Inbox(endpointRef, endpoint) + val inbox = new Inbox("name", endpoint) inbox.post(RemoteProcessConnectionError(cause, remoteAddress)) inbox.process(dispatcher) diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala 
b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala index 59b4b706bbcdd..378d433cf44f8 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala @@ -22,7 +22,7 @@ import java.util.concurrent.ExecutionException import scala.concurrent.duration._ import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.network.client.TransportClient diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index 8d5f04ac7651a..fc8ac38479932 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -26,13 +26,18 @@ import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { - test("global sync by barrier() call") { + def initLocalClusterSparkContext(): Unit = { val conf = new SparkConf() // Init local cluster here so each barrier task runs in a separated process, thus `barrier()` // call is actually useful. 
.setMaster("local-cluster[4, 1, 1024]") .setAppName("test-cluster") + .set(TEST_NO_STAGE_RETRY, true) sc = new SparkContext(conf) + } + + test("global sync by barrier() call") { + initLocalClusterSparkContext() val rdd = sc.makeRDD(1 to 10, 4) val rdd2 = rdd.barrier().mapPartitions { it => val context = BarrierTaskContext.get() @@ -48,10 +53,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { } test("support multiple barrier() call within a single task") { - val conf = new SparkConf() - .setMaster("local-cluster[4, 1, 1024]") - .setAppName("test-cluster") - sc = new SparkContext(conf) + initLocalClusterSparkContext() val rdd = sc.makeRDD(1 to 10, 4) val rdd2 = rdd.barrier().mapPartitions { it => val context = BarrierTaskContext.get() @@ -77,12 +79,8 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { } test("throw exception on barrier() call timeout") { - val conf = new SparkConf() - .set("spark.barrier.sync.timeout", "1") - .set(TEST_NO_STAGE_RETRY, true) - .setMaster("local-cluster[4, 1, 1024]") - .setAppName("test-cluster") - sc = new SparkContext(conf) + initLocalClusterSparkContext() + sc.conf.set("spark.barrier.sync.timeout", "1") val rdd = sc.makeRDD(1 to 10, 4) val rdd2 = rdd.barrier().mapPartitions { it => val context = BarrierTaskContext.get() @@ -102,12 +100,8 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { } test("throw exception if barrier() call doesn't happen on every task") { - val conf = new SparkConf() - .set("spark.barrier.sync.timeout", "1") - .set(TEST_NO_STAGE_RETRY, true) - .setMaster("local-cluster[4, 1, 1024]") - .setAppName("test-cluster") - sc = new SparkContext(conf) + initLocalClusterSparkContext() + sc.conf.set("spark.barrier.sync.timeout", "1") val rdd = sc.makeRDD(1 to 10, 4) val rdd2 = rdd.barrier().mapPartitions { it => val context = BarrierTaskContext.get() @@ -125,12 +119,8 @@ class BarrierTaskContextSuite extends SparkFunSuite with 
LocalSparkContext { } test("throw exception if the number of barrier() calls are not the same on every task") { - val conf = new SparkConf() - .set("spark.barrier.sync.timeout", "1") - .set(TEST_NO_STAGE_RETRY, true) - .setMaster("local-cluster[4, 1, 1024]") - .setAppName("test-cluster") - sc = new SparkContext(conf) + initLocalClusterSparkContext() + sc.conf.set("spark.barrier.sync.timeout", "1") val rdd = sc.makeRDD(1 to 10, 4) val rdd2 = rdd.barrier().mapPartitions { it => val context = BarrierTaskContext.get() @@ -156,10 +146,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { assert(error.contains("within 1 second(s)")) } - - def testBarrierTaskKilled(sc: SparkContext, interruptOnCancel: Boolean): Unit = { - sc.setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, interruptOnCancel.toString) - + def testBarrierTaskKilled(interruptOnKill: Boolean): Unit = { withTempDir { dir => val killedFlagFile = "barrier.task.killed" val rdd = sc.makeRDD(Seq(0, 1), 2) @@ -181,12 +168,15 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { val listener = new SparkListener { override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { - new Thread { - override def run: Unit = { - Thread.sleep(1000) - sc.killTaskAttempt(taskStart.taskInfo.taskId, interruptThread = false) - } - }.start() + val partitionId = taskStart.taskInfo.index + if (partitionId == 0) { + new Thread { + override def run: Unit = { + Thread.sleep(1000) + sc.killTaskAttempt(taskStart.taskInfo.taskId, interruptThread = interruptOnKill) + } + }.start() + } } } sc.addSparkListener(listener) @@ -201,15 +191,13 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext { } } - test("barrier task killed") { - val conf = new SparkConf() - .set("spark.barrier.sync.timeout", "1") - .set(TEST_NO_STAGE_RETRY, true) - .setMaster("local-cluster[4, 1, 1024]") - .setAppName("test-cluster") - sc = new SparkContext(conf) + test("barrier 
task killed, no interrupt") { + initLocalClusterSparkContext() + testBarrierTaskKilled(interruptOnKill = false) + } - testBarrierTaskKilled(sc, true) - testBarrierTaskKilled(sc, false) + test("barrier task killed, interrupt") { + initLocalClusterSparkContext() + testBarrierTaskKilled(interruptOnKill = true) } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala index 0fe0e5b78233c..246d4b2f56ec9 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.scheduler -import scala.concurrent.duration._ - import org.apache.spark._ import org.apache.spark.internal.config import org.apache.spark.internal.config.Tests._ diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala index 93a88cc30a20c..a1671a58f0d9b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala @@ -21,7 +21,7 @@ import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{never, verify, when} import org.mockito.invocation.InvocationOnMock import org.scalatest.BeforeAndAfterEach -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.internal.config @@ -437,7 +437,7 @@ class BlacklistTrackerSuite extends SparkFunSuite with BeforeAndAfterEach with M } test("check blacklist configuration invariants") { - val conf = new SparkConf().setMaster("yarn-cluster") + val conf = new SparkConf().setMaster("yarn").set(config.SUBMIT_DEPLOY_MODE, "cluster") Seq( (2, 2), (2, 3) diff --git 
a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index 3edbbeb9c08f1..7666c6c7810cc 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.scheduler import java.util.Properties import java.util.concurrent.atomic.AtomicBoolean -import scala.collection.immutable import scala.collection.mutable import scala.concurrent.duration._ import scala.language.postfixOps @@ -29,13 +28,13 @@ import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.when import org.mockito.invocation.InvocationOnMock import org.scalatest.concurrent.Eventually -import org.scalatest.mockito.MockitoSugar._ +import org.scalatestplus.mockito.MockitoSugar._ import org.apache.spark._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Network.RPC_MESSAGE_MAX_SIZE import org.apache.spark.rdd.RDD -import org.apache.spark.resource.ResourceInformation +import org.apache.spark.resource.{ExecutorResourceRequests, ResourceInformation, ResourceProfile, TaskResourceRequests} import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv} @@ -174,22 +173,24 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo sc.addSparkListener(listener) backend.driverEndpoint.askSync[Boolean]( - RegisterExecutor("1", mockEndpointRef, mockAddress.host, 1, logUrls, attributes, Map.empty)) + RegisterExecutor("1", mockEndpointRef, mockAddress.host, 1, logUrls, attributes, + Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) backend.driverEndpoint.askSync[Boolean]( - RegisterExecutor("2", mockEndpointRef, mockAddress.host, 1, logUrls, attributes, 
Map.empty)) + RegisterExecutor("2", mockEndpointRef, mockAddress.host, 1, logUrls, attributes, + Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) backend.driverEndpoint.askSync[Boolean]( - RegisterExecutor("3", mockEndpointRef, mockAddress.host, 1, logUrls, attributes, Map.empty)) + RegisterExecutor("3", mockEndpointRef, mockAddress.host, 1, logUrls, attributes, + Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) sc.listenerBus.waitUntilEmpty(executorUpTimeout.toMillis) assert(executorAddedCount === 3) } test("extra resources from executor") { - import TestUtils._ - val conf = new SparkConf() - .set(EXECUTOR_CORES, 3) + .set(EXECUTOR_CORES, 1) .set(SCHEDULER_REVIVE_INTERVAL.key, "1m") // don't let it auto revive during test + .set(EXECUTOR_INSTANCES, 0) // avoid errors about duplicate executor registrations .setMaster( "coarseclustermanager[org.apache.spark.scheduler.TestCoarseGrainedSchedulerBackend]") .setAppName("test") @@ -197,6 +198,11 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo conf.set(EXECUTOR_GPU_ID.amountConf, "1") sc = new SparkContext(conf) + val execGpu = new ExecutorResourceRequests().cores(1).resource(GPU, 3) + val taskGpu = new TaskResourceRequests().cpus(1).resource(GPU, 1) + val rp = new ResourceProfile(execGpu.requests, taskGpu.requests) + sc.resourceProfileManager.addResourceProfile(rp) + assert(rp.id > ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) val backend = sc.schedulerBackend.asInstanceOf[TestCoarseGrainedSchedulerBackend] val mockEndpointRef = mock[RpcEndpointRef] val mockAddress = mock[RpcAddress] @@ -214,20 +220,25 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo sc.addSparkListener(listener) backend.driverEndpoint.askSync[Boolean]( - RegisterExecutor("1", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources)) + RegisterExecutor("1", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources, + 
ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) backend.driverEndpoint.askSync[Boolean]( - RegisterExecutor("2", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources)) + RegisterExecutor("2", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources, + ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) backend.driverEndpoint.askSync[Boolean]( - RegisterExecutor("3", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources)) + RegisterExecutor("3", mockEndpointRef, mockAddress.host, 1, Map.empty, Map.empty, resources, + rp.id)) val frameSize = RpcUtils.maxMessageSizeBytes(sc.conf) val bytebuffer = java.nio.ByteBuffer.allocate(frameSize - 100) val buffer = new SerializableBuffer(bytebuffer) var execResources = backend.getExecutorAvailableResources("1") - assert(execResources(GPU).availableAddrs.sorted === Array("0", "1", "3")) + var exec3ResourceProfileId = backend.getExecutorResourceProfileId("3") + assert(exec3ResourceProfileId === rp.id) + val taskResources = Map(GPU -> new ResourceInformation(GPU, Array("0"))) var taskDescs: Seq[Seq[TaskDescription]] = Seq(Seq(new TaskDescription(1, 0, "1", "t1", 0, 1, mutable.Map.empty[String, Long], mutable.Map.empty[String, Long], @@ -283,6 +294,7 @@ private class CSMockExternalClusterManager extends ExternalClusterManager { when(ts.applicationAttemptId()).thenReturn(Some("attempt1")) when(ts.schedulingMode).thenReturn(SchedulingMode.FIFO) when(ts.nodeBlacklist()).thenReturn(Set.empty[String]) + when(ts.resourcesReqsPerTask).thenReturn(Seq.empty) ts } diff --git a/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala b/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala index 1be2e2a067115..46e5e6f97b1f1 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CustomShuffledRDD.scala @@ -111,7 +111,7 @@ class CustomShuffledRDD[K, V, C]( .asInstanceOf[Iterator[(K, C)]] } - 
override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() dependency = null } diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 2b3423f9a4d40..e40b63fe13cb1 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -19,13 +19,14 @@ package org.apache.spark.scheduler import java.util.Properties import java.util.concurrent.{CountDownLatch, TimeUnit} -import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong, AtomicReference} import scala.annotation.meta.param import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} import scala.util.control.NonFatal import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} +import org.scalatest.exceptions.TestFailedException import org.scalatest.time.SpanSugar._ import org.apache.spark._ @@ -36,7 +37,7 @@ import org.apache.spark.rdd.{DeterministicLevel, RDD} import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.shuffle.{FetchFailedException, MetadataFetchFailedException} import org.apache.spark.storage.{BlockId, BlockManagerId, BlockManagerMaster} -import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, Utils} +import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, ThreadUtils, Utils} class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) extends DAGSchedulerEventProcessLoop(dagScheduler) { @@ -150,7 +151,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi taskSet.tasks.foreach(_.epoch = mapOutputTracker.getEpoch) taskSets += taskSet } - override def cancelTasks(stageId: Int, interruptThread: Boolean) { + override 
def cancelTasks(stageId: Int, interruptThread: Boolean): Unit = { cancelledStages += stageId } override def killTaskAttempt( @@ -166,39 +167,72 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } override def setDAGScheduler(dagScheduler: DAGScheduler) = {} override def defaultParallelism() = 2 + override def executorDecommission(executorId: String) = {} override def executorLost(executorId: String, reason: ExecutorLossReason): Unit = {} override def workerRemoved(workerId: String, host: String, message: String): Unit = {} override def applicationAttemptId(): Option[String] = None } - /** Length of time to wait while draining listener events. */ - val WAIT_TIMEOUT_MILLIS = 10000 - - val submittedStageInfos = new HashSet[StageInfo] - val successfulStages = new HashSet[Int] - val failedStages = new ArrayBuffer[Int] - val stageByOrderOfExecution = new ArrayBuffer[Int] - val endedTasks = new HashSet[Long] - val sparkListener = new SparkListener() { - override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) { - submittedStageInfos += stageSubmitted.stageInfo + /** + * Listeners which records some information to verify in UTs. Getter-kind methods in this class + * ensures the value is returned after ensuring there's no event to process, as well as the + * value is immutable: prevent showing odd result by race condition. 
+ */ + class EventInfoRecordingListener extends SparkListener { + private val _submittedStageInfos = new HashSet[StageInfo] + private val _successfulStages = new HashSet[Int] + private val _failedStages = new ArrayBuffer[Int] + private val _stageByOrderOfExecution = new ArrayBuffer[Int] + private val _endedTasks = new HashSet[Long] + + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + _submittedStageInfos += stageSubmitted.stageInfo } - override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) { + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { val stageInfo = stageCompleted.stageInfo - stageByOrderOfExecution += stageInfo.stageId + _stageByOrderOfExecution += stageInfo.stageId if (stageInfo.failureReason.isEmpty) { - successfulStages += stageInfo.stageId + _successfulStages += stageInfo.stageId } else { - failedStages += stageInfo.stageId + _failedStages += stageInfo.stageId } } override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { - endedTasks += taskEnd.taskInfo.taskId + _endedTasks += taskEnd.taskInfo.taskId + } + + def submittedStageInfos: Set[StageInfo] = { + waitForListeners() + _submittedStageInfos.toSet + } + + def successfulStages: Set[Int] = { + waitForListeners() + _successfulStages.toSet } + + def failedStages: List[Int] = { + waitForListeners() + _failedStages.toList + } + + def stageByOrderOfExecution: List[Int] = { + waitForListeners() + _stageByOrderOfExecution.toList + } + + def endedTasks: Set[Long] = { + waitForListeners() + _endedTasks.toSet + } + + private def waitForListeners(): Unit = sc.listenerBus.waitUntilEmpty() } + var sparkListener: EventInfoRecordingListener = null + var mapOutputTracker: MapOutputTrackerMaster = null var broadcastManager: BroadcastManager = null var securityMgr: SecurityManager = null @@ -212,14 +246,14 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi */ val cacheLocations = new 
HashMap[(Int, Int), Seq[BlockManagerId]] // stub out BlockManagerMaster.getLocations to use our cacheLocations - val blockManagerMaster = new BlockManagerMaster(null, conf, true) { + val blockManagerMaster = new BlockManagerMaster(null, null, conf, true) { override def getLocations(blockIds: Array[BlockId]): IndexedSeq[Seq[BlockManagerId]] = { blockIds.map { _.asRDDId.map(id => (id.rddId -> id.splitIndex)).flatMap(key => cacheLocations.get(key)). getOrElse(Seq()) }.toIndexedSeq } - override def removeExecutor(execId: String) { + override def removeExecutor(execId: String): Unit = { // don't need to propagate to the driver, which we don't have } } @@ -247,10 +281,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi private def init(testConf: SparkConf): Unit = { sc = new SparkContext("local[2]", "DAGSchedulerSuite", testConf) - submittedStageInfos.clear() - successfulStages.clear() - failedStages.clear() - endedTasks.clear() + sparkListener = new EventInfoRecordingListener failure = null sc.addSparkListener(sparkListener) taskSets.clear() @@ -286,7 +317,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } } - override def afterAll() { + override def afterAll(): Unit = { super.afterAll() } @@ -303,7 +334,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi * After processing the event, submit waiting stages as is done on most iterations of the * DAGScheduler event loop. */ - private def runEvent(event: DAGSchedulerEvent) { + private def runEvent(event: DAGSchedulerEvent): Unit = { dagEventProcessLoopTester.post(event) } @@ -316,7 +347,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi it.next.asInstanceOf[Tuple2[_, _]]._1 /** Send the given CompletionEvent messages for the tasks in the TaskSet. 
*/ - private def complete(taskSet: TaskSet, results: Seq[(TaskEndReason, Any)]) { + private def complete(taskSet: TaskSet, results: Seq[(TaskEndReason, Any)]): Unit = { assert(taskSet.tasks.size >= results.size) for ((result, i) <- results.zipWithIndex) { if (i < taskSet.tasks.size) { @@ -328,7 +359,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi private def completeWithAccumulator( accumId: Long, taskSet: TaskSet, - results: Seq[(TaskEndReason, Any)]) { + results: Seq[(TaskEndReason, Any)]): Unit = { assert(taskSet.tasks.size >= results.size) for ((result, i) <- results.zipWithIndex) { if (i < taskSet.tasks.size) { @@ -363,19 +394,18 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } /** Sends TaskSetFailed to the scheduler. */ - private def failed(taskSet: TaskSet, message: String) { + private def failed(taskSet: TaskSet, message: String): Unit = { runEvent(TaskSetFailed(taskSet, message, None)) } /** Sends JobCancelled to the DAG scheduler. 
*/ - private def cancel(jobId: Int) { + private def cancel(jobId: Int): Unit = { runEvent(JobCancelled(jobId, None)) } test("[SPARK-3353] parent stage should have lower stage id") { - stageByOrderOfExecution.clear() sc.parallelize(1 to 10).map(x => (x, x)).reduceByKey(_ + _, 4).count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + val stageByOrderOfExecution = sparkListener.stageByOrderOfExecution assert(stageByOrderOfExecution.length === 2) assert(stageByOrderOfExecution(0) < stageByOrderOfExecution(1)) } @@ -455,18 +485,22 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // map stage1 completes successfully, with one task on each executor complete(taskSets(0), Seq( (Success, - MapStatus(BlockManagerId("exec-hostA1", "hostA", 12345), Array.fill[Long](1)(2))), + MapStatus( + BlockManagerId("exec-hostA1", "hostA", 12345), Array.fill[Long](1)(2), mapTaskId = 5)), (Success, - MapStatus(BlockManagerId("exec-hostA2", "hostA", 12345), Array.fill[Long](1)(2))), - (Success, makeMapStatus("hostB", 1)) + MapStatus( + BlockManagerId("exec-hostA2", "hostA", 12345), Array.fill[Long](1)(2), mapTaskId = 6)), + (Success, makeMapStatus("hostB", 1, mapTaskId = 7)) )) // map stage2 completes successfully, with one task on each executor complete(taskSets(1), Seq( (Success, - MapStatus(BlockManagerId("exec-hostA1", "hostA", 12345), Array.fill[Long](1)(2))), + MapStatus( + BlockManagerId("exec-hostA1", "hostA", 12345), Array.fill[Long](1)(2), mapTaskId = 8)), (Success, - MapStatus(BlockManagerId("exec-hostA2", "hostA", 12345), Array.fill[Long](1)(2))), - (Success, makeMapStatus("hostB", 1)) + MapStatus( + BlockManagerId("exec-hostA2", "hostA", 12345), Array.fill[Long](1)(2), mapTaskId = 9)), + (Success, makeMapStatus("hostB", 1, mapTaskId = 10)) )) // make sure our test setup is correct val initialMapStatus1 = mapOutputTracker.shuffleStatuses(firstShuffleId).mapStatuses @@ -474,16 +508,19 @@ class DAGSchedulerSuite extends SparkFunSuite with 
LocalSparkContext with TimeLi assert(initialMapStatus1.count(_ != null) === 3) assert(initialMapStatus1.map{_.location.executorId}.toSet === Set("exec-hostA1", "exec-hostA2", "exec-hostB")) + assert(initialMapStatus1.map{_.mapId}.toSet === Set(5, 6, 7)) val initialMapStatus2 = mapOutputTracker.shuffleStatuses(secondShuffleId).mapStatuses // val initialMapStatus1 = mapOutputTracker.mapStatuses.get(0).get assert(initialMapStatus2.count(_ != null) === 3) assert(initialMapStatus2.map{_.location.executorId}.toSet === Set("exec-hostA1", "exec-hostA2", "exec-hostB")) + assert(initialMapStatus2.map{_.mapId}.toSet === Set(8, 9, 10)) // reduce stage fails with a fetch failure from one host complete(taskSets(2), Seq( - (FetchFailed(BlockManagerId("exec-hostA2", "hostA", 12345), firstShuffleId, 0, 0, "ignored"), + (FetchFailed(BlockManagerId("exec-hostA2", "hostA", 12345), + firstShuffleId, 0L, 0, 0, "ignored"), null) )) @@ -618,9 +655,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi submit(unserializableRdd, Array(0)) assert(failure.getMessage.startsWith( "Job aborted due to stage failure: Task not serializable:")) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.contains(0)) - assert(failedStages.size === 1) + assert(sparkListener.failedStages === Seq(0)) assertDataStructuresEmpty() } @@ -628,9 +663,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi submit(new MyRDD(sc, 1, Nil), Array(0)) failed(taskSets(0), "some failure") assert(failure.getMessage === "Job aborted due to stage failure: some failure") - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.contains(0)) - assert(failedStages.size === 1) + assert(sparkListener.failedStages === Seq(0)) assertDataStructuresEmpty() } @@ -639,9 +672,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi val jobId = submit(rdd, Array(0)) cancel(jobId) assert(failure.getMessage === 
s"Job $jobId cancelled ") - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.contains(0)) - assert(failedStages.size === 1) + assert(sparkListener.failedStages === Seq(0)) assertDataStructuresEmpty() } @@ -656,7 +687,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi override def submitTasks(taskSet: TaskSet): Unit = { taskSets += taskSet } - override def cancelTasks(stageId: Int, interruptThread: Boolean) { + override def cancelTasks(stageId: Int, interruptThread: Boolean): Unit = { throw new UnsupportedOperationException } override def killTaskAttempt( @@ -677,6 +708,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi accumUpdates: Array[(Long, Seq[AccumulatorV2[_, _]])], blockManagerId: BlockManagerId, executorUpdates: Map[(Int, Int), ExecutorMetrics]): Boolean = true + override def executorDecommission(executorId: String): Unit = {} override def executorLost(executorId: String, reason: ExecutorLossReason): Unit = {} override def workerRemoved(workerId: String, host: String, message: String): Unit = {} override def applicationAttemptId(): Option[String] = None @@ -699,9 +731,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assert(results === Map(0 -> 42)) assertDataStructuresEmpty() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.isEmpty) - assert(successfulStages.contains(0)) + assert(sparkListener.failedStages.isEmpty) + assert(sparkListener.successfulStages.contains(0)) } test("run trivial shuffle") { @@ -732,7 +763,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // the 2nd ResultTask failed complete(taskSets(1), Seq( (Success, 42), - (FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null))) // this will get called // 
blockManagerMaster.removeExecutor("exec-hostA") // ask the scheduler to try it again @@ -788,14 +819,49 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } } + test("SPARK-28967 properties must be cloned before posting to listener bus for 0 partition") { + val properties = new Properties() + val func = (context: TaskContext, it: Iterator[(_)]) => 1 + val resultHandler = (taskIndex: Int, result: Int) => {} + val assertionError = new AtomicReference[TestFailedException]( + new TestFailedException("Listener didn't receive expected JobStart event", 0)) + val listener = new SparkListener() { + override def onJobStart(event: SparkListenerJobStart): Unit = { + try { + // spark.job.description can be implicitly set for 0 partition jobs. + // So event.properties and properties can be different. See SPARK-29997. + event.properties.remove(SparkContext.SPARK_JOB_DESCRIPTION) + properties.remove(SparkContext.SPARK_JOB_DESCRIPTION) + + assert(event.properties.equals(properties), "Expected same content of properties, " + + s"but got properties with different content. 
props in caller ${properties} /" + + s" props in event ${event.properties}") + assert(event.properties.ne(properties), "Expected instance with different identity, " + + "but got same instance.") + assertionError.set(null) + } catch { + case e: TestFailedException => assertionError.set(e) + } + } + } + sc.addSparkListener(listener) + + // 0 partition + val testRdd = new MyRDD(sc, 0, Nil) + val waiter = scheduler.submitJob(testRdd, func, Seq.empty, CallSite.empty, + resultHandler, properties) + sc.listenerBus.waitUntilEmpty() + assert(assertionError.get() === null) + } + // Helper function to validate state when creating tests for task failures - private def checkStageId(stageId: Int, attempt: Int, stageAttempt: TaskSet) { + private def checkStageId(stageId: Int, attempt: Int, stageAttempt: TaskSet): Unit = { assert(stageAttempt.stageId === stageId) assert(stageAttempt.stageAttemptId == attempt) } // Helper functions to extract commonly used code in Fetch Failure test cases - private def setupStageAbortTest(sc: SparkContext) { + private def setupStageAbortTest(sc: SparkContext): Unit = { sc.listenerBus.addToSharedQueue(new EndListener()) ended = false jobResult = null @@ -849,7 +915,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi val stageAttempt = taskSets.last checkStageId(stageId, attemptIdx, stageAttempt) complete(stageAttempt, stageAttempt.tasks.zipWithIndex.map { case (task, idx) => - (FetchFailed(makeBlockManagerId("hostA"), shuffleDep.shuffleId, 0, idx, "ignored"), null) + (FetchFailed(makeBlockManagerId("hostA"), shuffleDep.shuffleId, 0L, 0, idx, "ignored"), null) }.toSeq) } @@ -902,7 +968,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi completeNextResultStageWithSuccess(1, 1) // Confirm job finished successfully - sc.listenerBus.waitUntilEmpty(1000) + sc.listenerBus.waitUntilEmpty() assert(ended) assert(results === (0 until parts).map { idx => idx -> 42 }.toMap) 
assertDataStructuresEmpty() @@ -939,7 +1005,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } else { // Stage should have been aborted and removed from running stages assertDataStructuresEmpty() - sc.listenerBus.waitUntilEmpty(1000) + sc.listenerBus.waitUntilEmpty() assert(ended) jobResult match { case JobFailed(reason) => @@ -1061,7 +1127,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi completeNextResultStageWithSuccess(2, 1) assertDataStructuresEmpty() - sc.listenerBus.waitUntilEmpty(1000) + sc.listenerBus.waitUntilEmpty() assert(ended) assert(results === Map(0 -> 42)) } @@ -1082,19 +1148,17 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // The first result task fails, with a fetch failure for the output from the first mapper. runEvent(makeCompletionEvent( taskSets(1).tasks(0), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null)) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.contains(1)) + assert(sparkListener.failedStages.contains(1)) // The second ResultTask fails, with a fetch failure for the output from the second mapper. runEvent(makeCompletionEvent( taskSets(1).tasks(0), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 1, 1, "ignored"), + FetchFailed(makeBlockManagerId("hostA"), shuffleId, 1L, 1, 1, "ignored"), null)) // The SparkListener should not receive redundant failure events. - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.size == 1) + assert(sparkListener.failedStages.size === 1) } test("Retry all the tasks on a resubmitted attempt of a barrier stage caused by FetchFailure") { @@ -1111,7 +1175,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // The first result task fails, with a fetch failure for the output from the first mapper. 
runEvent(makeCompletionEvent( taskSets(1).tasks(0), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null)) assert(mapOutputTracker.findMissingPartitions(shuffleId) === Some(Seq(0, 1))) @@ -1122,7 +1186,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // Complete the result stage. completeNextResultStageWithSuccess(1, 1) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() assertDataStructuresEmpty() } @@ -1141,7 +1205,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi taskSets(0).tasks(1), TaskKilled("test"), null)) - assert(failedStages === Seq(0)) + assert(sparkListener.failedStages === Seq(0)) assert(mapOutputTracker.findMissingPartitions(shuffleId) === Some(Seq(0, 1))) scheduler.resubmitFailedStages() @@ -1151,7 +1215,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // Complete the result stage. completeNextResultStageWithSuccess(1, 0) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() assertDataStructuresEmpty() } @@ -1177,7 +1241,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi null)) // Assert the stage has been cancelled. - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() assert(failure.getMessage.startsWith("Job aborted due to stage failure: Could not recover " + "from a failed barrier ResultStage.")) } @@ -1195,11 +1259,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi val mapStageId = 0 def countSubmittedMapStageAttempts(): Int = { - submittedStageInfos.count(_.stageId == mapStageId) + sparkListener.submittedStageInfos.count(_.stageId == mapStageId) } // The map stage should have been submitted. 
- sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedMapStageAttempts() === 1) complete(taskSets(0), Seq( @@ -1214,14 +1277,12 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // The first result task fails, with a fetch failure for the output from the first mapper. runEvent(makeCompletionEvent( taskSets(1).tasks(0), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null)) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.contains(1)) + assert(sparkListener.failedStages.contains(1)) // Trigger resubmission of the failed map stage. runEvent(ResubmitFailedStages) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) // Another attempt for the map stage should have been submitted, resulting in 2 total attempts. assert(countSubmittedMapStageAttempts() === 2) @@ -1229,7 +1290,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // The second ResultTask fails, with a fetch failure for the output from the second mapper. runEvent(makeCompletionEvent( taskSets(1).tasks(1), - FetchFailed(makeBlockManagerId("hostB"), shuffleId, 1, 1, "ignored"), + FetchFailed(makeBlockManagerId("hostB"), shuffleId, 1L, 1, 1, "ignored"), null)) // Another ResubmitFailedStages event should not result in another attempt for the map @@ -1238,7 +1299,6 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // shouldn't effect anything -- our calling it just makes *SURE* it gets called between the // desired event and our check. 
runEvent(ResubmitFailedStages) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedMapStageAttempts() === 2) } @@ -1256,14 +1316,13 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi submit(reduceRdd, Array(0, 1)) def countSubmittedReduceStageAttempts(): Int = { - submittedStageInfos.count(_.stageId == 1) + sparkListener.submittedStageInfos.count(_.stageId == 1) } def countSubmittedMapStageAttempts(): Int = { - submittedStageInfos.count(_.stageId == 0) + sparkListener.submittedStageInfos.count(_.stageId == 0) } // The map stage should have been submitted. - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedMapStageAttempts() === 1) // Complete the map stage. @@ -1272,13 +1331,12 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi (Success, makeMapStatus("hostB", 2)))) // The reduce stage should have been submitted. - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedReduceStageAttempts() === 1) // The first result task fails, with a fetch failure for the output from the first mapper. runEvent(makeCompletionEvent( taskSets(1).tasks(0), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null)) // Trigger resubmission of the failed map stage and finish the re-started map task. @@ -1287,14 +1345,13 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // Because the map stage finished, another attempt for the reduce stage should have been // submitted, resulting in 2 total attempts for each the map and the reduce stage. - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedMapStageAttempts() === 2) assert(countSubmittedReduceStageAttempts() === 2) // A late FetchFailed arrives from the second task in the original reduce stage. 
runEvent(makeCompletionEvent( taskSets(1).tasks(1), - FetchFailed(makeBlockManagerId("hostB"), shuffleId, 1, 1, "ignored"), + FetchFailed(makeBlockManagerId("hostB"), shuffleId, 1L, 1, 1, "ignored"), null)) // Running ResubmitFailedStages shouldn't result in any more attempts for the map stage, because @@ -1317,10 +1374,9 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi runEvent(makeCompletionEvent( taskSets(0).tasks(1), Success, 42, Seq.empty, Array.empty, createFakeTaskInfoWithId(1))) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) // verify stage exists assert(scheduler.stageIdToStage.contains(0)) - assert(endedTasks.size == 2) + assert(sparkListener.endedTasks.size === 2) // finish other 2 tasks runEvent(makeCompletionEvent( @@ -1329,8 +1385,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi runEvent(makeCompletionEvent( taskSets(0).tasks(3), Success, 42, Seq.empty, Array.empty, createFakeTaskInfoWithId(3))) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(endedTasks.size == 4) + assert(sparkListener.endedTasks.size === 4) // verify the stage is done assert(!scheduler.stageIdToStage.contains(0)) @@ -1340,15 +1395,13 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi runEvent(makeCompletionEvent( taskSets(0).tasks(3), Success, 42, Seq.empty, Array.empty, createFakeTaskInfoWithId(5))) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(endedTasks.size == 5) + assert(sparkListener.endedTasks.size === 5) // make sure non successful tasks also send out event runEvent(makeCompletionEvent( taskSets(0).tasks(3), UnknownReason, 42, Seq.empty, Array.empty, createFakeTaskInfoWithId(6))) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(endedTasks.size == 6) + assert(sparkListener.endedTasks.size === 6) } test("ignore late map task completions") { @@ -1421,8 +1474,7 @@ class DAGSchedulerSuite extends SparkFunSuite with 
LocalSparkContext with TimeLi // Listener bus should get told about the map stage failing, but not the reduce stage // (since the reduce stage hasn't been started yet). - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(failedStages.toSet === Set(0)) + assert(sparkListener.failedStages.toSet === Set(0)) assertDataStructuresEmpty() } @@ -1494,7 +1546,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi runEvent(ExecutorLost("exec-hostA", ExecutorKilled)) runEvent(makeCompletionEvent( taskSets(1).tasks(0), - FetchFailed(null, firstShuffleId, 2, 0, "Fetch failed"), + FetchFailed(null, firstShuffleId, 2L, 2, 0, "Fetch failed"), null)) // so we resubmit stage 0, which completes happily @@ -1650,7 +1702,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // listener for all jobs, and here we want to capture the failure for each job separately. class FailureRecordingJobListener() extends JobListener { var failureMessage: String = _ - override def taskSucceeded(index: Int, result: Any) {} + override def taskSucceeded(index: Int, result: Any): Unit = {} override def jobFailed(exception: Exception): Unit = { failureMessage = exception.getMessage } } val listener1 = new FailureRecordingJobListener() @@ -1665,9 +1717,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assert(cancelledStages.toSet === Set(0, 2)) // Make sure the listeners got told about both failed stages. 
- sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) - assert(successfulStages.isEmpty) - assert(failedStages.toSet === Set(0, 2)) + assert(sparkListener.successfulStages.isEmpty) + assert(sparkListener.failedStages.toSet === Set(0, 2)) assert(listener1.failureMessage === s"Job aborted due to stage failure: $stageFailureMessage") assert(listener2.failureMessage === s"Job aborted due to stage failure: $stageFailureMessage") @@ -1754,7 +1805,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // lets say there is a fetch failure in this task set, which makes us go back and // run stage 0, attempt 1 complete(taskSets(1), Seq( - (FetchFailed(makeBlockManagerId("hostA"), shuffleDep1.shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostA"), + shuffleDep1.shuffleId, 0L, 0, 0, "ignored"), null))) scheduler.resubmitFailedStages() // stage 0, attempt 1 should have the properties of job2 @@ -1835,7 +1887,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi (Success, makeMapStatus("hostC", 1)))) // fail the third stage because hostA went down complete(taskSets(2), Seq( - (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostA"), + shuffleDepTwo.shuffleId, 0L, 0, 0, "ignored"), null))) // TODO assert this: // blockManagerMaster.removeExecutor("exec-hostA") // have DAGScheduler try again @@ -1866,7 +1919,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi (Success, makeMapStatus("hostB", 1)))) // pretend stage 2 failed because hostA went down complete(taskSets(2), Seq( - (FetchFailed(makeBlockManagerId("hostA"), shuffleDepTwo.shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostA"), + shuffleDepTwo.shuffleId, 0L, 0, 0, "ignored"), null))) // TODO assert this: // blockManagerMaster.removeExecutor("exec-hostA") // DAGScheduler should notice the cached copy of 
the second shuffle and try to get it rerun. @@ -2227,7 +2281,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi submit(reduceRdd, Array(0, 1)) complete(taskSets(1), Seq( (Success, 42), - (FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null))) // Ask the scheduler to try it again; TaskSet 2 will rerun the map task that we couldn't fetch // from, then TaskSet 3 will run the reduce stage scheduler.resubmitFailedStages() @@ -2286,7 +2340,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assert(taskSets(1).stageId === 1) complete(taskSets(1), Seq( (Success, makeMapStatus("hostA", rdd2.partitions.length)), - (FetchFailed(makeBlockManagerId("hostA"), dep1.shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostA"), dep1.shuffleId, 0L, 0, 0, "ignored"), null))) scheduler.resubmitFailedStages() assert(listener2.results.size === 0) // Second stage listener should not have a result yet @@ -2312,7 +2366,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assert(taskSets(4).stageId === 2) complete(taskSets(4), Seq( (Success, 52), - (FetchFailed(makeBlockManagerId("hostD"), dep2.shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostD"), dep2.shuffleId, 0L, 0, 0, "ignored"), null))) scheduler.resubmitFailedStages() // TaskSet 5 will rerun stage 1's lost task, then TaskSet 6 will rerun stage 2 @@ -2350,7 +2404,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assert(taskSets(1).stageId === 1) complete(taskSets(1), Seq( (Success, makeMapStatus("hostC", rdd2.partitions.length)), - (FetchFailed(makeBlockManagerId("hostA"), dep1.shuffleId, 0, 0, "ignored"), null))) + (FetchFailed(makeBlockManagerId("hostA"), dep1.shuffleId, 0L, 0, 0, "ignored"), null))) scheduler.resubmitFailedStages() // Stage1 
listener should not have a result yet assert(listener2.results.size === 0) @@ -2485,7 +2539,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi rdd1.map { case (x, _) if (x == 1) => throw new FetchFailedException( - BlockManagerId("1", "1", 1), shuffleHandle.shuffleId, 0, 0, "test") + BlockManagerId("1", "1", 1), shuffleHandle.shuffleId, 0L, 0, 0, "test") case (x, _) => x }.count() } @@ -2498,7 +2552,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi rdd1.map { case (x, _) if (x == 1) && FailThisAttempt._fail.getAndSet(false) => throw new FetchFailedException( - BlockManagerId("1", "1", 1), shuffleHandle.shuffleId, 0, 0, "test") + BlockManagerId("1", "1", 1), shuffleHandle.shuffleId, 0L, 0, 0, "test") } } @@ -2552,7 +2606,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assert(taskSets(1).stageId === 1 && taskSets(1).stageAttemptId === 0) runEvent(makeCompletionEvent( taskSets(1).tasks(0), - FetchFailed(makeBlockManagerId("hostA"), shuffleIdA, 0, 0, + FetchFailed(makeBlockManagerId("hostA"), shuffleIdA, 0L, 0, 0, "Fetch failure of task: stageId=1, stageAttempt=0, partitionId=0"), result = null)) @@ -2628,7 +2682,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi sc.parallelize(1 to tasks, tasks).foreach { _ => accum.add(1L) } - sc.listenerBus.waitUntilEmpty(1000) + sc.listenerBus.waitUntilEmpty() assert(foundCount.get() === tasks) } } @@ -2641,11 +2695,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi val mapStageId = 0 def countSubmittedMapStageAttempts(): Int = { - submittedStageInfos.count(_.stageId == mapStageId) + sparkListener.submittedStageInfos.count(_.stageId == mapStageId) } // The map stage should have been submitted. - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedMapStageAttempts() === 1) // The first map task fails with TaskKilled. 
@@ -2653,7 +2706,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi taskSets(0).tasks(0), TaskKilled("test"), null)) - assert(failedStages === Seq(0)) + assert(sparkListener.failedStages === Seq(0)) // The second map task fails with TaskKilled. runEvent(makeCompletionEvent( @@ -2663,7 +2716,6 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // Trigger resubmission of the failed map stage. runEvent(ResubmitFailedStages) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) // Another attempt for the map stage should have been submitted, resulting in 2 total attempts. assert(countSubmittedMapStageAttempts() === 2) @@ -2677,11 +2729,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi val mapStageId = 0 def countSubmittedMapStageAttempts(): Int = { - submittedStageInfos.count(_.stageId == mapStageId) + sparkListener.submittedStageInfos.count(_.stageId == mapStageId) } // The map stage should have been submitted. - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedMapStageAttempts() === 1) // The first map task fails with TaskKilled. @@ -2689,11 +2740,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi taskSets(0).tasks(0), TaskKilled("test"), null)) - assert(failedStages === Seq(0)) + assert(sparkListener.failedStages === Seq(0)) // Trigger resubmission of the failed map stage. runEvent(ResubmitFailedStages) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) // Another attempt for the map stage should have been submitted, resulting in 2 total attempts. assert(countSubmittedMapStageAttempts() === 2) @@ -2706,11 +2756,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // The second map task failure doesn't trigger stage retry. 
runEvent(ResubmitFailedStages) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(countSubmittedMapStageAttempts() === 2) } - test("SPARK-23207: retry all the succeeding stages when the map stage is indeterminate") { + private def constructIndeterminateStageFetchFailed(): (Int, Int) = { val shuffleMapRdd1 = new MyRDD(sc, 2, Nil, indeterminate = true) val shuffleDep1 = new ShuffleDependency(shuffleMapRdd1, new HashPartitioner(2)) @@ -2738,14 +2787,152 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // The first task of the final stage failed with fetch failure runEvent(makeCompletionEvent( taskSets(2).tasks(0), - FetchFailed(makeBlockManagerId("hostC"), shuffleId2, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostC"), shuffleId2, 0L, 0, 0, "ignored"), + null)) + (shuffleId1, shuffleId2) + } + + test("SPARK-25341: abort stage while using old fetch protocol") { + // reset the test context with using old fetch protocol + afterEach() + val conf = new SparkConf() + conf.set(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL.key, "true") + init(conf) + // Construct the scenario of indeterminate stage fetch failed. + constructIndeterminateStageFetchFailed() + // The job should fail because Spark can't rollback the shuffle map stage while + // using old protocol. + assert(failure != null && failure.getMessage.contains( + "Spark can only do this while using the new shuffle block fetching protocol")) + } + + test("SPARK-25341: retry all the succeeding stages when the map stage is indeterminate") { + val (shuffleId1, shuffleId2) = constructIndeterminateStageFetchFailed() + + // Check status for all failedStages + val failedStages = scheduler.failedStages.toSeq + assert(failedStages.map(_.id) == Seq(1, 2)) + // Shuffle blocks of "hostC" is lost, so first task of the `shuffleMapRdd2` needs to retry. 
+ assert(failedStages.collect { + case stage: ShuffleMapStage if stage.shuffleDep.shuffleId == shuffleId2 => stage + }.head.findMissingPartitions() == Seq(0)) + // The result stage is still waiting for its 2 tasks to complete + assert(failedStages.collect { + case stage: ResultStage => stage + }.head.findMissingPartitions() == Seq(0, 1)) + + scheduler.resubmitFailedStages() + + // The first task of the `shuffleMapRdd2` failed with fetch failure + runEvent(makeCompletionEvent( + taskSets(3).tasks(0), + FetchFailed(makeBlockManagerId("hostA"), shuffleId1, 0L, 0, 0, "ignored"), + null)) + + val newFailedStages = scheduler.failedStages.toSeq + assert(newFailedStages.map(_.id) == Seq(0, 1)) + + scheduler.resubmitFailedStages() + + // First shuffle map stage resubmitted and reran all tasks. + assert(taskSets(4).stageId == 0) + assert(taskSets(4).stageAttemptId == 1) + assert(taskSets(4).tasks.length == 2) + + // Finish all stage. + complete(taskSets(4), Seq( + (Success, makeMapStatus("hostA", 2)), + (Success, makeMapStatus("hostB", 2)))) + assert(mapOutputTracker.findMissingPartitions(shuffleId1) === Some(Seq.empty)) + + complete(taskSets(5), Seq( + (Success, makeMapStatus("hostC", 2)), + (Success, makeMapStatus("hostD", 2)))) + assert(mapOutputTracker.findMissingPartitions(shuffleId2) === Some(Seq.empty)) + + complete(taskSets(6), Seq((Success, 11), (Success, 12))) + + // Job successful ended. + assert(results === Map(0 -> 11, 1 -> 12)) + results.clear() + assertDataStructuresEmpty() + } + + test("SPARK-25341: continuous indeterminate stage roll back") { + // shuffleMapRdd1/2/3 are all indeterminate. 
+ val shuffleMapRdd1 = new MyRDD(sc, 2, Nil, indeterminate = true) + val shuffleDep1 = new ShuffleDependency(shuffleMapRdd1, new HashPartitioner(2)) + val shuffleId1 = shuffleDep1.shuffleId + + val shuffleMapRdd2 = new MyRDD( + sc, 2, List(shuffleDep1), tracker = mapOutputTracker, indeterminate = true) + val shuffleDep2 = new ShuffleDependency(shuffleMapRdd2, new HashPartitioner(2)) + val shuffleId2 = shuffleDep2.shuffleId + + val shuffleMapRdd3 = new MyRDD( + sc, 2, List(shuffleDep2), tracker = mapOutputTracker, indeterminate = true) + val shuffleDep3 = new ShuffleDependency(shuffleMapRdd3, new HashPartitioner(2)) + val shuffleId3 = shuffleDep3.shuffleId + val finalRdd = new MyRDD(sc, 2, List(shuffleDep3), tracker = mapOutputTracker) + + submit(finalRdd, Array(0, 1), properties = new Properties()) + + // Finish the first 2 shuffle map stages. + complete(taskSets(0), Seq( + (Success, makeMapStatus("hostA", 2)), + (Success, makeMapStatus("hostB", 2)))) + assert(mapOutputTracker.findMissingPartitions(shuffleId1) === Some(Seq.empty)) + + complete(taskSets(1), Seq( + (Success, makeMapStatus("hostB", 2)), + (Success, makeMapStatus("hostD", 2)))) + assert(mapOutputTracker.findMissingPartitions(shuffleId2) === Some(Seq.empty)) + + // Executor lost on hostB, both of stage 0 and 1 should be reran. 
+ runEvent(makeCompletionEvent( + taskSets(2).tasks(0), + FetchFailed(makeBlockManagerId("hostB"), shuffleId2, 0L, 0, 0, "ignored"), null)) + mapOutputTracker.removeOutputsOnHost("hostB") + + assert(scheduler.failedStages.toSeq.map(_.id) == Seq(1, 2)) + scheduler.resubmitFailedStages() + + def checkAndCompleteRetryStage( + taskSetIndex: Int, + stageId: Int, + shuffleId: Int): Unit = { + assert(taskSets(taskSetIndex).stageId == stageId) + assert(taskSets(taskSetIndex).stageAttemptId == 1) + assert(taskSets(taskSetIndex).tasks.length == 2) + complete(taskSets(taskSetIndex), Seq( + (Success, makeMapStatus("hostA", 2)), + (Success, makeMapStatus("hostB", 2)))) + assert(mapOutputTracker.findMissingPartitions(shuffleId) === Some(Seq.empty)) + } + + // Check all indeterminate stage roll back. + checkAndCompleteRetryStage(3, 0, shuffleId1) + checkAndCompleteRetryStage(4, 1, shuffleId2) + checkAndCompleteRetryStage(5, 2, shuffleId3) - // The second shuffle map stage need to rerun, the job will abort for the indeterminate - // stage rerun. - // TODO: After we support re-generate shuffle file(SPARK-25341), this test will be extended. - assert(failure != null && failure.getMessage - .contains("Spark cannot rollback the ShuffleMapStage 1")) + // Result stage success, all job ended. 
+ complete(taskSets(6), Seq((Success, 11), (Success, 12))) + assert(results === Map(0 -> 11, 1 -> 12)) + results.clear() + assertDataStructuresEmpty() + } + + test("SPARK-29042: Sampled RDD with unordered input should be indeterminate") { + val shuffleMapRdd1 = new MyRDD(sc, 2, Nil, indeterminate = false) + + val shuffleDep1 = new ShuffleDependency(shuffleMapRdd1, new HashPartitioner(2)) + val shuffleMapRdd2 = new MyRDD(sc, 2, List(shuffleDep1), tracker = mapOutputTracker) + + assert(shuffleMapRdd2.outputDeterministicLevel == DeterministicLevel.UNORDERED) + + val sampledRdd = shuffleMapRdd2.sample(true, 0.3, 1000L) + assert(sampledRdd.outputDeterministicLevel == DeterministicLevel.INDETERMINATE) } private def assertResultStageFailToRollback(mapRdd: MyRDD): Unit = { @@ -2766,7 +2953,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // Fail the second task with FetchFailed. runEvent(makeCompletionEvent( taskSets.last.tasks(1), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null)) // The job should fail because Spark can't rollback the result stage. @@ -2809,7 +2996,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // Fail the second task with FetchFailed. 
runEvent(makeCompletionEvent( taskSets.last.tasks(1), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored"), null)) assert(failure == null, "job should not fail") @@ -2856,33 +3043,6 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi assert(latch.await(10, TimeUnit.SECONDS)) } - test("SPARK-28699: abort stage if parent stage is indeterminate stage") { - val shuffleMapRdd = new MyRDD(sc, 2, Nil, indeterminate = true) - - val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2)) - val shuffleId = shuffleDep.shuffleId - val finalRdd = new MyRDD(sc, 2, List(shuffleDep), tracker = mapOutputTracker) - - submit(finalRdd, Array(0, 1)) - - // Finish the first shuffle map stage. - complete(taskSets(0), Seq( - (Success, makeMapStatus("hostA", 2)), - (Success, makeMapStatus("hostB", 2)))) - assert(mapOutputTracker.findMissingPartitions(shuffleId) === Some(Seq.empty)) - - runEvent(makeCompletionEvent( - taskSets(1).tasks(0), - FetchFailed(makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored"), - null)) - - // Shuffle blocks of "hostA" is lost, so first task of the `shuffleMapRdd` needs to retry. - // The result stage is still waiting for its 2 tasks to complete. - // Because of shuffleMapRdd is indeterminate, this job will be abort. - assert(failure != null && failure.getMessage - .contains("Spark cannot rollback the ShuffleMapStage 0")) - } - test("Completions in zombie tasksets update status of non-zombie taskset") { val parts = 4 val shuffleMapRdd = new MyRDD(sc, parts, Nil) @@ -2899,7 +3059,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi // The second task of the shuffle map stage failed with FetchFailed. 
runEvent(makeCompletionEvent( taskSets(0).tasks(1), - FetchFailed(makeBlockManagerId("hostB"), shuffleDep.shuffleId, 0, 0, "ignored"), + FetchFailed(makeBlockManagerId("hostB"), shuffleDep.shuffleId, 0L, 0, 0, "ignored"), null)) scheduler.resubmitFailedStages() @@ -2938,7 +3098,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi * Assert that the supplied TaskSet has exactly the given hosts as its preferred locations. * Note that this checks only the host and not the executor ID. */ - private def assertLocations(taskSet: TaskSet, hosts: Seq[Seq[String]]) { + private def assertLocations(taskSet: TaskSet, hosts: Seq[Seq[String]]): Unit = { assert(hosts.size === taskSet.tasks.size) for ((taskLocs, expectedLocs) <- taskSet.tasks.map(_.preferredLocations).zip(hosts)) { assert(taskLocs.map(_.host).toSet === expectedLocs.toSet) @@ -2989,8 +3149,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } object DAGSchedulerSuite { - def makeMapStatus(host: String, reduces: Int, sizes: Byte = 2): MapStatus = - MapStatus(makeBlockManagerId(host), Array.fill[Long](reduces)(sizes)) + def makeMapStatus(host: String, reduces: Int, sizes: Byte = 2, mapTaskId: Long = -1): MapStatus = + MapStatus(makeBlockManagerId(host), Array.fill[Long](reduces)(sizes), mapTaskId) def makeBlockManagerId(host: String): BlockManagerId = BlockManagerId("exec-" + host, host, 12345) diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index a83ca594ee908..61ea21fa86c5a 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.scheduler -import java.io.{File, FileOutputStream, InputStream, IOException} +import java.io.{File, InputStream} import java.util.Arrays 
import scala.collection.immutable.Map @@ -32,15 +32,16 @@ import org.scalatest.BeforeAndAfter import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.{EventLogFileReader, SingleEventLogFileWriter} +import org.apache.spark.deploy.history.EventLogTestHelper._ import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.internal.Logging -import org.apache.spark.internal.config._ import org.apache.spark.io._ import org.apache.spark.metrics.{ExecutorMetricType, MetricsSystem} +import org.apache.spark.resource.ResourceProfile import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.{JsonProtocol, Utils} - /** * Test whether EventLoggingListener logs events properly. * @@ -51,8 +52,6 @@ import org.apache.spark.util.{JsonProtocol, Utils} class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext with BeforeAndAfter with Logging { - import EventLoggingListenerSuite._ - private val fileSystem = Utils.getHadoopFileSystem("/", SparkHadoopUtil.get.newConfiguration(new SparkConf())) private var testDir: File = _ @@ -68,40 +67,6 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit Utils.deleteRecursively(testDir) } - test("Verify log file exist") { - // Verify logging directory exists - val conf = getLoggingConf(testDirPath) - val eventLogger = new EventLoggingListener("test", None, testDirPath.toUri(), conf) - eventLogger.start() - - val logPath = new Path(eventLogger.logPath + EventLoggingListener.IN_PROGRESS) - assert(fileSystem.exists(logPath)) - val logStatus = fileSystem.getFileStatus(logPath) - assert(!logStatus.isDirectory) - - // Verify log is renamed after stop() - eventLogger.stop() - assert(!fileSystem.getFileStatus(new Path(eventLogger.logPath)).isDirectory) - } - - test("Basic event logging") { - testEventLogging() - } - - test("spark.eventLog.compression.codec overrides spark.io.compression.codec") { - val 
conf = new SparkConf - conf.set(EVENT_LOG_COMPRESS, true) - - // The default value is `spark.io.compression.codec`. - val e = new EventLoggingListener("test", None, testDirPath.toUri(), conf) - assert(e.compressionCodecName.contains("lz4")) - - // `spark.eventLog.compression.codec` overrides `spark.io.compression.codec`. - conf.set(EVENT_LOG_COMPRESSION_CODEC, "zstd") - val e2 = new EventLoggingListener("test", None, testDirPath.toUri(), conf) - assert(e2.compressionCodecName.contains("zstd")) - } - test("Basic event logging with compression") { CompressionCodec.ALL_COMPRESSION_CODECS.foreach { codec => testEventLogging(compressionCodec = Some(CompressionCodec.getShortName(codec))) @@ -131,35 +96,6 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit assert(redactedProps(key) == "*********(redacted)") } - test("Log overwriting") { - val logUri = EventLoggingListener.getLogPath(testDir.toURI, "test", None) - val logPath = new Path(logUri).toUri.getPath - // Create file before writing the event log - new FileOutputStream(new File(logPath)).close() - // Expected IOException, since we haven't enabled log overwrite. - intercept[IOException] { testEventLogging() } - // Try again, but enable overwriting. 
- testEventLogging(extraConf = Map(EVENT_LOG_OVERWRITE.key -> "true")) - } - - test("Event log name") { - val baseDirUri = Utils.resolveURI("/base-dir") - // without compression - assert(s"${baseDirUri.toString}/app1" === EventLoggingListener.getLogPath( - baseDirUri, "app1", None)) - // with compression - assert(s"${baseDirUri.toString}/app1.lzf" === - EventLoggingListener.getLogPath(baseDirUri, "app1", None, Some("lzf"))) - // illegal characters in app ID - assert(s"${baseDirUri.toString}/a-fine-mind_dollar_bills__1" === - EventLoggingListener.getLogPath(baseDirUri, - "a fine:mind$dollar{bills}.1", None)) - // illegal characters in app ID with compression - assert(s"${baseDirUri.toString}/a-fine-mind_dollar_bills__1.lz4" === - EventLoggingListener.getLogPath(baseDirUri, - "a fine:mind$dollar{bills}.1", None, Some("lz4"))) - } - test("Executor metrics update") { testStageExecutorMetricsEventLogging() } @@ -168,8 +104,6 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit * Actual test logic * * ----------------- */ - import EventLoggingListenerSuite._ - /** * Test basic event logging functionality. 
* @@ -178,7 +112,7 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit */ private def testEventLogging( compressionCodec: Option[String] = None, - extraConf: Map[String, String] = Map()) { + extraConf: Map[String, String] = Map()): Unit = { val conf = getLoggingConf(testDirPath, compressionCodec) extraConf.foreach { case (k, v) => conf.set(k, v) } val logName = compressionCodec.map("test-" + _).getOrElse("test") @@ -198,7 +132,8 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit eventLogger.stop() // Verify file contains exactly the two events logged - val logData = EventLoggingListener.openEventLog(new Path(eventLogger.logPath), fileSystem) + val logPath = eventLogger.logWriter.logPath + val logData = EventLogFileReader.openEventLog(new Path(logPath), fileSystem) try { val lines = readLines(logData) val logStart = SparkListenerLogStart(SPARK_VERSION) @@ -218,7 +153,7 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit * Test end-to-end event logging functionality in an application. * This runs a simple Spark job and asserts that the expected events are logged when expected. */ - private def testApplicationEventLogging(compressionCodec: Option[String] = None) { + private def testApplicationEventLogging(compressionCodec: Option[String] = None): Unit = { // Set defaultFS to something that would cause an exception, to make sure we don't run // into SPARK-6688. 
val conf = getLoggingConf(testDirPath, compressionCodec) @@ -226,9 +161,10 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit sc = new SparkContext("local-cluster[2,2,1024]", "test", conf) assert(sc.eventLogger.isDefined) val eventLogger = sc.eventLogger.get - val eventLogPath = eventLogger.logPath + + val eventLogPath = eventLogger.logWriter.logPath val expectedLogDir = testDir.toURI() - assert(eventLogPath === EventLoggingListener.getLogPath( + assert(eventLogPath === SingleEventLogFileWriter.getLogPath( expectedLogDir, sc.applicationId, None, compressionCodec.map(CompressionCodec.getShortName))) // Begin listening for events that trigger asserts @@ -243,7 +179,8 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit eventExistenceListener.assertAllCallbacksInvoked() // Make sure expected events exist in the log file. - val logData = EventLoggingListener.openEventLog(new Path(eventLogger.logPath), fileSystem) + val logData = EventLogFileReader.openEventLog(new Path(eventLogger.logWriter.logPath), + fileSystem) val eventSet = mutable.Set( SparkListenerApplicationStart, SparkListenerBlockManagerAdded, @@ -284,7 +221,7 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit * from SparkListenerTaskEnd events for tasks belonging to the stage are * logged in a StageExecutorMetrics event for each executor at stage completion. */ - private def testStageExecutorMetricsEventLogging() { + private def testStageExecutorMetricsEventLogging(): Unit = { val conf = getLoggingConf(testDirPath, None) val logName = "stageExecutorMetrics-test" val eventLogger = new EventLoggingListener(logName, None, testDirPath.toUri(), conf) @@ -466,7 +403,8 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit // Verify the log file contains the expected events. 
// Posted events should be logged, except for ExecutorMetricsUpdate events -- these // are consolidated, and the peak values for each stage are logged at stage end. - val logData = EventLoggingListener.openEventLog(new Path(eventLogger.logPath), fileSystem) + val logData = EventLogFileReader.openEventLog(new Path(eventLogger.logWriter.logPath), + fileSystem) try { val lines = readLines(logData) val logStart = SparkListenerLogStart(SPARK_VERSION) @@ -501,12 +439,14 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit private def createStageSubmittedEvent(stageId: Int) = { SparkListenerStageSubmitted(new StageInfo(stageId, 0, stageId.toString, 0, - Seq.empty, Seq.empty, "details")) + Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) } private def createStageCompletedEvent(stageId: Int) = { SparkListenerStageCompleted(new StageInfo(stageId, 0, stageId.toString, 0, - Seq.empty, Seq.empty, "details")) + Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) } private def createExecutorAddedEvent(executorId: Int) = { @@ -621,19 +561,19 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit var jobEnded = false var appEnded = false - override def onJobStart(jobStart: SparkListenerJobStart) { + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { jobStarted = true } - override def onJobEnd(jobEnd: SparkListenerJobEnd) { + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { jobEnded = true } - override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd) { + override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { appEnded = true } - def assertAllCallbacksInvoked() { + def assertAllCallbacksInvoked(): Unit = { assert(jobStarted, "JobStart callback not invoked!") assert(jobEnded, "JobEnd callback not invoked!") assert(appEnded, "ApplicationEnd callback not 
invoked!") @@ -641,24 +581,3 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit } } - - -object EventLoggingListenerSuite { - - /** Get a SparkConf with event logging enabled. */ - def getLoggingConf(logDir: Path, compressionCodec: Option[String] = None): SparkConf = { - val conf = new SparkConf - conf.set(EVENT_LOG_ENABLED, true) - conf.set(EVENT_LOG_BLOCK_UPDATES, true) - conf.set(EVENT_LOG_TESTING, true) - conf.set(EVENT_LOG_DIR, logDir.toString) - compressionCodec.foreach { codec => - conf.set(EVENT_LOG_COMPRESS, true) - conf.set(EVENT_LOG_COMPRESSION_CODEC, codec) - } - conf.set(EVENT_LOG_STAGE_EXECUTOR_METRICS, true) - conf - } - - def getUniqueApplicationId: String = "test-" + System.currentTimeMillis -} diff --git a/core/src/test/scala/org/apache/spark/scheduler/ExecutorResourceInfoSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ExecutorResourceInfoSuite.scala index 0109d1f82a453..388d4e25a06cf 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ExecutorResourceInfoSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ExecutorResourceInfoSuite.scala @@ -26,7 +26,7 @@ class ExecutorResourceInfoSuite extends SparkFunSuite { test("Track Executor Resource information") { // Init Executor Resource. - val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3")) + val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3"), 1) assert(info.availableAddrs.sorted sameElements Seq("0", "1", "2", "3")) assert(info.assignedAddrs.isEmpty) @@ -43,7 +43,7 @@ class ExecutorResourceInfoSuite extends SparkFunSuite { test("Don't allow acquire address that is not available") { // Init Executor Resource. - val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3")) + val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3"), 1) // Acquire some addresses. 
info.acquire(Seq("0", "1")) assert(!info.availableAddrs.contains("1")) @@ -56,7 +56,7 @@ class ExecutorResourceInfoSuite extends SparkFunSuite { test("Don't allow acquire address that doesn't exist") { // Init Executor Resource. - val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3")) + val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3"), 1) assert(!info.availableAddrs.contains("4")) // Acquire an address that doesn't exist val e = intercept[SparkException] { @@ -67,7 +67,7 @@ class ExecutorResourceInfoSuite extends SparkFunSuite { test("Don't allow release address that is not assigned") { // Init Executor Resource. - val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3")) + val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3"), 1) // Acquire addresses info.acquire(Array("0", "1")) assert(!info.assignedAddrs.contains("2")) @@ -80,7 +80,7 @@ class ExecutorResourceInfoSuite extends SparkFunSuite { test("Don't allow release address that doesn't exist") { // Init Executor Resource. 
- val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3")) + val info = new ExecutorResourceInfo(GPU, ArrayBuffer("0", "1", "2", "3"), 1) assert(!info.assignedAddrs.contains("4")) // Release an address that doesn't exist val e = intercept[SparkException] { @@ -88,4 +88,28 @@ class ExecutorResourceInfoSuite extends SparkFunSuite { } assert(e.getMessage.contains("Try to release an address that doesn't exist.")) } + + test("Ensure that we can acquire the same fractions of a resource from an executor") { + val slotSeq = Seq(10, 9, 8, 7, 6, 5, 4, 3, 2, 1) + val addresses = ArrayBuffer("0", "1", "2", "3") + slotSeq.foreach { slots => + val info = new ExecutorResourceInfo(GPU, addresses, slots) + for (_ <- 0 until slots) { + addresses.foreach(addr => info.acquire(Seq(addr))) + } + + // assert that each address was assigned `slots` times + info.assignedAddrs + .groupBy(identity) + .mapValues(_.size) + .foreach(x => assert(x._2 == slots)) + + addresses.foreach { addr => + assertThrows[SparkException] { + info.acquire(Seq(addr)) + } + assert(!info.availableAddrs.contains(addr)) + } + } + } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala index 73e88c4a0fda6..9f593e0039adc 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ExternalClusterManagerSuite.scala @@ -67,9 +67,9 @@ private class DummyExternalClusterManager extends ExternalClusterManager { private class DummySchedulerBackend extends SchedulerBackend { var initialized = false - def start() {} - def stop() {} - def reviveOffers() {} + def start(): Unit = {} + def stop(): Unit = {} + def reviveOffers(): Unit = {} def defaultParallelism(): Int = 1 def maxNumConcurrentTasks(): Int = 0 } @@ -89,6 +89,7 @@ private class DummyTaskScheduler extends TaskScheduler { override def 
notifyPartitionCompletion(stageId: Int, partitionId: Int): Unit = {} override def setDAGScheduler(dagScheduler: DAGScheduler): Unit = {} override def defaultParallelism(): Int = 2 + override def executorDecommission(executorId: String): Unit = {} override def executorLost(executorId: String, reason: ExecutorLossReason): Unit = {} override def workerRemoved(workerId: String, host: String, message: String): Unit = {} override def applicationAttemptId(): Option[String] = None diff --git a/core/src/test/scala/org/apache/spark/scheduler/FakeTask.scala b/core/src/test/scala/org/apache/spark/scheduler/FakeTask.scala index b29d32f7b35c5..8cb6268f85d36 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/FakeTask.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/FakeTask.scala @@ -42,22 +42,30 @@ object FakeTask { * locations for each task (given as varargs) if this sequence is not empty. */ def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { - createTaskSet(numTasks, stageAttemptId = 0, prefLocs: _*) + createTaskSet(numTasks, stageId = 0, stageAttemptId = 0, priority = 0, prefLocs: _*) } - def createTaskSet(numTasks: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { - createTaskSet(numTasks, stageId = 0, stageAttemptId, prefLocs: _*) + def createTaskSet( + numTasks: Int, + stageId: Int, + stageAttemptId: Int, + prefLocs: Seq[TaskLocation]*): TaskSet = { + createTaskSet(numTasks, stageId, stageAttemptId, priority = 0, prefLocs: _*) } - def createTaskSet(numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): - TaskSet = { + def createTaskSet( + numTasks: Int, + stageId: Int, + stageAttemptId: Int, + priority: Int, + prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new FakeTask(stageId, i, if (prefLocs.size != 0) 
prefLocs(i) else Nil) } - new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) + new TaskSet(tasks, stageId, stageAttemptId, priority = priority, null) } def createShuffleMapTaskSet( @@ -65,6 +73,15 @@ object FakeTask { stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { + createShuffleMapTaskSet(numTasks, stageId, stageAttemptId, priority = 0, prefLocs: _*) + } + + def createShuffleMapTaskSet( + numTasks: Int, + stageId: Int, + stageAttemptId: Int, + priority: Int, + prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } @@ -74,17 +91,18 @@ object FakeTask { }, prefLocs(i), new Properties, SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) } - new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) + new TaskSet(tasks, stageId, stageAttemptId, priority = priority, null) } def createBarrierTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { - createBarrierTaskSet(numTasks, stageId = 0, stageAttempId = 0, prefLocs: _*) + createBarrierTaskSet(numTasks, stageId = 0, stageAttemptId = 0, priority = 0, prefLocs: _*) } def createBarrierTaskSet( numTasks: Int, stageId: Int, - stageAttempId: Int, + stageAttemptId: Int, + priority: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") @@ -92,6 +110,6 @@ object FakeTask { val tasks = Array.tabulate[Task[_]](numTasks) { i => new FakeTask(stageId, i, if (prefLocs.size != 0) prefLocs(i) else Nil, isBarrier = true) } - new TaskSet(tasks, stageId, stageAttempId, priority = 0, null) + new TaskSet(tasks, stageId, stageAttemptId, priority = priority, null) } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala 
index c1e7fb9a1db16..23cc416f8572f 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/MapStatusSuite.scala @@ -61,7 +61,7 @@ class MapStatusSuite extends SparkFunSuite { stddev <- Seq(0.0, 0.01, 0.5, 1.0) ) { val sizes = Array.fill[Long](numSizes)(abs(round(Random.nextGaussian() * stddev)) + mean) - val status = MapStatus(BlockManagerId("a", "b", 10), sizes) + val status = MapStatus(BlockManagerId("a", "b", 10), sizes, -1) val status1 = compressAndDecompressMapStatus(status) for (i <- 0 until numSizes) { if (sizes(i) != 0) { @@ -75,7 +75,7 @@ class MapStatusSuite extends SparkFunSuite { test("large tasks should use " + classOf[HighlyCompressedMapStatus].getName) { val sizes = Array.fill[Long](2001)(150L) - val status = MapStatus(null, sizes) + val status = MapStatus(null, sizes, -1) assert(status.isInstanceOf[HighlyCompressedMapStatus]) assert(status.getSizeForBlock(10) === 150L) assert(status.getSizeForBlock(50) === 150L) @@ -87,10 +87,12 @@ class MapStatusSuite extends SparkFunSuite { val sizes = Array.tabulate[Long](3000) { i => i.toLong } val avg = sizes.sum / sizes.count(_ != 0) val loc = BlockManagerId("a", "b", 10) - val status = MapStatus(loc, sizes) + val mapTaskAttemptId = 5 + val status = MapStatus(loc, sizes, mapTaskAttemptId) val status1 = compressAndDecompressMapStatus(status) assert(status1.isInstanceOf[HighlyCompressedMapStatus]) assert(status1.location == loc) + assert(status1.mapId == mapTaskAttemptId) for (i <- 0 until 3000) { val estimate = status1.getSizeForBlock(i) if (sizes(i) > 0) { @@ -109,7 +111,7 @@ class MapStatusSuite extends SparkFunSuite { val smallBlockSizes = sizes.filter(n => n > 0 && n < threshold) val avg = smallBlockSizes.sum / smallBlockSizes.length val loc = BlockManagerId("a", "b", 10) - val status = MapStatus(loc, sizes) + val status = MapStatus(loc, sizes, 5) val status1 = compressAndDecompressMapStatus(status) 
assert(status1.isInstanceOf[HighlyCompressedMapStatus]) assert(status1.location == loc) @@ -165,7 +167,7 @@ class MapStatusSuite extends SparkFunSuite { SparkEnv.set(env) // Value of element in sizes is equal to the corresponding index. val sizes = (0L to 2000L).toArray - val status1 = MapStatus(BlockManagerId("exec-0", "host-0", 100), sizes) + val status1 = MapStatus(BlockManagerId("exec-0", "host-0", 100), sizes, 5) val arrayStream = new ByteArrayOutputStream(102400) val objectOutputStream = new ObjectOutputStream(arrayStream) assert(status1.isInstanceOf[HighlyCompressedMapStatus]) diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala index 848f702935536..7d063c3b3ac53 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala @@ -22,7 +22,6 @@ import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.time.{Seconds, Span} import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TaskContext} -import org.apache.spark.util.Utils /** * Integration tests for the OutputCommitCoordinator. 
diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala index d6964063c118e..728b9d65054ec 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala @@ -158,7 +158,7 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { def resultHandler(x: Int, y: Unit): Unit = {} val futureAction: SimpleFutureAction[Unit] = sc.submitJob[Int, Unit, Unit](rdd, OutputCommitFunctions(tempDir.getAbsolutePath).commitSuccessfully, - 0 until rdd.partitions.size, resultHandler, () => Unit) + 0 until rdd.partitions.size, resultHandler, () => ()) // It's an error if the job completes successfully even though no committer was authorized, // so throw an exception if the job was allowed to complete. intercept[TimeoutException] { @@ -251,10 +251,10 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { // stage so that we can check the state of the output committer. 
val retriedStage = sc.parallelize(1 to 100, 10) .map { i => (i % 10, i) } - .reduceByKey { case (_, _) => + .reduceByKey { (_, _) => val ctx = TaskContext.get() if (ctx.stageAttemptNumber() == 0) { - throw new FetchFailedException(SparkEnv.get.blockManager.blockManagerId, 1, 1, 1, + throw new FetchFailedException(SparkEnv.get.blockManager.blockManagerId, 1, 1L, 1, 1, new Exception("Failure for test.")) } else { ctx.stageId() @@ -288,7 +288,7 @@ private case class OutputCommitFunctions(tempDirPath: String) { // Mock output committer that simulates a failed commit (after commit is authorized) private def failingOutputCommitter = new FakeOutputCommitter { - override def commitTask(taskAttemptContext: TaskAttemptContext) { + override def commitTask(taskAttemptContext: TaskAttemptContext): Unit = { throw new RuntimeException } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala index d65b5cbfc094e..e6fbf9b09d43d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.scheduler import java.io._ -import java.net.URI +import java.nio.charset.StandardCharsets import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.ArrayBuffer @@ -30,6 +30,8 @@ import org.scalatest.BeforeAndAfter import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.EventLogFileReader +import org.apache.spark.deploy.history.EventLogTestHelper._ import org.apache.spark.io.{CompressionCodec, LZ4CompressionCodec} import org.apache.spark.util.{JsonProtocol, JsonProtocolSuite, Utils} @@ -52,17 +54,18 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp test("Simple replay") { val logFilePath = getFilePath(testDir, "events.txt") val fstream = 
fileSystem.create(logFilePath) + val fwriter = new OutputStreamWriter(fstream, StandardCharsets.UTF_8) val applicationStart = SparkListenerApplicationStart("Greatest App (N)ever", None, 125L, "Mickey", None) val applicationEnd = SparkListenerApplicationEnd(1000L) - Utils.tryWithResource(new PrintWriter(fstream)) { writer => + Utils.tryWithResource(new PrintWriter(fwriter)) { writer => // scalastyle:off println writer.println(compact(render(JsonProtocol.sparkEventToJson(applicationStart)))) writer.println(compact(render(JsonProtocol.sparkEventToJson(applicationEnd)))) // scalastyle:on println } - val conf = EventLoggingListenerSuite.getLoggingConf(logFilePath) + val conf = getLoggingConf(logFilePath) val logData = fileSystem.open(logFilePath) val eventMonster = new EventBufferingListener try { @@ -87,8 +90,9 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp test("Replay compressed inprogress log file succeeding on partial read") { val buffered = new ByteArrayOutputStream val codec = new LZ4CompressionCodec(new SparkConf()) - val compstream = codec.compressedOutputStream(buffered) - Utils.tryWithResource(new PrintWriter(compstream)) { writer => + val compstream = codec.compressedContinuousOutputStream(buffered) + val cwriter = new OutputStreamWriter(compstream, StandardCharsets.UTF_8) + Utils.tryWithResource(new PrintWriter(cwriter)) { writer => val applicationStart = SparkListenerApplicationStart("AppStarts", None, 125L, "Mickey", None) @@ -107,14 +111,14 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp } // Read the compressed .inprogress file and verify only first event was parsed. - val conf = EventLoggingListenerSuite.getLoggingConf(logFilePath) + val conf = getLoggingConf(logFilePath) val replayer = new ReplayListenerBus() val eventMonster = new EventBufferingListener replayer.addListener(eventMonster) // Verify the replay returns the events given the input maybe truncated. 
- val logData = EventLoggingListener.openEventLog(logFilePath, fileSystem) + val logData = EventLogFileReader.openEventLog(logFilePath, fileSystem) Utils.tryWithResource(new EarlyEOFInputStream(logData, buffered.size - 10)) { failingStream => replayer.replay(failingStream, logFilePath.toString, true) @@ -123,7 +127,7 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp } // Verify the replay throws the EOF exception since the input may not be truncated. - val logData2 = EventLoggingListener.openEventLog(logFilePath, fileSystem) + val logData2 = EventLogFileReader.openEventLog(logFilePath, fileSystem) Utils.tryWithResource(new EarlyEOFInputStream(logData2, buffered.size - 10)) { failingStream2 => intercept[EOFException] { replayer.replay(failingStream2, logFilePath.toString, false) @@ -134,10 +138,11 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp test("Replay incompatible event log") { val logFilePath = getFilePath(testDir, "incompatible.txt") val fstream = fileSystem.create(logFilePath) + val fwriter = new OutputStreamWriter(fstream, StandardCharsets.UTF_8) val applicationStart = SparkListenerApplicationStart("Incompatible App", None, 125L, "UserUsingIncompatibleVersion", None) val applicationEnd = SparkListenerApplicationEnd(1000L) - Utils.tryWithResource(new PrintWriter(fstream)) { writer => + Utils.tryWithResource(new PrintWriter(fwriter)) { writer => // scalastyle:off println writer.println(compact(render(JsonProtocol.sparkEventToJson(applicationStart)))) writer.println("""{"Event":"UnrecognizedEventOnlyForTest","Timestamp":1477593059313}""") @@ -145,7 +150,7 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp // scalastyle:on println } - val conf = EventLoggingListenerSuite.getLoggingConf(logFilePath) + val conf = getLoggingConf(logFilePath) val logData = fileSystem.open(logFilePath) val eventMonster = new EventBufferingListener try { @@ -184,14 +189,14 @@ class 
ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp * event to the corresponding event replayed from the event logs. This test makes the * assumption that the event logging behavior is correct (tested in a separate suite). */ - private def testApplicationReplay(codecName: Option[String] = None) { + private def testApplicationReplay(codecName: Option[String] = None): Unit = { val logDir = new File(testDir.getAbsolutePath, "test-replay") // Here, it creates `Path` from the URI instead of the absolute path for the explicit file // scheme so that the string representation of this `Path` has leading file scheme correctly. val logDirPath = new Path(logDir.toURI) fileSystem.mkdirs(logDirPath) - val conf = EventLoggingListenerSuite.getLoggingConf(logDirPath, codecName) + val conf = getLoggingConf(logDirPath, codecName) sc = new SparkContext("local-cluster[2,1,1024]", "Test replay", conf) // Run a few jobs @@ -208,7 +213,7 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp assert(!eventLog.isDirectory) // Replay events - val logData = EventLoggingListener.openEventLog(eventLog.getPath(), fileSystem) + val logData = EventLogFileReader.openEventLog(eventLog.getPath(), fileSystem) val eventMonster = new EventBufferingListener try { val replayer = new ReplayListenerBus() @@ -242,7 +247,7 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp private[scheduler] val loggedEvents = new ArrayBuffer[JValue] - override def onEvent(event: SparkListenerEvent) { + override def onEvent(event: SparkListenerEvent): Unit = { val eventJson = JsonProtocol.sparkEventToJson(event) loggedEvents += eventJson } diff --git a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala index 96706536fe53c..dff8975a4fe49 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala @@ -26,6 +26,7 @@ import scala.concurrent.duration.{Duration, SECONDS} import scala.reflect.ClassTag import org.scalactic.TripleEquals +import org.scalatest.Assertions import org.scalatest.Assertions.AssertionsHelper import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ @@ -463,7 +464,7 @@ class MockRDD( override def toString: String = "MockRDD " + id } -object MockRDD extends AssertionsHelper with TripleEquals { +object MockRDD extends AssertionsHelper with TripleEquals with Assertions { /** * make sure all the shuffle dependencies have a consistent number of output partitions * (mostly to make sure the test setup makes sense, not that Spark itself would get this wrong) @@ -621,7 +622,7 @@ class BasicSchedulerIntegrationSuite extends SchedulerIntegrationSuite[SingleCor backend.taskSuccess(taskDescription, DAGSchedulerSuite.makeMapStatus("hostA", 10)) case (1, 0, 0) => val fetchFailed = FetchFailed( - DAGSchedulerSuite.makeBlockManagerId("hostA"), shuffleId, 0, 0, "ignored") + DAGSchedulerSuite.makeBlockManagerId("hostA"), shuffleId, 0L, 0, 0, "ignored") backend.taskFailed(taskDescription, fetchFailed) case (1, _, partition) => backend.taskSuccess(taskDescription, 42 + partition) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index 8903e1054f53d..d4e8d63b54e5f 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.scheduler -import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} +import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.util.concurrent.Semaphore import scala.collection.JavaConverters._ @@ -38,9 +38,6 @@ class SparkListenerSuite extends SparkFunSuite 
with LocalSparkContext with Match import LiveListenerBus._ - /** Length of time to wait while draining listener events. */ - val WAIT_TIMEOUT_MILLIS = 10000 - val jobCompletionTime = 1421191296660L private val mockSparkContext: SparkContext = Mockito.mock(classOf[SparkContext]) @@ -65,7 +62,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match sc.listenerBus.addToSharedQueue(listener) sc.listenerBus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() sc.stop() assert(listener.sparkExSeen) @@ -86,7 +83,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match (1 to 5).foreach { _ => bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) } // Five messages should be marked as received and queued, but no messages should be posted to - // listeners yet because the the listener bus hasn't been started. + // listeners yet because the listener bus hasn't been started. 
assert(bus.metrics.numEventsPosted.getCount === 5) assert(bus.queuedEvents.size === 5) @@ -97,7 +94,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match // Starting listener bus should flush all buffered events bus.start(mockSparkContext, mockMetricsSystem) Mockito.verify(mockMetricsSystem).registerSource(bus.metrics) - bus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + bus.waitUntilEmpty() assert(counter.count === 5) assert(sharedQueueSize(bus) === 0) assert(eventProcessingTimeCount(bus) === 5) @@ -159,7 +156,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match assert(!drained) new Thread("ListenerBusStopper") { - override def run() { + override def run(): Unit = { stopperStarted.release() // stop() will block until notify() is called below bus.stop() @@ -209,7 +206,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match assert(sharedQueueSize(bus) === 1) assert(numDroppedEvents(bus) === 1) - // Allow the the remaining events to be processed so we can stop the listener bus: + // Allow the remaining events to be processed so we can stop the listener bus: listenerWait.release(2) bus.stop() } @@ -223,7 +220,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match rdd2.setName("Target RDD") rdd2.count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.stageInfos.size should be {1} val (stageInfo, taskInfoMetrics) = listener.stageInfos.head @@ -231,8 +228,8 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match stageInfo.rddInfos.forall(_.numPartitions == 4) should be {true} stageInfo.rddInfos.exists(_.name == "Target RDD") should be {true} stageInfo.numTasks should be {4} - stageInfo.submissionTime should be ('defined) - stageInfo.completionTime should be ('defined) + stageInfo.submissionTime should be (Symbol("defined")) + stageInfo.completionTime should be (Symbol("defined")) 
taskInfoMetrics.length should be {4} } @@ -248,7 +245,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match rdd3.setName("Trois") rdd1.count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.stageInfos.size should be {1} val stageInfo1 = listener.stageInfos.keys.find(_.stageId == 0).get stageInfo1.rddInfos.size should be {1} // ParallelCollectionRDD @@ -257,7 +254,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match listener.stageInfos.clear() rdd2.count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.stageInfos.size should be {1} val stageInfo2 = listener.stageInfos.keys.find(_.stageId == 1).get stageInfo2.rddInfos.size should be {3} @@ -266,7 +263,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match listener.stageInfos.clear() rdd3.count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.stageInfos.size should be {2} // Shuffle map stage + result stage val stageInfo3 = listener.stageInfos.keys.find(_.stageId == 3).get stageInfo3.rddInfos.size should be {1} // ShuffledRDD @@ -282,7 +279,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match val rdd2 = rdd1.map(_.toString) sc.runJob(rdd2, (items: Iterator[String]) => items.size, Seq(0, 1)) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.stageInfos.size should be {1} val (stageInfo, _) = listener.stageInfos.head @@ -310,7 +307,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match val numSlices = 16 val d = sc.parallelize(0 to 10000, numSlices).map(w) d.count() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.stageInfos.size should be (1) val d2 = d.map { i => w(i) -> i * 2 }.setName("shuffle input 1") @@ -321,7 +318,7 @@ 
class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match d4.setName("A Cogroup") d4.collectAsMap() - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() listener.stageInfos.size should be (4) listener.stageInfos.foreach { case (stageInfo, taskInfoMetrics) => /** @@ -372,7 +369,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match .reduce { case (x, y) => x } assert(result === 1.to(maxRpcMessageSize).toArray) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() val TASK_INDEX = 0 assert(listener.startedTasks.contains(TASK_INDEX)) assert(listener.startedGettingResultTasks.contains(TASK_INDEX)) @@ -388,7 +385,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match val result = sc.parallelize(Seq(1), 1).map(2 * _).reduce { case (x, y) => x } assert(result === 2) - sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + sc.listenerBus.waitUntilEmpty() val TASK_INDEX = 0 assert(listener.startedTasks.contains(TASK_INDEX)) assert(listener.startedGettingResultTasks.isEmpty) @@ -443,7 +440,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match // Post events to all listeners, and wait until the queue is drained (1 to 5).foreach { _ => bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) } - bus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + bus.waitUntilEmpty() // The exception should be caught, and the event should be propagated to other listeners assert(jobCounter1.count === 5) @@ -513,7 +510,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match // after we post one event, both interrupting listeners should get removed, and the // event log queue should be removed bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) - bus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + bus.waitUntilEmpty() assert(bus.activeQueues() === Set(SHARED_QUEUE, APP_STATUS_QUEUE)) 
assert(bus.findListenersByClass[BasicJobCounter]().size === 2) assert(bus.findListenersByClass[InterruptingListener]().size === 0) @@ -522,7 +519,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match // posting more events should be fine, they'll just get processed from the OK queue. (0 until 5).foreach { _ => bus.post(SparkListenerJobEnd(0, jobCompletionTime, JobSucceeded)) } - bus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) + bus.waitUntilEmpty() assert(counter1.count === 6) assert(counter2.count === 6) @@ -532,6 +529,47 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match } } + Seq(true, false).foreach { throwInterruptedException => + val suffix = if (throwInterruptedException) "throw interrupt" else "set Thread interrupted" + test(s"SPARK-30285: Fix deadlock in AsyncEventQueue.removeListenerOnError: $suffix") { + val LISTENER_BUS_STOP_WAITING_TIMEOUT_MILLIS = 10 * 1000L // 10 seconds + val bus = new LiveListenerBus(new SparkConf(false)) + val counter1 = new BasicJobCounter() + val counter2 = new BasicJobCounter() + val interruptingListener = new DelayInterruptingJobCounter(throwInterruptedException, 3) + bus.addToSharedQueue(counter1) + bus.addToSharedQueue(interruptingListener) + bus.addToEventLogQueue(counter2) + assert(bus.activeQueues() === Set(SHARED_QUEUE, EVENT_LOG_QUEUE)) + assert(bus.findListenersByClass[BasicJobCounter]().size === 2) + assert(bus.findListenersByClass[DelayInterruptingJobCounter]().size === 1) + + bus.start(mockSparkContext, mockMetricsSystem) + + (0 until 5).foreach { jobId => + bus.post(SparkListenerJobEnd(jobId, jobCompletionTime, JobSucceeded)) + } + + // Call bus.stop in a separate thread, otherwise we will block here until bus is stopped + val stoppingThread = new Thread(() => { + bus.stop() + }) + stoppingThread.start() + // Notify interrupting listener starts to work + interruptingListener.sleep = false + // Wait for bus to stop + 
stoppingThread.join(LISTENER_BUS_STOP_WAITING_TIMEOUT_MILLIS) + + // Stopping has been finished + assert(stoppingThread.isAlive === false) + // All queues are removed + assert(bus.activeQueues() === Set.empty) + assert(counter1.count === 5) + assert(counter2.count === 5) + assert(interruptingListener.count === 3) + } + } + test("event queue size can be configued through spark conf") { // configure the shared queue size to be 1, event log queue size to be 2, // and listner bus event queue size to be 5 @@ -563,7 +601,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match /** * Assert that the given list of numbers has an average that is greater than zero. */ - private def checkNonZeroAvg(m: Iterable[Long], msg: String) { + private def checkNonZeroAvg(m: Iterable[Long], msg: String): Unit = { assert(m.sum / m.size.toDouble > 0.0, msg) } @@ -574,7 +612,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match val stageInfos = mutable.Map[StageInfo, Seq[(TaskInfo, TaskMetrics)]]() var taskInfoMetrics = mutable.Buffer[(TaskInfo, TaskMetrics)]() - override def onTaskEnd(task: SparkListenerTaskEnd) { + override def onTaskEnd(task: SparkListenerTaskEnd): Unit = { val info = task.taskInfo val metrics = task.taskMetrics if (info != null && metrics != null) { @@ -582,7 +620,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match } } - override def onStageCompleted(stage: SparkListenerStageCompleted) { + override def onStageCompleted(stage: SparkListenerStageCompleted): Unit = { stageInfos(stage.stageInfo) = taskInfoMetrics taskInfoMetrics = mutable.Buffer.empty } @@ -606,7 +644,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match notify() } - override def onTaskGettingResult(taskGettingResult: SparkListenerTaskGettingResult) { + override def onTaskGettingResult(taskGettingResult: SparkListenerTaskGettingResult): Unit = { startedGettingResultTasks += 
taskGettingResult.taskInfo.index } } @@ -630,6 +668,35 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match } } } + + /** + * A simple listener that works as follows: + * 1. sleep and wait when `sleep` is true + * 2. when `sleep` is false, start to work: + * if it is interruptOnJobId, interrupt + * else count SparkListenerJobEnd numbers + */ + private class DelayInterruptingJobCounter( + val throwInterruptedException: Boolean, + val interruptOnJobId: Int) extends SparkListener { + @volatile var sleep = true + var count = 0 + + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { + while (sleep) { + Thread.sleep(10) + } + if (interruptOnJobId == jobEnd.jobId) { + if (throwInterruptedException) { + throw new InterruptedException("got interrupted") + } else { + Thread.currentThread().interrupt() + } + } else { + count += 1 + } + } + } } // These classes can't be declared inside of the SparkListenerSuite class because we don't want diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala index a6576e0d1c520..c84735c9665a7 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerWithClusterSuite.scala @@ -57,7 +57,7 @@ class SparkListenerWithClusterSuite extends SparkFunSuite with LocalSparkContext private class SaveExecutorInfo extends SparkListener { val addedExecutorInfo = mutable.Map[String, ExecutorInfo]() - override def onExecutorAdded(executor: SparkListenerExecutorAdded) { + override def onExecutorAdded(executor: SparkListenerExecutorAdded): Unit = { addedExecutorInfo(executor.executorId) = executor.executorInfo } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala index 
c16b552d20891..394a2a9fbf7cb 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala @@ -176,7 +176,7 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark if (stageAttemptNumber < 2) { // Throw FetchFailedException to explicitly trigger stage resubmission. A normal exception // will only trigger task resubmission in the same stage. - throw new FetchFailedException(null, 0, 0, 0, "Fake") + throw new FetchFailedException(null, 0, 0L, 0, 0, "Fake") } Seq(stageAttemptNumber).iterator }.collect() diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala index ae464352da440..2efe6da5e986f 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskResultGetterSuite.scala @@ -25,18 +25,19 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ import scala.util.control.NonFatal -import com.google.common.util.concurrent.MoreExecutors import org.mockito.ArgumentCaptor import org.mockito.ArgumentMatchers.{any, anyLong} import org.mockito.Mockito.{spy, times, verify} +import org.scalatest.Assertions._ import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.Eventually._ import org.apache.spark._ +import org.apache.spark.TaskState.TaskState import org.apache.spark.TestUtils.JavaSourceFromString import org.apache.spark.internal.config.Network.RPC_MESSAGE_MAX_SIZE import org.apache.spark.storage.TaskResultBlockId -import org.apache.spark.util.{MutableURLClassLoader, RpcUtils, Utils} +import org.apache.spark.util.{MutableURLClassLoader, RpcUtils, ThreadUtils, Utils} /** @@ -52,7 +53,7 @@ private class ResultDeletingTaskResultGetter(sparkEnv: SparkEnv, scheduler: Task @volatile var removeBlockSuccessfully = false override 
def enqueueSuccessfulTask( - taskSetManager: TaskSetManager, tid: Long, serializedData: ByteBuffer) { + taskSetManager: TaskSetManager, tid: Long, serializedData: ByteBuffer): Unit = { if (!removedResult) { // Only remove the result once, since we'd like to test the case where the task eventually // succeeds. @@ -78,6 +79,16 @@ private class ResultDeletingTaskResultGetter(sparkEnv: SparkEnv, scheduler: Task } } +private class DummyTaskSchedulerImpl(sc: SparkContext) + extends TaskSchedulerImpl(sc, 1, true) { + override def handleFailedTask( + taskSetManager: TaskSetManager, + tid: Long, + taskState: TaskState, + reason: TaskFailedReason): Unit = { + // do nothing + } +} /** * A [[TaskResultGetter]] that stores the [[DirectTaskResult]]s it receives from executors @@ -87,7 +98,7 @@ private class MyTaskResultGetter(env: SparkEnv, scheduler: TaskSchedulerImpl) extends TaskResultGetter(env, scheduler) { // Use the current thread so we can access its results synchronously - protected override val getTaskResultExecutor = MoreExecutors.sameThreadExecutor() + protected override val getTaskResultExecutor = ThreadUtils.sameThreadExecutorService // DirectTaskResults that we receive from the executors private val _taskResults = new ArrayBuffer[DirectTaskResult[_]] @@ -130,6 +141,31 @@ class TaskResultGetterSuite extends SparkFunSuite with BeforeAndAfter with Local "Expect result to be removed from the block manager.") } + test("handling total size of results larger than maxResultSize") { + sc = new SparkContext("local", "test", conf) + val scheduler = new DummyTaskSchedulerImpl(sc) + val spyScheduler = spy(scheduler) + val resultGetter = new TaskResultGetter(sc.env, spyScheduler) + scheduler.taskResultGetter = resultGetter + val myTsm = new TaskSetManager(spyScheduler, FakeTask.createTaskSet(2), 1) { + // always returns false + override def canFetchMoreResults(size: Long): Boolean = false + } + val indirectTaskResult = IndirectTaskResult(TaskResultBlockId(0), 0) + val 
directTaskResult = new DirectTaskResult(ByteBuffer.allocate(0), Nil, Array()) + val ser = sc.env.closureSerializer.newInstance() + val serializedIndirect = ser.serialize(indirectTaskResult) + val serializedDirect = ser.serialize(directTaskResult) + resultGetter.enqueueSuccessfulTask(myTsm, 0, serializedDirect) + resultGetter.enqueueSuccessfulTask(myTsm, 1, serializedIndirect) + eventually(timeout(1.second)) { + verify(spyScheduler, times(1)).handleFailedTask( + myTsm, 0, TaskState.KILLED, TaskKilled("Tasks result size has exceeded maxResultSize")) + verify(spyScheduler, times(1)).handleFailedTask( + myTsm, 1, TaskState.KILLED, TaskKilled("Tasks result size has exceeded maxResultSize")) + } + } + test("task retried if result missing from block manager") { // Set the maximum number of task failures to > 0, so that the task set isn't aborted // after the result is missing. diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index cac6285e58417..e7ecf847ff4f4 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -26,7 +26,7 @@ import org.mockito.ArgumentMatchers.{any, anyInt, anyString, eq => meq} import org.mockito.Mockito.{atLeast, atMost, never, spy, times, verify, when} import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.internal.Logging @@ -36,9 +36,9 @@ import org.apache.spark.resource.TestResourceIDs._ import org.apache.spark.util.ManualClock class FakeSchedulerBackend extends SchedulerBackend { - def start() {} - def stop() {} - def reviveOffers() {} + def start(): Unit = {} + def stop(): Unit = {} + def reviveOffers(): Unit = {} def defaultParallelism(): Int = 1 
def maxNumConcurrentTasks(): Int = 0 } @@ -228,19 +228,19 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B taskScheduler.taskSetManagerForAttempt(taskset.stageId, taskset.stageAttemptId).get.isZombie } - val attempt1 = FakeTask.createTaskSet(1, 0) + val attempt1 = FakeTask.createTaskSet(1, stageId = 0, stageAttemptId = 0) taskScheduler.submitTasks(attempt1) // The first submitted taskset is active assert(!isTasksetZombie(attempt1)) - val attempt2 = FakeTask.createTaskSet(1, 1) + val attempt2 = FakeTask.createTaskSet(1, stageId = 0, stageAttemptId = 1) taskScheduler.submitTasks(attempt2) // The first submitted taskset is zombie now assert(isTasksetZombie(attempt1)) // The newly submitted taskset is active assert(!isTasksetZombie(attempt2)) - val attempt3 = FakeTask.createTaskSet(1, 2) + val attempt3 = FakeTask.createTaskSet(1, stageId = 0, stageAttemptId = 2) taskScheduler.submitTasks(attempt3) // The first submitted taskset remains zombie assert(isTasksetZombie(attempt1)) @@ -255,7 +255,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val numFreeCores = 1 val workerOffers = IndexedSeq(new WorkerOffer("executor0", "host0", numFreeCores)) - val attempt1 = FakeTask.createTaskSet(10) + val attempt1 = FakeTask.createTaskSet(10, stageId = 0, stageAttemptId = 0) // submit attempt 1, offer some resources, some tasks get scheduled taskScheduler.submitTasks(attempt1) @@ -271,7 +271,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(0 === taskDescriptions2.length) // if we schedule another attempt for the same stage, it should get scheduled - val attempt2 = FakeTask.createTaskSet(10, 1) + val attempt2 = FakeTask.createTaskSet(10, stageId = 0, stageAttemptId = 1) // submit attempt 2, offer some resources, some tasks get scheduled taskScheduler.submitTasks(attempt2) @@ -287,7 +287,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B 
val numFreeCores = 10 val workerOffers = IndexedSeq(new WorkerOffer("executor0", "host0", numFreeCores)) - val attempt1 = FakeTask.createTaskSet(10) + val attempt1 = FakeTask.createTaskSet(10, stageId = 0, stageAttemptId = 0) // submit attempt 1, offer some resources, some tasks get scheduled taskScheduler.submitTasks(attempt1) @@ -303,7 +303,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(0 === taskDescriptions2.length) // submit attempt 2 - val attempt2 = FakeTask.createTaskSet(10, 1) + val attempt2 = FakeTask.createTaskSet(10, stageId = 0, stageAttemptId = 1) taskScheduler.submitTasks(attempt2) // attempt 1 finished (this can happen even if it was marked zombie earlier -- all tasks were @@ -497,7 +497,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B test("abort stage when all executors are blacklisted and we cannot acquire new executor") { taskScheduler = setupSchedulerWithMockTaskSetBlacklist() - val taskSet = FakeTask.createTaskSet(numTasks = 10, stageAttemptId = 0) + val taskSet = FakeTask.createTaskSet(numTasks = 10) taskScheduler.submitTasks(taskSet) val tsm = stageToMockTaskSetManager(0) @@ -539,7 +539,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "0") // We have only 1 task remaining with 1 executor - val taskSet = FakeTask.createTaskSet(numTasks = 1, stageAttemptId = 0) + val taskSet = FakeTask.createTaskSet(numTasks = 1) taskScheduler.submitTasks(taskSet) val tsm = stageToMockTaskSetManager(0) @@ -571,7 +571,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "10") // We have only 1 task remaining with 1 executor - val taskSet = FakeTask.createTaskSet(numTasks = 1, stageAttemptId = 0) + val taskSet = FakeTask.createTaskSet(numTasks = 1) taskScheduler.submitTasks(taskSet) val tsm = stageToMockTaskSetManager(0) @@ 
-910,7 +910,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B test("SPARK-16106 locality levels updated if executor added to existing host") { val taskScheduler = setupScheduler() - taskScheduler.submitTasks(FakeTask.createTaskSet(2, 0, + taskScheduler.submitTasks(FakeTask.createTaskSet(2, stageId = 0, stageAttemptId = 0, (0 until 2).map { _ => Seq(TaskLocation("host0", "executor2")) }: _* )) @@ -948,7 +948,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B test("scheduler checks for executors that can be expired from blacklist") { taskScheduler = setupScheduler() - taskScheduler.submitTasks(FakeTask.createTaskSet(1, 0)) + taskScheduler.submitTasks(FakeTask.createTaskSet(1, stageId = 0, stageAttemptId = 0)) taskScheduler.resourceOffers(IndexedSeq( new WorkerOffer("executor0", "host0", 1) )).flatten @@ -962,8 +962,8 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B taskScheduler.initialize(new FakeSchedulerBackend) // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. new DAGScheduler(sc, taskScheduler) { - override def taskStarted(task: Task[_], taskInfo: TaskInfo) {} - override def executorAdded(execId: String, host: String) {} + override def taskStarted(task: Task[_], taskInfo: TaskInfo): Unit = {} + override def executorAdded(execId: String, host: String): Unit = {} } val e0Offers = IndexedSeq(WorkerOffer("executor0", "host0", 1)) @@ -993,8 +993,8 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B taskScheduler.initialize(new FakeSchedulerBackend) // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. 
new DAGScheduler(sc, taskScheduler) { - override def taskStarted(task: Task[_], taskInfo: TaskInfo) {} - override def executorAdded(execId: String, host: String) {} + override def taskStarted(task: Task[_], taskInfo: TaskInfo): Unit = {} + override def executorAdded(execId: String, host: String): Unit = {} } val e0Offers = IndexedSeq(WorkerOffer("executor0", "host0", 1)) @@ -1044,8 +1044,8 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. new DAGScheduler(sc, taskScheduler) { - override def taskStarted(task: Task[_], taskInfo: TaskInfo) {} - override def executorAdded(execId: String, host: String) {} + override def taskStarted(task: Task[_], taskInfo: TaskInfo): Unit = {} + override def executorAdded(execId: String, host: String): Unit = {} } taskScheduler.initialize(new FakeSchedulerBackend) @@ -1084,8 +1084,8 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. new DAGScheduler(sc, taskScheduler) { - override def taskStarted(task: Task[_], taskInfo: TaskInfo) {} - override def executorAdded(execId: String, host: String) {} + override def taskStarted(task: Task[_], taskInfo: TaskInfo): Unit = {} + override def executorAdded(execId: String, host: String): Unit = {} } taskScheduler.initialize(new FakeSchedulerBackend) // make an offer on the preferred host so the scheduler knows its alive. 
This is necessary @@ -1154,6 +1154,29 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(3 === taskDescriptions.length) } + test("SPARK-29263: barrier TaskSet can't schedule when higher prio taskset takes the slots") { + val taskCpus = 2 + val taskScheduler = setupSchedulerWithMaster( + s"local[$taskCpus]", + config.CPUS_PER_TASK.key -> taskCpus.toString) + + val numFreeCores = 3 + val workerOffers = IndexedSeq( + new WorkerOffer("executor0", "host0", numFreeCores, Some("192.168.0.101:49625")), + new WorkerOffer("executor1", "host1", numFreeCores, Some("192.168.0.101:49627")), + new WorkerOffer("executor2", "host2", numFreeCores, Some("192.168.0.101:49629"))) + val barrier = FakeTask.createBarrierTaskSet(3, stageId = 0, stageAttemptId = 0, priority = 1) + val highPrio = FakeTask.createTaskSet(1, stageId = 1, stageAttemptId = 0, priority = 0) + + // submit highPrio and barrier taskSet + taskScheduler.submitTasks(highPrio) + taskScheduler.submitTasks(barrier) + val taskDescriptions = taskScheduler.resourceOffers(workerOffers).flatten + // it schedules the highPrio task first, and then will not have enough slots to schedule + // the barrier taskset + assert(1 === taskDescriptions.length) + } + test("cancelTasks shall kill all the running tasks and fail the stage") { val taskScheduler = setupScheduler() @@ -1169,7 +1192,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } }) - val attempt1 = FakeTask.createTaskSet(10, 0) + val attempt1 = FakeTask.createTaskSet(10) taskScheduler.submitTasks(attempt1) val workerOffers = IndexedSeq(new WorkerOffer("executor0", "host0", 1), @@ -1200,7 +1223,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } }) - val attempt1 = FakeTask.createTaskSet(10, 0) + val attempt1 = FakeTask.createTaskSet(10) taskScheduler.submitTasks(attempt1) val workerOffers = IndexedSeq(new WorkerOffer("executor0", "host0", 1), diff --git 
a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala index b3bc76687ce1b..ed97a4c206ca3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import org.mockito.ArgumentMatchers.isA import org.mockito.Mockito.{never, verify} import org.scalatest.BeforeAndAfterEach -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.internal.config diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index fedfa083e8d8f..b740e357903a2 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -21,16 +21,22 @@ import java.util.{Properties, Random} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer +import scala.concurrent.duration._ +import org.apache.hadoop.fs.FileAlreadyExistsException import org.mockito.ArgumentMatchers.{any, anyBoolean, anyInt, anyString} import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock +import org.scalatest.Assertions._ +import org.scalatest.PrivateMethodTester +import org.scalatest.concurrent.Eventually import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.serializer.SerializerInstance import org.apache.spark.storage.BlockManagerId import org.apache.spark.util.{AccumulatorV2, 
ManualClock} @@ -38,7 +44,7 @@ import org.apache.spark.util.{AccumulatorV2, ManualClock} class FakeDAGScheduler(sc: SparkContext, taskScheduler: FakeTaskScheduler) extends DAGScheduler(sc) { - override def taskStarted(task: Task[_], taskInfo: TaskInfo) { + override def taskStarted(task: Task[_], taskInfo: TaskInfo): Unit = { taskScheduler.startedTasks += taskInfo.index } @@ -48,13 +54,13 @@ class FakeDAGScheduler(sc: SparkContext, taskScheduler: FakeTaskScheduler) result: Any, accumUpdates: Seq[AccumulatorV2[_, _]], metricPeaks: Array[Long], - taskInfo: TaskInfo) { + taskInfo: TaskInfo): Unit = { taskScheduler.endedTasks(taskInfo.index) = reason } - override def executorAdded(execId: String, host: String) {} + override def executorAdded(execId: String, host: String): Unit = {} - override def executorLost(execId: String, reason: ExecutorLossReason) {} + override def executorLost(execId: String, reason: ExecutorLossReason): Unit = {} override def taskSetFailed( taskSet: TaskSet, @@ -74,13 +80,13 @@ object FakeRackUtil { var numBatchInvocation = 0 var numSingleHostInvocation = 0 - def cleanUp() { + def cleanUp(): Unit = { hostToRack.clear() numBatchInvocation = 0 numSingleHostInvocation = 0 } - def assignHostToRack(host: String, rack: String) { + def assignHostToRack(host: String, rack: String): Unit = { hostToRack(host) = rack } @@ -124,10 +130,10 @@ class FakeTaskScheduler(sc: SparkContext, liveExecutors: (String, String)* /* ex dagScheduler = new FakeDAGScheduler(sc, this) - def removeExecutor(execId: String) { + def removeExecutor(execId: String): Unit = { executors -= execId val host = executorIdToHost.get(execId) - assert(host != None) + assert(host.isDefined) val hostId = host.get val executorsOnHost = hostToExecutors(hostId) executorsOnHost -= execId @@ -149,7 +155,7 @@ class FakeTaskScheduler(sc: SparkContext, liveExecutors: (String, String)* /* ex hostsByRack.get(rack) != None } - def addExecutor(execId: String, host: String) { + def addExecutor(execId: 
String, host: String): Unit = { executors.put(execId, host) val executorsOnHost = hostToExecutors.getOrElseUpdate(host, new mutable.HashSet[String]) executorsOnHost += execId @@ -177,7 +183,12 @@ class LargeTask(stageId: Int) extends Task[Array[Byte]](stageId, 0, 0) { override def preferredLocations: Seq[TaskLocation] = Seq[TaskLocation]() } -class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logging { +class TaskSetManagerSuite + extends SparkFunSuite + with LocalSparkContext + with PrivateMethodTester + with Eventually + with Logging { import TaskLocality.{ANY, PROCESS_LOCAL, NO_PREF, NODE_LOCAL, RACK_LOCAL} private val conf = new SparkConf @@ -1262,7 +1273,7 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg // now fail those tasks tsmSpy.handleFailedTask(taskDescs(0).taskId, TaskState.FAILED, - FetchFailed(BlockManagerId(taskDescs(0).executorId, "host1", 12345), 0, 0, 0, "ignored")) + FetchFailed(BlockManagerId(taskDescs(0).executorId, "host1", 12345), 0, 0L, 0, 0, "ignored")) tsmSpy.handleFailedTask(taskDescs(1).taskId, TaskState.FAILED, ExecutorLostFailure(taskDescs(1).executorId, exitCausedByApp = false, reason = None)) tsmSpy.handleFailedTask(taskDescs(2).taskId, TaskState.FAILED, @@ -1302,7 +1313,7 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg // Fail the task with fetch failure tsm.handleFailedTask(taskDescs(0).taskId, TaskState.FAILED, - FetchFailed(BlockManagerId(taskDescs(0).executorId, "host1", 12345), 0, 0, 0, "ignored")) + FetchFailed(BlockManagerId(taskDescs(0).executorId, "host1", 12345), 0, 0L, 0, 0, "ignored")) assert(blacklistTracker.isNodeBlacklisted("host1")) } @@ -1775,4 +1786,208 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg assert(!manager.checkSpeculatableTasks(0)) assert(manager.resourceOffer("exec1", "host1", ANY).isEmpty) } + + private def testSpeculationDurationSetup( + speculationThresholdOpt: 
Option[String], + speculationQuantile: Double, + numTasks: Int, + numExecutorCores: Int, + numCoresPerTask: Int): (TaskSetManager, ManualClock) = { + sc = new SparkContext("local", "test") + sc.conf.set(config.SPECULATION_ENABLED, true) + sc.conf.set(config.SPECULATION_QUANTILE.key, speculationQuantile.toString) + // Set the number of slots per executor + sc.conf.set(config.EXECUTOR_CORES.key, numExecutorCores.toString) + sc.conf.set(config.CPUS_PER_TASK.key, numCoresPerTask.toString) + if (speculationThresholdOpt.isDefined) { + sc.conf.set(config.SPECULATION_TASK_DURATION_THRESHOLD.key, speculationThresholdOpt.get) + } + sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) + // Create a task set with the given number of tasks + val taskSet = FakeTask.createTaskSet(numTasks) + val clock = new ManualClock() + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES, clock = clock) + manager.isZombie = false + + // Offer resources for the task to start + for (i <- 1 to numTasks) { + manager.resourceOffer(s"exec$i", s"host$i", NO_PREF) + } + (manager, clock) + } + + private def testSpeculationDurationThreshold( + speculationThresholdProvided: Boolean, + numTasks: Int, + numSlots: Int): Unit = { + val (manager, clock) = testSpeculationDurationSetup( + // Set the threshold to be 60 minutes + if (speculationThresholdProvided) Some("60min") else None, + // Set the quantile to be 1.0 so that regular speculation would not be triggered + speculationQuantile = 1.0, + numTasks, + numSlots, + numCoresPerTask = 1 + ) + + // if the time threshold has not been exceeded, no speculative run should be triggered + clock.advance(1000*60*60) + assert(!manager.checkSpeculatableTasks(0)) + assert(sched.speculativeTasks.size == 0) + + // Now the task should have been running for 60 minutes and 1 second + clock.advance(1000) + if (speculationThresholdProvided && numSlots >= numTasks) { + assert(manager.checkSpeculatableTasks(0)) + 
assert(sched.speculativeTasks.size == numTasks) + // Should not submit duplicated tasks + assert(!manager.checkSpeculatableTasks(0)) + assert(sched.speculativeTasks.size == numTasks) + } else { + // If the feature flag is turned off, or the stage contains too many tasks + assert(!manager.checkSpeculatableTasks(0)) + assert(sched.speculativeTasks.size == 0) + } + } + + Seq(1, 2).foreach { numTasks => + test("SPARK-29976 when a speculation time threshold is provided, should speculative " + + s"run the task even if there are not enough successful runs, total tasks: $numTasks") { + testSpeculationDurationThreshold(true, numTasks, numTasks) + } + + test("SPARK-29976: when the speculation time threshold is not provided," + + s"don't speculative run if there are not enough successful runs, total tasks: $numTasks") { + testSpeculationDurationThreshold(false, numTasks, numTasks) + } + } + + test("SPARK-29976 when a speculation time threshold is provided, should not speculative " + + "if there are too many tasks in the stage even though time threshold is provided") { + testSpeculationDurationThreshold(true, 2, 1) + } + + test("SPARK-29976 Regular speculation configs should still take effect even when a " + + "threshold is provided") { + val (manager, clock) = testSpeculationDurationSetup( + Some("60min"), + speculationQuantile = 0.5, + numTasks = 2, + numExecutorCores = 2, + numCoresPerTask = 1 + ) + + // Task duration can't be 0, advance 1 sec + clock.advance(1000) + // Mark one of the task succeeded, which should satisfy the quantile + manager.handleSuccessfulTask(0, createTaskResult(0)) + // Advance 1 more second so the remaining task takes longer than medium but doesn't satisfy the + // duration threshold yet + clock.advance(1000) + assert(manager.checkSpeculatableTasks(0)) + assert(sched.speculativeTasks.size == 1) + } + + test("SPARK-30417 when spark.task.cpus is greater than spark.executor.cores due to " + + "standalone settings, speculate if there is only one task in 
the stage") { + val (manager, clock) = testSpeculationDurationSetup( + Some("60min"), + // Set the quantile to be 1.0 so that regular speculation would not be triggered + speculationQuantile = 1.0, + numTasks = 1, + numExecutorCores = 1, + numCoresPerTask = 2 + ) + + clock.advance(1000*60*60) + assert(!manager.checkSpeculatableTasks(0)) + assert(sched.speculativeTasks.size == 0) + // Now the task should have been running for 60 minutes and 1 second + clock.advance(1000) + assert(manager.checkSpeculatableTasks(0)) + assert(sched.speculativeTasks.size == 1) + } + + test("TaskOutputFileAlreadyExistException lead to task set abortion") { + sc = new SparkContext("local", "test") + sched = new FakeTaskScheduler(sc, ("exec1", "host1")) + val taskSet = FakeTask.createTaskSet(1) + val manager = new TaskSetManager(sched, taskSet, MAX_TASK_FAILURES) + assert(sched.taskSetsFailed.isEmpty) + + val offerResult = manager.resourceOffer("exec1", "host1", ANY) + assert(offerResult.isDefined, + "Expect resource offer on iteration 0 to return a task") + assert(offerResult.get.index === 0) + val reason = new ExceptionFailure( + new TaskOutputFileAlreadyExistException( + new FileAlreadyExistsException("file already exists")), + Seq.empty[AccumulableInfo]) + manager.handleFailedTask(offerResult.get.taskId, TaskState.FAILED, reason) + assert(sched.taskSetsFailed.contains(taskSet.id)) + } + + test("SPARK-30359: don't clean executorsPendingToRemove " + + "at the beginning of CoarseGrainedSchedulerBackend.reset") { + val conf = new SparkConf() + // use local-cluster mode in order to get CoarseGrainedSchedulerBackend + .setMaster("local-cluster[2, 1, 2048]") + // allow to set up at most two executors + .set("spark.cores.max", "2") + .setAppName("CoarseGrainedSchedulerBackend.reset") + sc = new SparkContext(conf) + val sched = sc.taskScheduler + val backend = sc.schedulerBackend.asInstanceOf[CoarseGrainedSchedulerBackend] + + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) + + val tasks = 
Array.tabulate[Task[_]](2)(partition => new FakeLongTasks(stageId = 0, partition)) + val taskSet: TaskSet = new TaskSet(tasks, stageId = 0, stageAttemptId = 0, priority = 0, null) + val stageId = taskSet.stageId + val stageAttemptId = taskSet.stageAttemptId + sched.submitTasks(taskSet) + val taskSetManagers = + PrivateMethod[mutable.HashMap[Int, mutable.HashMap[Int, TaskSetManager]]]( + Symbol("taskSetsByStageIdAndAttempt")) + // get the TaskSetManager + val manager = sched.invokePrivate(taskSetManagers()).get(stageId).get(stageAttemptId) + + val (task0, task1) = eventually(timeout(10.seconds), interval(100.milliseconds)) { + (manager.taskInfos(0), manager.taskInfos(1)) + } + + val (taskId0, index0, exec0) = (task0.taskId, task0.index, task0.executorId) + val (taskId1, index1, exec1) = (task1.taskId, task1.index, task1.executorId) + // set up two running tasks + assert(manager.taskInfos(taskId0).running) + assert(manager.taskInfos(taskId1).running) + + val numFailures = PrivateMethod[Array[Int]](Symbol("numFailures")) + // no task failures yet + assert(manager.invokePrivate(numFailures())(index0) === 0) + assert(manager.invokePrivate(numFailures())(index1) === 0) + + // let exec1 count task failures but exec0 doesn't + backend.executorsPendingToRemove(exec0) = true + backend.executorsPendingToRemove(exec1) = false + + backend.reset() + + eventually(timeout(10.seconds), interval(100.milliseconds)) { + // executorsPendingToRemove should eventually be empty after reset() + assert(backend.executorsPendingToRemove.isEmpty) + assert(manager.invokePrivate(numFailures())(index0) === 0) + assert(manager.invokePrivate(numFailures())(index1) === 1) + } + } +} + +class FakeLongTasks(stageId: Int, partitionId: Int) extends FakeTask(stageId, partitionId) { + + override def runTask(context: TaskContext): Int = { + while (true) { + Thread.sleep(10000) + } + 0 + } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala 
b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala new file mode 100644 index 0000000000000..15733b0d932ec --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.scheduler + +import java.util.concurrent.Semaphore + +import scala.concurrent.TimeoutException +import scala.concurrent.duration._ + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException, SparkFunSuite} +import org.apache.spark.internal.config +import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend +import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} + +class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { + + override def beforeEach(): Unit = { + val conf = new SparkConf().setAppName("test").setMaster("local") + .set(config.Worker.WORKER_DECOMMISSION_ENABLED, true) + + sc = new SparkContext("local-cluster[2, 1, 1024]", "test", conf) + } + + test("verify task with no decommissioning works as expected") { + val input = sc.parallelize(1 to 10) + input.count() + val sleepyRdd = input.mapPartitions{ x => + Thread.sleep(100) + x + } + assert(sleepyRdd.count() === 10) + } + + test("verify a task with all workers decommissioned succeeds") { + val input = sc.parallelize(1 to 10) + // Do a count to wait for the executors to be registered. + input.count() + val sleepyRdd = input.mapPartitions{ x => + Thread.sleep(50) + x + } + // Listen for the job + val sem = new Semaphore(0) + sc.addSparkListener(new SparkListener { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + sem.release() + } + }) + // Start the task. + val asyncCount = sleepyRdd.countAsync() + // Wait for the job to have started + sem.acquire(1) + // Decommission all the executors, this should not halt the current task. + // decom.sh message passing is tested manually. 
+ val sched = sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + val execs = sched.getExecutorIds() + execs.foreach(execId => sched.decommissionExecutor(execId)) + val asyncCountResult = ThreadUtils.awaitResult(asyncCount, 2.seconds) + assert(asyncCountResult === 10) + // Try and launch task after decommissioning, this should fail + val postDecommissioned = input.map(x => x) + val postDecomAsyncCount = postDecommissioned.countAsync() + val thrown = intercept[java.util.concurrent.TimeoutException]{ + val result = ThreadUtils.awaitResult(postDecomAsyncCount, 2.seconds) + } + assert(postDecomAsyncCount.isCompleted === false, + "After exec decommission new task could not launch") + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala index d3feb35537b34..3596a9ebb1f5a 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala @@ -27,15 +27,18 @@ import org.mockito.Mockito.{doAnswer, mock, when} import org.apache.spark._ import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.internal.config._ +import org.apache.spark.resource.ResourceProfile.{DEFAULT_RESOURCE_PROFILE_ID, UNKNOWN_RESOURCE_PROFILE_ID} +import org.apache.spark.resource.ResourceProfile import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.storage._ import org.apache.spark.util.ManualClock class ExecutorMonitorSuite extends SparkFunSuite { - private val idleTimeoutMs = TimeUnit.SECONDS.toMillis(60L) - private val storageTimeoutMs = TimeUnit.SECONDS.toMillis(120L) - private val shuffleTimeoutMs = TimeUnit.SECONDS.toMillis(240L) + private val idleTimeoutNs = TimeUnit.SECONDS.toNanos(60L) + private val storageTimeoutNs = TimeUnit.SECONDS.toNanos(120L) + private val 
shuffleTimeoutNs = TimeUnit.SECONDS.toNanos(240L) private val conf = new SparkConf() .set(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT.key, "60s") @@ -47,6 +50,9 @@ class ExecutorMonitorSuite extends SparkFunSuite { private var client: ExecutorAllocationClient = _ private var clock: ManualClock = _ + private val execInfo = new ExecutorInfo("host1", 1, Map.empty, + Map.empty, Map.empty, DEFAULT_RESOURCE_PROFILE_ID) + // List of known executors. Allows easily mocking which executors are alive without // having to use mockito APIs directly in each test. private val knownExecs = mutable.HashSet[String]() @@ -64,10 +70,12 @@ class ExecutorMonitorSuite extends SparkFunSuite { test("basic executor timeout") { knownExecs += "1" - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) assert(monitor.executorCount === 1) assert(monitor.isExecutorIdle("1")) assert(monitor.timedOutExecutors(idleDeadline) === Seq("1")) + assert(monitor.executorCountWithResourceProfile(DEFAULT_RESOURCE_PROFILE_ID) === 1) + assert(monitor.getResourceProfileId("1") === DEFAULT_RESOURCE_PROFILE_ID) } test("SPARK-4951, SPARK-26927: handle out of order task start events") { @@ -75,26 +83,38 @@ class ExecutorMonitorSuite extends SparkFunSuite { monitor.onTaskStart(SparkListenerTaskStart(1, 1, taskInfo("1", 1))) assert(monitor.executorCount === 1) + assert(monitor.executorCountWithResourceProfile(UNKNOWN_RESOURCE_PROFILE_ID) === 1) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) assert(monitor.executorCount === 1) + assert(monitor.executorCountWithResourceProfile(UNKNOWN_RESOURCE_PROFILE_ID) === 0) + assert(monitor.executorCountWithResourceProfile(DEFAULT_RESOURCE_PROFILE_ID) === 1) + assert(monitor.getResourceProfileId("1") === DEFAULT_RESOURCE_PROFILE_ID) - 
monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", execInfo)) assert(monitor.executorCount === 2) + assert(monitor.executorCountWithResourceProfile(DEFAULT_RESOURCE_PROFILE_ID) === 2) + assert(monitor.getResourceProfileId("2") === DEFAULT_RESOURCE_PROFILE_ID) monitor.onExecutorRemoved(SparkListenerExecutorRemoved(clock.getTimeMillis(), "2", null)) assert(monitor.executorCount === 1) + assert(monitor.executorCountWithResourceProfile(DEFAULT_RESOURCE_PROFILE_ID) === 1) knownExecs -= "2" monitor.onTaskStart(SparkListenerTaskStart(1, 1, taskInfo("2", 2))) assert(monitor.executorCount === 1) + assert(monitor.executorCountWithResourceProfile(DEFAULT_RESOURCE_PROFILE_ID) === 1) + + monitor.onExecutorRemoved(SparkListenerExecutorRemoved(clock.getTimeMillis(), "1", null)) + assert(monitor.executorCount === 0) + assert(monitor.executorCountWithResourceProfile(DEFAULT_RESOURCE_PROFILE_ID) === 0) } test("track tasks running on executor") { knownExecs += "1" - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onTaskStart(SparkListenerTaskStart(1, 1, taskInfo("1", 1))) assert(!monitor.isExecutorIdle("1")) @@ -111,13 +131,13 @@ class ExecutorMonitorSuite extends SparkFunSuite { monitor.onTaskEnd(SparkListenerTaskEnd(1, 1, "foo", Success, taskInfo("1", 1), new ExecutorMetrics, null)) assert(monitor.isExecutorIdle("1")) - assert(monitor.timedOutExecutors(clock.getTimeMillis()).isEmpty) - assert(monitor.timedOutExecutors(clock.getTimeMillis() + idleTimeoutMs + 1) === Seq("1")) + assert(monitor.timedOutExecutors(clock.nanoTime()).isEmpty) + assert(monitor.timedOutExecutors(clock.nanoTime() + idleTimeoutNs + 1) === Seq("1")) } test("use appropriate time out depending on whether blocks are stored") { knownExecs += "1" - 
monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) assert(monitor.isExecutorIdle("1")) assert(monitor.timedOutExecutors(idleDeadline) === Seq("1")) @@ -139,7 +159,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { } test("keeps track of stored blocks for each rdd and split") { - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onBlockUpdated(rddUpdate(1, 0, "1")) assert(monitor.timedOutExecutors(idleDeadline).isEmpty) @@ -166,27 +186,27 @@ class ExecutorMonitorSuite extends SparkFunSuite { // originally went idle. clock.setTime(idleDeadline) monitor.onUnpersistRDD(SparkListenerUnpersistRDD(2)) - assert(monitor.timedOutExecutors(clock.getTimeMillis()) === Seq("1")) + assert(monitor.timedOutExecutors(clock.nanoTime()) === Seq("1")) } test("handle timeouts correctly with multiple executors") { knownExecs ++= Set("1", "2", "3") // start exec 1 at 0s (should idle time out at 60s) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) assert(monitor.isExecutorIdle("1")) // start exec 2 at 30s, store a block (should idle time out at 150s) clock.setTime(TimeUnit.SECONDS.toMillis(30)) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", execInfo)) monitor.onBlockUpdated(rddUpdate(1, 0, "2")) assert(monitor.isExecutorIdle("2")) assert(!monitor.timedOutExecutors(idleDeadline).contains("2")) // start exec 3 at 60s (should idle timeout at 120s, exec 1 should time out) clock.setTime(TimeUnit.SECONDS.toMillis(60)) - 
monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "3", null)) - assert(monitor.timedOutExecutors(clock.getTimeMillis()) === Seq("1")) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "3", execInfo)) + assert(monitor.timedOutExecutors(clock.nanoTime()) === Seq("1")) // store block on exec 3 (should now idle time out at 180s) monitor.onBlockUpdated(rddUpdate(1, 0, "3")) @@ -196,16 +216,16 @@ class ExecutorMonitorSuite extends SparkFunSuite { // advance to 140s, remove block from exec 3 (time out immediately) clock.setTime(TimeUnit.SECONDS.toMillis(140)) monitor.onBlockUpdated(rddUpdate(1, 0, "3", level = StorageLevel.NONE)) - assert(monitor.timedOutExecutors(clock.getTimeMillis()).toSet === Set("1", "3")) + assert(monitor.timedOutExecutors(clock.nanoTime()).toSet === Set("1", "3")) // advance to 150s, now exec 2 should time out clock.setTime(TimeUnit.SECONDS.toMillis(150)) - assert(monitor.timedOutExecutors(clock.getTimeMillis()).toSet === Set("1", "2", "3")) + assert(monitor.timedOutExecutors(clock.nanoTime()).toSet === Set("1", "2", "3")) } test("SPARK-27677: don't track blocks stored on disk when using shuffle service") { // First make sure that blocks on disk are counted when no shuffle service is available. 
- monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onBlockUpdated(rddUpdate(1, 0, "1", level = StorageLevel.DISK_ONLY)) assert(monitor.timedOutExecutors(idleDeadline).isEmpty) assert(monitor.timedOutExecutors(storageDeadline) === Seq("1")) @@ -213,7 +233,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { conf.set(SHUFFLE_SERVICE_ENABLED, true).set(SHUFFLE_SERVICE_FETCH_RDD_ENABLED, true) monitor = new ExecutorMonitor(conf, client, null, clock) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onBlockUpdated(rddUpdate(1, 0, "1", level = StorageLevel.MEMORY_ONLY)) monitor.onBlockUpdated(rddUpdate(1, 1, "1", level = StorageLevel.MEMORY_ONLY)) assert(monitor.timedOutExecutors(idleDeadline).isEmpty) @@ -236,25 +256,28 @@ class ExecutorMonitorSuite extends SparkFunSuite { test("track executors pending for removal") { knownExecs ++= Set("1", "2", "3") - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", null)) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "3", null)) + val execInfoRp1 = new ExecutorInfo("host1", 1, Map.empty, + Map.empty, Map.empty, 1) + + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", execInfo)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "3", execInfoRp1)) clock.setTime(idleDeadline) - assert(monitor.timedOutExecutors().toSet === Set("1", "2", "3")) + assert(monitor.timedOutExecutors().toSet === Set(("1", 0), ("2", 0), ("3", 1))) assert(monitor.pendingRemovalCount === 0) // Notify 
that only a subset of executors was killed, to mimic the case where the scheduler // refuses to kill an executor that is busy for whatever reason the monitor hasn't detected yet. monitor.executorsKilled(Seq("1")) - assert(monitor.timedOutExecutors().toSet === Set("2", "3")) + assert(monitor.timedOutExecutors().toSet === Set(("2", 0), ("3", 1))) assert(monitor.pendingRemovalCount === 1) // Check the timed out executors again so that we're sure they're still timed out when no // events happen. This ensures that the monitor doesn't lose track of them. - assert(monitor.timedOutExecutors().toSet === Set("2", "3")) + assert(monitor.timedOutExecutors().toSet === Set(("2", 0), ("3", 1))) monitor.onTaskStart(SparkListenerTaskStart(1, 1, taskInfo("2", 1))) - assert(monitor.timedOutExecutors().toSet === Set("3")) + assert(monitor.timedOutExecutors().toSet === Set(("3", 1))) monitor.executorsKilled(Seq("3")) assert(monitor.pendingRemovalCount === 2) @@ -263,7 +286,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { new ExecutorMetrics, null)) assert(monitor.timedOutExecutors().isEmpty) clock.advance(idleDeadline) - assert(monitor.timedOutExecutors().toSet === Set("2")) + assert(monitor.timedOutExecutors().toSet === Set(("2", 0))) } test("shuffle block tracking") { @@ -286,7 +309,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { monitor.onJobStart(SparkListenerJobStart(1, clock.getTimeMillis(), Seq(stage1, stage2))) monitor.onJobStart(SparkListenerJobStart(2, clock.getTimeMillis(), Seq(stage3, stage4))) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) assert(monitor.timedOutExecutors(idleDeadline) === Seq("1")) // First a failed task, to make sure it does not count. 
@@ -342,7 +365,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { throw new IllegalStateException("No event should be sent.") } } - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.shuffleCleaned(0) } @@ -351,8 +374,8 @@ class ExecutorMonitorSuite extends SparkFunSuite { conf.set(DYN_ALLOCATION_SHUFFLE_TRACKING, true).set(SHUFFLE_SERVICE_ENABLED, false) monitor = new ExecutorMonitor(conf, client, bus, clock) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "2", execInfo)) // Two separate jobs with separate shuffles. The first job will only run tasks on // executor 1, the second on executor 2. 
Ensures that jobs finishing don't affect @@ -401,7 +424,7 @@ class ExecutorMonitorSuite extends SparkFunSuite { val stage = stageInfo(1, shuffleId = 0) monitor.onJobStart(SparkListenerJobStart(1, clock.getTimeMillis(), Seq(stage))) clock.advance(1000L) - monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", null)) + monitor.onExecutorAdded(SparkListenerExecutorAdded(clock.getTimeMillis(), "1", execInfo)) monitor.onTaskStart(SparkListenerTaskStart(1, 0, taskInfo("1", 1))) monitor.onTaskEnd(SparkListenerTaskEnd(1, 0, "foo", Success, taskInfo("1", 1), new ExecutorMetrics, null)) @@ -410,13 +433,14 @@ class ExecutorMonitorSuite extends SparkFunSuite { assert(monitor.timedOutExecutors(idleDeadline).isEmpty) } - private def idleDeadline: Long = clock.getTimeMillis() + idleTimeoutMs + 1 - private def storageDeadline: Long = clock.getTimeMillis() + storageTimeoutMs + 1 - private def shuffleDeadline: Long = clock.getTimeMillis() + shuffleTimeoutMs + 1 + private def idleDeadline: Long = clock.nanoTime() + idleTimeoutNs + 1 + private def storageDeadline: Long = clock.nanoTime() + storageTimeoutNs + 1 + private def shuffleDeadline: Long = clock.nanoTime() + shuffleTimeoutNs + 1 private def stageInfo(id: Int, shuffleId: Int = -1): StageInfo = { new StageInfo(id, 0, s"stage$id", 1, Nil, Nil, "", - shuffleDepId = if (shuffleId >= 0) Some(shuffleId) else None) + shuffleDepId = if (shuffleId >= 0) Some(shuffleId) else None, + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) } private def taskInfo( diff --git a/core/src/test/scala/org/apache/spark/security/EncryptionFunSuite.scala b/core/src/test/scala/org/apache/spark/security/EncryptionFunSuite.scala index be6b8a6b5b108..213f0ba2ec180 100644 --- a/core/src/test/scala/org/apache/spark/security/EncryptionFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/security/EncryptionFunSuite.scala @@ -27,7 +27,7 @@ trait EncryptionFunSuite { * Runs a test twice, initializing a SparkConf object 
with encryption off, then on. It's ok * for the test to modify the provided SparkConf. */ - final protected def encryptionTest(name: String)(fn: SparkConf => Unit) { + final protected def encryptionTest(name: String)(fn: SparkConf => Unit): Unit = { encryptionTestHelper(name) { case (name, conf) => test(name)(fn(conf)) } diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala index 2915b99dcfb60..953b651c72a83 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerBenchmark.scala @@ -25,6 +25,7 @@ import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Kryo._ +import org.apache.spark.launcher.SparkLauncher.EXECUTOR_EXTRA_JAVA_OPTIONS import org.apache.spark.serializer.KryoTest._ import org.apache.spark.util.ThreadUtils @@ -71,6 +72,9 @@ object KryoSerializerBenchmark extends BenchmarkBase { def createSparkContext(usePool: Boolean): SparkContext = { val conf = new SparkConf() + // SPARK-29282 This is for consistency between JDK8 and JDK11. 
+ conf.set(EXECUTOR_EXTRA_JAVA_OPTIONS, + "-XX:+UseParallelGC -XX:-UseDynamicNumberOfGCThreads") conf.set(SERIALIZER, "org.apache.spark.serializer.KryoSerializer") conf.set(KRYO_USER_REGISTRATORS, classOf[MyRegistrator].getName) conf.set(KRYO_USE_POOL, usePool) diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala index 5d76c096d46ac..d4fafab4a5d64 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala @@ -56,7 +56,7 @@ object KryoDistributedTest { class MyCustomClass class AppJarRegistrator extends KryoRegistrator { - override def registerClasses(k: Kryo) { + override def registerClasses(k: Kryo): Unit = { k.register(Utils.classForName(AppJarRegistrator.customClassName, noSparkClassLoader = true)) } diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index 2442670b6d3f0..4c47a67ee9ffc 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -34,10 +34,11 @@ import org.roaringbitmap.RoaringBitmap import org.apache.spark.{SharedSparkContext, SparkConf, SparkFunSuite} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Kryo._ +import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage import org.apache.spark.scheduler.HighlyCompressedMapStatus import org.apache.spark.serializer.KryoTest._ import org.apache.spark.storage.BlockManagerId -import org.apache.spark.util.{ThreadUtils, Utils} +import org.apache.spark.util.ThreadUtils class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set(SERIALIZER, 
"org.apache.spark.serializer.KryoSerializer") @@ -86,7 +87,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set(KRYO_REGISTRATION_REQUIRED, true) val ser = new KryoSerializer(conf).newInstance() - def check[T: ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) } check(1) @@ -119,7 +120,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set(KRYO_REGISTRATION_REQUIRED, true) val ser = new KryoSerializer(conf).newInstance() - def check[T: ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) } check((1, 1)) @@ -146,7 +147,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set(KRYO_REGISTRATION_REQUIRED, true) val ser = new KryoSerializer(conf).newInstance() - def check[T: ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) } check(List[Int]()) @@ -173,7 +174,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { test("Bug: SPARK-10251") { val ser = new KryoSerializer(conf.clone.set(KRYO_REGISTRATION_REQUIRED, true)) .newInstance() - def check[T: ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) } check((1, 3)) @@ -202,7 +203,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { test("ranges") { val ser = new KryoSerializer(conf).newInstance() - def check[T: ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) // Check that very long ranges don't get written one element at a time assert(ser.serialize(t).limit() < 200) @@ -238,7 +239,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { test("custom registrator") { val ser = new KryoSerializer(conf).newInstance() - def check[T: ClassTag](t: T) { + def check[T: 
ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) } @@ -274,19 +275,19 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { } test("kryo with parallelize for specialized tuples") { - assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).count === 3) + assert(sc.parallelize(Seq((1, 11), (2, 22), (3, 33))).count === 3) } test("kryo with parallelize for primitive arrays") { - assert (sc.parallelize( Array(1, 2, 3) ).count === 3) + assert(sc.parallelize(Array(1, 2, 3)).count === 3) } test("kryo with collect for specialized tuples") { - assert (sc.parallelize( Array((1, 11), (2, 22), (3, 33)) ).collect().head === ((1, 11))) + assert(sc.parallelize(Seq((1, 11), (2, 22), (3, 33))).collect().head === ((1, 11))) } test("kryo with SerializableHyperLogLog") { - assert(sc.parallelize( Array(1, 2, 3, 2, 3, 3, 2, 3, 1) ).countApproxDistinct(0.01) === 3) + assert(sc.parallelize(Array(1, 2, 3, 2, 3, 3, 2, 3, 1)).countApproxDistinct(0.01) === 3) } test("kryo with reduce") { @@ -350,8 +351,31 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { val ser = new KryoSerializer(conf).newInstance() val denseBlockSizes = new Array[Long](5000) val sparseBlockSizes = Array[Long](0L, 1L, 0L, 2L) + var mapTaskId = 0 Seq(denseBlockSizes, sparseBlockSizes).foreach { blockSizes => - ser.serialize(HighlyCompressedMapStatus(BlockManagerId("exec-1", "host", 1234), blockSizes)) + mapTaskId += 1 + ser.serialize(HighlyCompressedMapStatus( + BlockManagerId("exec-1", "host", 1234), blockSizes, mapTaskId)) + } + } + + test("registration of TaskCommitMessage") { + val conf = new SparkConf(false) + conf.set(KRYO_REGISTRATION_REQUIRED, true) + + // HadoopMapReduceCommitProtocol.commitTask() returns a TaskCommitMessage containing a complex + // structure. 
+ + val ser = new KryoSerializer(conf).newInstance() + val addedAbsPathFiles = Map("test1" -> "test1", "test2" -> "test2") + val partitionPaths = Set("test3") + + val taskCommitMessage1 = new TaskCommitMessage(addedAbsPathFiles -> partitionPaths) + val taskCommitMessage2 = new TaskCommitMessage(Map.empty -> Set.empty) + Seq(taskCommitMessage1, taskCommitMessage2).foreach { taskCommitMessage => + val obj1 = ser.deserialize[TaskCommitMessage](ser.serialize(taskCommitMessage)).obj + val obj2 = taskCommitMessage.obj + assert(obj1 == obj2) } } @@ -460,7 +484,7 @@ class KryoSerializerSuite extends SparkFunSuite with SharedSparkContext { val tests = mutable.ListBuffer[Future[Boolean]]() - def check[T: ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { tests += Future { val serializerInstance = ser.newInstance() serializerInstance.deserialize[T](serializerInstance.serialize(t)) === t @@ -579,7 +603,7 @@ object KryoTest { } class MyRegistrator extends KryoRegistrator { - override def registerClasses(k: Kryo) { + override def registerClasses(k: Kryo): Unit = { k.register(classOf[CaseClass]) k.register(classOf[ClassWithNoArgConstructor]) k.register(classOf[ClassWithoutNoArgConstructor]) @@ -588,7 +612,7 @@ object KryoTest { } class RegistratorWithoutAutoReset extends KryoRegistrator { - override def registerClasses(k: Kryo) { + override def registerClasses(k: Kryo): Unit = { k.setAutoReset(false) } } diff --git a/core/src/test/scala/org/apache/spark/serializer/UnsafeKryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/UnsafeKryoSerializerSuite.scala index 126ba0e8b1e93..65f3793c421fa 100644 --- a/core/src/test/scala/org/apache/spark/serializer/UnsafeKryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/UnsafeKryoSerializerSuite.scala @@ -23,12 +23,12 @@ class UnsafeKryoSerializerSuite extends KryoSerializerSuite { // This test suite should run all tests in KryoSerializerSuite with kryo unsafe. 
- override def beforeAll() { + override def beforeAll(): Unit = { conf.set(KRYO_USE_UNSAFE, true) super.beforeAll() } - override def afterAll() { + override def afterAll(): Unit = { conf.set(KRYO_USE_UNSAFE, false) super.afterAll() } diff --git a/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala index 6d2ef17a7a790..a82f86a11c77e 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/BlockStoreShuffleReaderSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.shuffle import java.io.{ByteArrayOutputStream, InputStream} import java.nio.ByteBuffer +import org.mockito.ArgumentMatchers.{eq => meq} import org.mockito.Mockito.{mock, when} import org.apache.spark._ @@ -95,19 +96,20 @@ class BlockStoreShuffleReaderSuite extends SparkFunSuite with LocalSparkContext // Setup the blockManager mock so the buffer gets returned when the shuffle code tries to // fetch shuffle data. val shuffleBlockId = ShuffleBlockId(shuffleId, mapId, reduceId) - when(blockManager.getBlockData(shuffleBlockId)).thenReturn(managedBuffer) + when(blockManager.getLocalBlockData(meq(shuffleBlockId))).thenReturn(managedBuffer) managedBuffer } // Make a mocked MapOutputTracker for the shuffle reader to use to determine what // shuffle data to read. val mapOutputTracker = mock(classOf[MapOutputTracker]) - when(mapOutputTracker.getMapSizesByExecutorId(shuffleId, reduceId, reduceId + 1)).thenReturn { + when(mapOutputTracker.getMapSizesByExecutorId( + shuffleId, reduceId, reduceId + 1)).thenReturn { // Test a scenario where all data is local, to avoid creating a bunch of additional mocks // for the code to read data over the network. 
val shuffleBlockIdsAndSizes = (0 until numMaps).map { mapId => val shuffleBlockId = ShuffleBlockId(shuffleId, mapId, reduceId) - (shuffleBlockId, byteOutputStream.size().toLong) + (shuffleBlockId, byteOutputStream.size().toLong, mapId) } Seq((localBlockManagerId, shuffleBlockIdsAndSizes)).toIterator } @@ -118,7 +120,7 @@ class BlockStoreShuffleReaderSuite extends SparkFunSuite with LocalSparkContext when(dependency.serializer).thenReturn(serializer) when(dependency.aggregator).thenReturn(None) when(dependency.keyOrdering).thenReturn(None) - new BaseShuffleHandle(shuffleId, numMaps, dependency) + new BaseShuffleHandle(shuffleId, dependency) } val serializerManager = new SerializerManager( @@ -129,15 +131,15 @@ class BlockStoreShuffleReaderSuite extends SparkFunSuite with LocalSparkContext val taskContext = TaskContext.empty() val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics() + val blocksByAddress = mapOutputTracker.getMapSizesByExecutorId( + shuffleId, reduceId, reduceId + 1) val shuffleReader = new BlockStoreShuffleReader( shuffleHandle, - reduceId, - reduceId + 1, + blocksByAddress, taskContext, metrics, serializerManager, - blockManager, - mapOutputTracker) + blockManager) assert(shuffleReader.read().length === keyValuePairsPerMap * numMaps) diff --git a/core/src/test/scala/org/apache/spark/shuffle/ShuffleDriverComponentsSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/ShuffleDriverComponentsSuite.scala new file mode 100644 index 0000000000000..3d70ff1fed29f --- /dev/null +++ b/core/src/test/scala/org/apache/spark/shuffle/ShuffleDriverComponentsSuite.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle + +import java.util.{Map => JMap} +import java.util.concurrent.atomic.AtomicBoolean + +import com.google.common.collect.ImmutableMap +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.internal.config.SHUFFLE_IO_PLUGIN_CLASS +import org.apache.spark.shuffle.api.{ShuffleDataIO, ShuffleDriverComponents, ShuffleExecutorComponents, ShuffleMapOutputWriter} +import org.apache.spark.shuffle.sort.io.LocalDiskShuffleDataIO + +class ShuffleDriverComponentsSuite + extends SparkFunSuite with LocalSparkContext with BeforeAndAfterEach { + + test("test serialization of shuffle initialization conf to executors") { + val testConf = new SparkConf() + .setAppName("testing") + .set(ShuffleDataIOUtils.SHUFFLE_SPARK_CONF_PREFIX + "test-plugin-key", "user-set-value") + .set(ShuffleDataIOUtils.SHUFFLE_SPARK_CONF_PREFIX + "test-user-key", "user-set-value") + .setMaster("local-cluster[2,1,1024]") + .set(SHUFFLE_IO_PLUGIN_CLASS, "org.apache.spark.shuffle.TestShuffleDataIO") + + sc = new SparkContext(testConf) + + val out = sc.parallelize(Seq((1, "one"), (2, "two"), (3, "three")), 3) + .groupByKey() + .foreach { _ => + if (!TestShuffleExecutorComponentsInitialized.initialized.get()) { + throw new RuntimeException("TestShuffleExecutorComponents 
wasn't initialized") + } + } + } +} + +class TestShuffleDataIO(sparkConf: SparkConf) extends ShuffleDataIO { + private val delegate = new LocalDiskShuffleDataIO(sparkConf) + + override def driver(): ShuffleDriverComponents = new TestShuffleDriverComponents() + + override def executor(): ShuffleExecutorComponents = + new TestShuffleExecutorComponentsInitialized(delegate.executor()) +} + +class TestShuffleDriverComponents extends ShuffleDriverComponents { + override def initializeApplication(): JMap[String, String] = { + ImmutableMap.of("test-plugin-key", "plugin-set-value") + } + + override def cleanupApplication(): Unit = {} +} + +object TestShuffleExecutorComponentsInitialized { + val initialized = new AtomicBoolean(false) +} + +class TestShuffleExecutorComponentsInitialized(delegate: ShuffleExecutorComponents) + extends ShuffleExecutorComponents { + + override def initializeExecutor( + appId: String, + execId: String, + extraConfigs: JMap[String, String]): Unit = { + delegate.initializeExecutor(appId, execId, extraConfigs) + assert(extraConfigs.get("test-plugin-key") == "plugin-set-value", extraConfigs) + assert(extraConfigs.get("test-user-key") == "user-set-value") + TestShuffleExecutorComponentsInitialized.initialized.set(true) + } + + override def createMapOutputWriter( + shuffleId: Int, + mapTaskId: Long, + numPartitions: Int): ShuffleMapOutputWriter = { + delegate.createMapOutputWriter(shuffleId, mapTaskId, numPartitions) + } +} diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala index b9f81fa0d0a06..f8474022867f4 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala @@ -25,7 +25,7 @@ import scala.collection.mutable.ArrayBuffer import org.mockito.{Mock, MockitoAnnotations} 
import org.mockito.Answers.RETURNS_SMART_NULLS -import org.mockito.ArgumentMatchers.{any, anyInt} +import org.mockito.ArgumentMatchers.{any, anyInt, anyLong} import org.mockito.Mockito._ import org.scalatest.BeforeAndAfterEach @@ -65,7 +65,6 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte taskMetrics = new TaskMetrics shuffleHandle = new BypassMergeSortShuffleHandle[Int, Int]( shuffleId = 0, - numMaps = 2, dependency = dependency ) val memoryManager = new TestMemoryManager(conf) @@ -78,7 +77,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte when(taskContext.taskMemoryManager()).thenReturn(taskMemoryManager) when(blockResolver.writeIndexFileAndCommit( - anyInt, anyInt, any(classOf[Array[Long]]), any(classOf[File]))) + anyInt, anyLong, any(classOf[Array[Long]]), any(classOf[File]))) .thenAnswer { invocationOnMock => val tmp = invocationOnMock.getArguments()(3).asInstanceOf[File] if (tmp != null) { @@ -139,8 +138,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte val writer = new BypassMergeSortShuffleWriter[Int, Int]( blockManager, shuffleHandle, - 0, // MapId - 0L, // MapTaskAttemptId + 0L, // MapId conf, taskContext.taskMetrics().shuffleWriteMetrics, shuffleExecutorComponents) @@ -166,8 +164,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte val writer = new BypassMergeSortShuffleWriter[Int, Int]( blockManager, shuffleHandle, - 0, // MapId - 0L, + 0L, // MapId transferConf, taskContext.taskMetrics().shuffleWriteMetrics, shuffleExecutorComponents) @@ -202,8 +199,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte val writer = new BypassMergeSortShuffleWriter[Int, Int]( blockManager, shuffleHandle, - 0, // MapId - 0L, + 0L, // MapId conf, taskContext.taskMetrics().shuffleWriteMetrics, shuffleExecutorComponents) @@ -224,8 +220,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite 
with BeforeAndAfte val writer = new BypassMergeSortShuffleWriter[Int, Int]( blockManager, shuffleHandle, - 0, // MapId - 0L, + 0L, // MapId conf, taskContext.taskMetrics().shuffleWriteMetrics, shuffleExecutorComponents) diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala index 8b955c98f7953..49055ab71c3fe 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/ShuffleExternalSorterSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.shuffle.sort import java.lang.{Long => JLong} import org.mockito.Mockito.when -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.executor.{ShuffleWriteMetrics, TaskMetrics} diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala index 0dd6040808f9e..4c5694fcf0305 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala @@ -57,7 +57,7 @@ class SortShuffleWriterSuite extends SparkFunSuite with SharedSparkContext with when(dependency.serializer).thenReturn(serializer) when(dependency.aggregator).thenReturn(None) when(dependency.keyOrdering).thenReturn(None) - new BaseShuffleHandle(shuffleId, numMaps = numMaps, dependency) + new BaseShuffleHandle(shuffleId, dependency) } shuffleExecutorComponents = new LocalDiskShuffleExecutorComponents( conf, blockManager, shuffleBlockResolver) diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala index 
5156cc2cc47a6..f92455912f510 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala @@ -23,7 +23,7 @@ import java.nio.file.Files import java.util.Arrays import org.mockito.Answers.RETURNS_SMART_NULLS -import org.mockito.ArgumentMatchers.{any, anyInt} +import org.mockito.ArgumentMatchers.{any, anyInt, anyLong} import org.mockito.Mock import org.mockito.Mockito.when import org.mockito.MockitoAnnotations @@ -73,9 +73,9 @@ class LocalDiskShuffleMapOutputWriterSuite extends SparkFunSuite with BeforeAndA conf = new SparkConf() .set("spark.app.id", "example.spark.app") .set("spark.shuffle.unsafe.file.output.buffer", "16k") - when(blockResolver.getDataFile(anyInt, anyInt)).thenReturn(mergedOutputFile) + when(blockResolver.getDataFile(anyInt, anyLong)).thenReturn(mergedOutputFile) when(blockResolver.writeIndexFileAndCommit( - anyInt, anyInt, any(classOf[Array[Long]]), any(classOf[File]))) + anyInt, anyLong, any(classOf[Array[Long]]), any(classOf[File]))) .thenAnswer { invocationOnMock => partitionSizesInMergedFile = invocationOnMock.getArguments()(2).asInstanceOf[Array[Long]] val tmp: File = invocationOnMock.getArguments()(3).asInstanceOf[File] diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index 4b71a4844bde1..24eb1685f577a 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -30,18 +30,21 @@ import org.apache.spark._ import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.internal.config.Status._ import org.apache.spark.metrics.ExecutorMetricType +import org.apache.spark.resource.ResourceProfile import org.apache.spark.scheduler._ import 
org.apache.spark.scheduler.cluster._ +import org.apache.spark.status.ListenerEventsTestHelper._ import org.apache.spark.status.api.v1 import org.apache.spark.storage._ import org.apache.spark.util.Utils class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { - private val conf = new SparkConf() .set(LIVE_ENTITY_UPDATE_PERIOD, 0L) .set(ASYNC_TRACKING_ENABLED, false) + private val twoReplicaMemAndDiskLevel = StorageLevel(true, true, false, true, 2) + private var time: Long = _ private var testDir: File = _ private var store: ElementTrackingStore = _ @@ -149,10 +152,13 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // Start a job with 2 stages / 4 tasks each time += 1 val stages = Seq( - new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1"), - new StageInfo(2, 0, "stage2", 4, Nil, Seq(1), "details2")) + new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID), + new StageInfo(2, 0, "stage2", 4, Nil, Seq(1), "details2", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) val jobProps = new Properties() + jobProps.setProperty(SparkContext.SPARK_JOB_DESCRIPTION, "jobDescription") jobProps.setProperty(SparkContext.SPARK_JOB_GROUP_ID, "jobGroup") jobProps.setProperty(SparkContext.SPARK_SCHEDULER_POOL, "schedPool") @@ -161,7 +167,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { check[JobDataWrapper](1) { job => assert(job.info.jobId === 1) assert(job.info.name === stages.last.name) - assert(job.info.description === None) + assert(job.info.description === Some("jobDescription")) assert(job.info.status === JobExecutionStatus.RUNNING) assert(job.info.submissionTime === Some(new Date(time))) assert(job.info.jobGroup === Some("jobGroup")) @@ -521,7 +527,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // - Re-submit stage 2, all tasks, and succeed them and the stage. 
val oldS2 = stages.last val newS2 = new StageInfo(oldS2.stageId, oldS2.attemptNumber + 1, oldS2.name, oldS2.numTasks, - oldS2.rddInfos, oldS2.parentIds, oldS2.details, oldS2.taskMetrics) + oldS2.rddInfos, oldS2.parentIds, oldS2.details, oldS2.taskMetrics, + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) time += 1 newS2.submissionTime = Some(time) @@ -572,8 +579,10 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // change the stats of the already finished job. time += 1 val j2Stages = Seq( - new StageInfo(3, 0, "stage1", 4, Nil, Nil, "details1"), - new StageInfo(4, 0, "stage2", 4, Nil, Seq(3), "details2")) + new StageInfo(3, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID), + new StageInfo(4, 0, "stage2", 4, Nil, Seq(3), "details2", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) j2Stages.last.submissionTime = Some(time) listener.onJobStart(SparkListenerJobStart(2, time, j2Stages, null)) assert(store.count(classOf[JobDataWrapper]) === 2) @@ -697,10 +706,20 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { val rdd2b1 = RddBlock(2, 1, 5L, 6L) val level = StorageLevel.MEMORY_AND_DISK + // Submit a stage for the first RDD before it's marked for caching, to make sure later + // the listener picks up the correct storage level. + val rdd1Info = new RDDInfo(rdd1b1.rddId, "rdd1", 2, StorageLevel.NONE, false, Nil) + val stage0 = new StageInfo(0, 0, "stage0", 4, Seq(rdd1Info), Nil, "details0", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + listener.onStageSubmitted(SparkListenerStageSubmitted(stage0, new Properties())) + listener.onStageCompleted(SparkListenerStageCompleted(stage0)) + assert(store.count(classOf[RDDStorageInfoWrapper]) === 0) + // Submit a stage and make sure the RDDs are recorded. 
- val rdd1Info = new RDDInfo(rdd1b1.rddId, "rdd1", 2, level, false, Nil) + rdd1Info.storageLevel = level val rdd2Info = new RDDInfo(rdd2b1.rddId, "rdd2", 1, level, false, Nil) - val stage = new StageInfo(1, 0, "stage1", 4, Seq(rdd1Info, rdd2Info), Nil, "details1") + val stage = new StageInfo(1, 0, "stage1", 4, Seq(rdd1Info, rdd2Info), Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) listener.onStageSubmitted(SparkListenerStageSubmitted(stage, new Properties())) check[RDDStorageInfoWrapper](rdd1b1.rddId) { wrapper => @@ -763,6 +782,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(part.memoryUsed === rdd1b1.memSize * 2) assert(part.diskUsed === rdd1b1.diskSize * 2) assert(part.executors === Seq(bm1.executorId, bm2.executorId)) + assert(part.storageLevel === twoReplicaMemAndDiskLevel.description) } check[ExecutorSummaryWrapper](bm2.executorId) { exec => @@ -800,9 +820,30 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(exec.info.diskUsed === rdd1b1.diskSize + rdd1b2.diskSize) } - // Remove block 1 from bm 1. + // Evict block 1 from memory in bm 1. Note that because of SPARK-29319, the disk size + // is reported as "0" here to avoid double-counting; the current behavior of the block + // manager is to provide the actual disk size of the block. 
+ listener.onBlockUpdated(SparkListenerBlockUpdated( + BlockUpdatedInfo(bm1, rdd1b1.blockId, StorageLevel.DISK_ONLY, + rdd1b1.memSize, 0L))) + + check[RDDStorageInfoWrapper](rdd1b1.rddId) { wrapper => + assert(wrapper.info.numCachedPartitions === 2L) + assert(wrapper.info.memoryUsed === rdd1b1.memSize + rdd1b2.memSize) + assert(wrapper.info.diskUsed === 2 * rdd1b1.diskSize + rdd1b2.diskSize) + assert(wrapper.info.dataDistribution.get.size === 2L) + assert(wrapper.info.partitions.get.size === 2L) + } + + check[ExecutorSummaryWrapper](bm1.executorId) { exec => + assert(exec.info.rddBlocks === 2L) + assert(exec.info.memoryUsed === rdd1b2.memSize) + assert(exec.info.diskUsed === rdd1b1.diskSize + rdd1b2.diskSize) + } + + // Remove block 1 from bm 1; note memSize = 0 due to the eviction above. listener.onBlockUpdated(SparkListenerBlockUpdated( - BlockUpdatedInfo(bm1, rdd1b1.blockId, StorageLevel.NONE, rdd1b1.memSize, rdd1b1.diskSize))) + BlockUpdatedInfo(bm1, rdd1b1.blockId, StorageLevel.NONE, 0, rdd1b1.diskSize))) check[RDDStorageInfoWrapper](rdd1b1.rddId) { wrapper => assert(wrapper.info.numCachedPartitions === 2L) @@ -985,9 +1026,12 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // data is not deleted. time += 1 val stages = Seq( - new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1"), - new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2"), - new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3")) + new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID), + new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID), + new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) // Graph data is generated by the job start event, so fire it. 
listener.onJobStart(SparkListenerJobStart(4, time, stages, null)) @@ -1035,7 +1079,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { } assert(store.count(classOf[CachedQuantile], "stage", key(dropped)) === 0) - val attempt2 = new StageInfo(3, 1, "stage3", 4, Nil, Nil, "details3") + val attempt2 = new StageInfo(3, 1, "stage3", 4, Nil, Nil, "details3", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) time += 1 attempt2.submissionTime = Some(time) listener.onStageSubmitted(SparkListenerStageSubmitted(attempt2, new Properties())) @@ -1106,9 +1151,12 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { val testConf = conf.clone().set(MAX_RETAINED_STAGES, 2) val listener = new AppStatusListener(store, testConf, true) - val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1") - val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2") - val stage3 = new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3") + val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val stage3 = new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) // Start stage 1 and stage 2 time += 1 @@ -1139,8 +1187,10 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { val testConf = conf.clone().set(MAX_RETAINED_STAGES, 2) val listener = new AppStatusListener(store, testConf, true) - val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1") - val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2") + val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, 
"details2", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) // Sart job 1 time += 1 @@ -1160,7 +1210,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { listener.onJobEnd(SparkListenerJobEnd(1, time, JobSucceeded)) // Submit stage 3 and verify stage 2 is evicted - val stage3 = new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3") + val stage3 = new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) time += 1 stage3.submissionTime = Some(time) listener.onStageSubmitted(SparkListenerStageSubmitted(stage3, new Properties())) @@ -1175,7 +1226,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { val testConf = conf.clone().set(MAX_RETAINED_TASKS_PER_STAGE, 2) val listener = new AppStatusListener(store, testConf, true) - val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1") + val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) stage1.submissionTime = Some(time) listener.onStageSubmitted(SparkListenerStageSubmitted(stage1, new Properties())) @@ -1210,9 +1262,12 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { val listener = new AppStatusListener(store, testConf, true) val appStore = new AppStatusStore(store) - val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1") - val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2") - val stage3 = new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3") + val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val stage3 = new StageInfo(3, 0, "stage3", 4, Nil, Nil, "details3", + resourceProfileId = 
ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) time += 1 stage1.submissionTime = Some(time) @@ -1241,8 +1296,10 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { test("SPARK-24415: update metrics for tasks that finish late") { val listener = new AppStatusListener(store, conf, true) - val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1") - val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2") + val stage1 = new StageInfo(1, 0, "stage1", 4, Nil, Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + val stage2 = new StageInfo(2, 0, "stage2", 4, Nil, Nil, "details2", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) // Start job listener.onJobStart(SparkListenerJobStart(1, time, Seq(stage1, stage2), null)) @@ -1307,7 +1364,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { listener.onExecutorAdded(createExecutorAddedEvent(1)) listener.onExecutorAdded(createExecutorAddedEvent(2)) - val stage = new StageInfo(1, 0, "stage", 4, Nil, Nil, "details") + val stage = new StageInfo(1, 0, "stage", 4, Nil, Nil, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) listener.onJobStart(SparkListenerJobStart(1, time, Seq(stage), null)) listener.onStageSubmitted(SparkListenerStageSubmitted(stage, new Properties())) @@ -1544,7 +1602,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // Submit a stage and make sure the RDDs are recorded. val rdd1Info = new RDDInfo(rdd1b1.rddId, "rdd1", 2, level, false, Nil) - val stage = new StageInfo(1, 0, "stage1", 4, Seq(rdd1Info), Nil, "details1") + val stage = new StageInfo(1, 0, "stage1", 4, Seq(rdd1Info), Nil, "details1", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) listener.onStageSubmitted(SparkListenerStageSubmitted(stage, new Properties())) // Add partition 1 replicated on two block managers. 
@@ -1571,7 +1630,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(dist.memoryRemaining === maxMemory - dist.memoryUsed) val part1 = wrapper.info.partitions.get.find(_.blockName === rdd1b1.blockId.name).get - assert(part1.storageLevel === level.description) + assert(part1.storageLevel === twoReplicaMemAndDiskLevel.description) assert(part1.memoryUsed === 2 * rdd1b1.memSize) assert(part1.diskUsed === 2 * rdd1b1.diskSize) assert(part1.executors === Seq(bm1.executorId, bm2.executorId)) @@ -1624,6 +1683,30 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { } } + test("clean up used memory when BlockManager added") { + val listener = new AppStatusListener(store, conf, true) + // Add block manager at the first time + val driver = BlockManagerId(SparkContext.DRIVER_IDENTIFIER, "localhost", 42) + listener.onBlockManagerAdded(SparkListenerBlockManagerAdded( + time, driver, 42L, Some(43L), Some(44L))) + // Update the memory metrics + listener.updateExecutorMemoryDiskInfo( + listener.liveExecutors(SparkContext.DRIVER_IDENTIFIER), + StorageLevel.MEMORY_AND_DISK, + 10L, + 10L + ) + // Re-add the same block manager again + listener.onBlockManagerAdded(SparkListenerBlockManagerAdded( + time, driver, 42L, Some(43L), Some(44L))) + + check[ExecutorSummaryWrapper](SparkContext.DRIVER_IDENTIFIER) { d => + val memoryMetrics = d.info.memoryMetrics.get + assert(memoryMetrics.usedOffHeapStorageMemory == 0) + assert(memoryMetrics.usedOnHeapStorageMemory == 0) + } + } + private def key(stage: StageInfo): Array[Int] = Array(stage.stageId, stage.attemptNumber) @@ -1661,40 +1744,4 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { def blockId: BlockId = RDDBlockId(rddId, partId) } - - /** Create a stage submitted event for the specified stage Id. 
*/ - private def createStageSubmittedEvent(stageId: Int) = { - SparkListenerStageSubmitted(new StageInfo(stageId, 0, stageId.toString, 0, - Seq.empty, Seq.empty, "details")) - } - - /** Create a stage completed event for the specified stage Id. */ - private def createStageCompletedEvent(stageId: Int) = { - SparkListenerStageCompleted(new StageInfo(stageId, 0, stageId.toString, 0, - Seq.empty, Seq.empty, "details")) - } - - /** Create an executor added event for the specified executor Id. */ - private def createExecutorAddedEvent(executorId: Int) = { - SparkListenerExecutorAdded(0L, executorId.toString, - new ExecutorInfo("host1", 1, Map.empty, Map.empty)) - } - - /** Create an executor added event for the specified executor Id. */ - private def createExecutorRemovedEvent(executorId: Int) = { - SparkListenerExecutorRemoved(10L, executorId.toString, "test") - } - - /** Create an executor metrics update event, with the specified executor metrics values. */ - private def createExecutorMetricsUpdateEvent( - stageId: Int, - executorId: Int, - executorMetrics: Array[Long]): SparkListenerExecutorMetricsUpdate = { - val taskMetrics = TaskMetrics.empty - taskMetrics.incDiskBytesSpilled(111) - taskMetrics.incMemoryBytesSpilled(222) - val accum = Array((333L, 1, 1, taskMetrics.accumulators().map(AccumulatorSuite.makeInfo))) - val executorUpdates = Map((stageId, 0) -> new ExecutorMetrics(executorMetrics)) - SparkListenerExecutorMetricsUpdate(executorId.toString, accum, executorUpdates) - } } diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala index 165fdb71cc78b..735e51942626f 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala @@ -18,7 +18,9 @@ package org.apache.spark.status import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.util.Distribution 
+import org.apache.spark.executor.TaskMetrics +import org.apache.spark.scheduler.{TaskInfo, TaskLocality} +import org.apache.spark.util.{Distribution, Utils} import org.apache.spark.util.kvstore._ class AppStatusStoreSuite extends SparkFunSuite { @@ -76,42 +78,61 @@ class AppStatusStoreSuite extends SparkFunSuite { assert(store.count(classOf[CachedQuantile]) === 2) } - private def createLiveStore(inMemoryStore: InMemoryStore): AppStatusStore = { + private def createAppStore(disk: Boolean, live: Boolean): AppStatusStore = { val conf = new SparkConf() - val store = new ElementTrackingStore(inMemoryStore, conf) - val listener = new AppStatusListener(store, conf, true, None) - new AppStatusStore(store, listener = Some(listener)) - } + if (live) { + return AppStatusStore.createLiveStore(conf) + } - test("SPARK-28638: only successful tasks have taskSummary when with in memory kvstore") { - val store = new InMemoryStore() - (0 until 5).foreach { i => store.write(newTaskData(i, status = "FAILED")) } - Seq(new AppStatusStore(store), createLiveStore(store)).foreach { appStore => - val summary = appStore.taskSummary(stageId, attemptId, uiQuantiles) - assert(summary.size === 0) + val store: KVStore = if (disk) { + val testDir = Utils.createTempDir() + val diskStore = KVUtils.open(testDir, getClass.getName) + new ElementTrackingStore(diskStore, conf) + } else { + new ElementTrackingStore(new InMemoryStore, conf) } + new AppStatusStore(store) } - test("SPARK-28638: summary should contain successful tasks only when with in memory kvstore") { - val store = new InMemoryStore() + Seq( + "disk" -> createAppStore(disk = true, live = false), + "in memory" -> createAppStore(disk = false, live = false), + "in memory live" -> createAppStore(disk = false, live = true) + ).foreach { case (hint, appStore) => + test(s"SPARK-26260: summary should contain only successful tasks' metrics (store = $hint)") { + val store = appStore.store + + // Success and failed tasks metrics + for (i <- 0 to 5) { 
+ if (i % 2 == 0) { + writeTaskDataToStore(i, store, "FAILED") + } else { + writeTaskDataToStore(i, store, "SUCCESS") + } + } - for (i <- 0 to 5) { - if (i % 2 == 1) { - store.write(newTaskData(i, status = "FAILED")) - } else { - store.write(newTaskData(i)) + // Running tasks metrics (-1 = no metrics reported, positive = metrics have been reported) + Seq(-1, 6).foreach { metric => + writeTaskDataToStore(metric, store, "RUNNING") } - } - Seq(new AppStatusStore(store), createLiveStore(store)).foreach { appStore => + /** + * Following are the tasks metrics, + * 1, 3, 5 => Success + * 0, 2, 4 => Failed + * -1, 6 => Running + * + * Task summary will consider (1, 3, 5) only + */ val summary = appStore.taskSummary(stageId, attemptId, uiQuantiles).get - val values = Array(0.0, 2.0, 4.0) + val values = Array(1.0, 3.0, 5.0) val dist = new Distribution(values, 0, values.length).getQuantiles(uiQuantiles.sorted) dist.zip(summary.executorRunTime).foreach { case (expected, actual) => assert(expected === actual) } + appStore.close() } } @@ -133,9 +154,54 @@ class AppStatusStoreSuite extends SparkFunSuite { private def newTaskData(i: Int, status: String = "SUCCESS"): TaskDataWrapper = { new TaskDataWrapper( - i, i, i, i, i, i, i.toString, i.toString, status, i.toString, false, Nil, None, + i.toLong, i, i, i, i, i, i.toString, i.toString, status, i.toString, false, Nil, None, true, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, stageId, attemptId) } + + private def writeTaskDataToStore(i: Int, store: KVStore, status: String): Unit = { + val liveTask = new LiveTask(new TaskInfo( i.toLong, i, i, i.toLong, i.toString, + i.toString, TaskLocality.ANY, false), stageId, attemptId, None) + + if (status == "SUCCESS") { + liveTask.info.finishTime = 1L + } else if (status == "FAILED") { + liveTask.info.failed = true + liveTask.info.finishTime = 1L + } + + val taskMetrics = getTaskMetrics(i) + liveTask.updateMetrics(taskMetrics) + 
liveTask.write(store.asInstanceOf[ElementTrackingStore], 1L) + } + + private def getTaskMetrics(i: Int): TaskMetrics = { + val taskMetrics = new TaskMetrics() + taskMetrics.setExecutorDeserializeTime(i) + taskMetrics.setExecutorDeserializeCpuTime(i) + taskMetrics.setExecutorRunTime(i) + taskMetrics.setExecutorCpuTime(i) + taskMetrics.setResultSize(i) + taskMetrics.setJvmGCTime(i) + taskMetrics.setResultSerializationTime(i) + taskMetrics.incMemoryBytesSpilled(i) + taskMetrics.incDiskBytesSpilled(i) + taskMetrics.incPeakExecutionMemory(i) + taskMetrics.inputMetrics.incBytesRead(i) + taskMetrics.inputMetrics.incRecordsRead(i) + taskMetrics.outputMetrics.setBytesWritten(i) + taskMetrics.outputMetrics.setRecordsWritten(i) + taskMetrics.shuffleReadMetrics.incRemoteBlocksFetched(i) + taskMetrics.shuffleReadMetrics.incLocalBlocksFetched(i) + taskMetrics.shuffleReadMetrics.incFetchWaitTime(i) + taskMetrics.shuffleReadMetrics.incRemoteBytesRead(i) + taskMetrics.shuffleReadMetrics.incRemoteBytesReadToDisk(i) + taskMetrics.shuffleReadMetrics.incLocalBytesRead(i) + taskMetrics.shuffleReadMetrics.incRecordsRead(i) + taskMetrics.shuffleWriteMetrics.incBytesWritten(i) + taskMetrics.shuffleWriteMetrics.incWriteTime(i) + taskMetrics.shuffleWriteMetrics.incRecordsWritten(i) + taskMetrics + } } diff --git a/core/src/test/scala/org/apache/spark/status/ListenerEventsTestHelper.scala b/core/src/test/scala/org/apache/spark/status/ListenerEventsTestHelper.scala new file mode 100644 index 0000000000000..99c0d9593ccae --- /dev/null +++ b/core/src/test/scala/org/apache/spark/status/ListenerEventsTestHelper.scala @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status + +import java.util.Properties + +import scala.collection.immutable.Map + +import org.apache.spark.{AccumulatorSuite, SparkContext, Success, TaskState} +import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} +import org.apache.spark.resource.ResourceProfile +import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded, SparkListenerExecutorMetricsUpdate, SparkListenerExecutorRemoved, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerStageSubmitted, SparkListenerTaskEnd, SparkListenerTaskStart, StageInfo, TaskInfo, TaskLocality} +import org.apache.spark.scheduler.cluster.ExecutorInfo +import org.apache.spark.storage.{RDDInfo, StorageLevel} + +object ListenerEventsTestHelper { + + private var taskIdTracker = -1L + private var rddIdTracker = -1 + private var stageIdTracker = -1 + + def reset(): Unit = { + taskIdTracker = -1L + rddIdTracker = -1 + stageIdTracker = -1 + } + + def createJobProps(): Properties = { + val jobProps = new Properties() + jobProps.setProperty(SparkContext.SPARK_JOB_DESCRIPTION, "jobDescription") + jobProps.setProperty(SparkContext.SPARK_JOB_GROUP_ID, "jobGroup") + jobProps.setProperty(SparkContext.SPARK_SCHEDULER_POOL, "schedPool") + jobProps + } + + def createRddsWithId(ids: Seq[Int]): Seq[RDDInfo] = { + ids.map { rddId => + new RDDInfo(rddId, s"rdd${rddId}", 2, StorageLevel.NONE, false, 
Nil) + } + } + + def createRdds(count: Int): Seq[RDDInfo] = { + (1 to count).map { _ => + val rddId = nextRddId() + new RDDInfo(rddId, s"rdd${rddId}", 2, StorageLevel.NONE, false, Nil) + } + } + + def createStage(id: Int, rdds: Seq[RDDInfo], parentIds: Seq[Int]): StageInfo = { + new StageInfo(id, 0, s"stage${id}", 4, rdds, parentIds, s"details${id}", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + } + + def createStage(rdds: Seq[RDDInfo], parentIds: Seq[Int]): StageInfo = { + createStage(nextStageId(), rdds, parentIds) + } + + def createTasks(ids: Seq[Long], execs: Array[String], time: Long): Seq[TaskInfo] = { + ids.zipWithIndex.map { case (id, idx) => + val exec = execs(idx % execs.length) + new TaskInfo(id, idx, 1, time, exec, s"$exec.example.com", + TaskLocality.PROCESS_LOCAL, idx % 2 == 0) + } + } + + def createTasks(count: Int, execs: Array[String], time: Long): Seq[TaskInfo] = { + createTasks((1 to count).map { _ => nextTaskId() }, execs, time) + } + + def createTaskWithNewAttempt(orig: TaskInfo, time: Long): TaskInfo = { + // Task reattempts have a different ID, but the same index as the original. + new TaskInfo(nextTaskId(), orig.index, orig.attemptNumber + 1, time, orig.executorId, + s"${orig.executorId}.example.com", TaskLocality.PROCESS_LOCAL, orig.speculative) + } + + def createTaskStartEvent( + taskInfo: TaskInfo, + stageId: Int, + attemptId: Int): SparkListenerTaskStart = { + SparkListenerTaskStart(stageId, attemptId, taskInfo) + } + + /** Create a stage submitted event for the specified stage Id. */ + def createStageSubmittedEvent(stageId: Int): SparkListenerStageSubmitted = { + SparkListenerStageSubmitted(new StageInfo(stageId, 0, stageId.toString, 0, + Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) + } + + /** Create a stage completed event for the specified stage Id. 
*/ + def createStageCompletedEvent(stageId: Int): SparkListenerStageCompleted = { + SparkListenerStageCompleted(new StageInfo(stageId, 0, stageId.toString, 0, + Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) + } + + def createExecutorAddedEvent(executorId: Int): SparkListenerExecutorAdded = { + createExecutorAddedEvent(executorId.toString, 0) + } + + /** Create an executor added event for the specified executor Id. */ + def createExecutorAddedEvent(executorId: String, time: Long): SparkListenerExecutorAdded = { + SparkListenerExecutorAdded(time, executorId, + new ExecutorInfo("host1", 1, Map.empty, Map.empty)) + } + + def createExecutorRemovedEvent(executorId: Int): SparkListenerExecutorRemoved = { + createExecutorRemovedEvent(executorId.toString, 10L) + } + + /** Create an executor removed event for the specified executor Id. */ + def createExecutorRemovedEvent(executorId: String, time: Long): SparkListenerExecutorRemoved = { + SparkListenerExecutorRemoved(time, executorId, "test") + } + + /** Create an executor metrics update event, with the specified executor metrics values. 
*/ + def createExecutorMetricsUpdateEvent( + stageId: Int, + executorId: Int, + executorMetrics: Array[Long]): SparkListenerExecutorMetricsUpdate = { + val taskMetrics = TaskMetrics.empty + taskMetrics.incDiskBytesSpilled(111) + taskMetrics.incMemoryBytesSpilled(222) + val accum = Array((333L, 1, 1, taskMetrics.accumulators().map(AccumulatorSuite.makeInfo))) + val executorUpdates = Map((stageId, 0) -> new ExecutorMetrics(executorMetrics)) + SparkListenerExecutorMetricsUpdate(executorId.toString, accum, executorUpdates) + } + + case class JobInfo( + stageIds: Seq[Int], + stageToTaskIds: Map[Int, Seq[Long]], + stageToRddIds: Map[Int, Seq[Int]]) + + def pushJobEventsWithoutJobEnd( + listener: SparkListener, + jobId: Int, + jobProps: Properties, + execIds: Array[String], + time: Long): JobInfo = { + // Start a job with 1 stage / 4 tasks each + val rddsForStage = createRdds(2) + val stage = createStage(rddsForStage, Nil) + + listener.onJobStart(SparkListenerJobStart(jobId, time, Seq(stage), jobProps)) + + // Submit stage + stage.submissionTime = Some(time) + listener.onStageSubmitted(SparkListenerStageSubmitted(stage, jobProps)) + + // Start tasks from stage + val s1Tasks = createTasks(4, execIds, time) + s1Tasks.foreach { task => + listener.onTaskStart(SparkListenerTaskStart(stage.stageId, + stage.attemptNumber(), task)) + } + + // Succeed all tasks in stage. + val s1Metrics = TaskMetrics.empty + s1Metrics.setExecutorCpuTime(2L) + s1Metrics.setExecutorRunTime(4L) + + s1Tasks.foreach { task => + task.markFinished(TaskState.FINISHED, time) + listener.onTaskEnd(SparkListenerTaskEnd(stage.stageId, stage.attemptNumber, + "taskType", Success, task, new ExecutorMetrics, s1Metrics)) + } + + // End stage. 
+ stage.completionTime = Some(time) + listener.onStageCompleted(SparkListenerStageCompleted(stage)) + + JobInfo(Seq(stage.stageId), Map(stage.stageId -> s1Tasks.map(_.taskId)), + Map(stage.stageId -> rddsForStage.map(_.id))) + } + + private def nextTaskId(): Long = { + taskIdTracker += 1 + taskIdTracker + } + + private def nextRddId(): Int = { + rddIdTracker += 1 + rddIdTracker + } + + private def nextStageId(): Int = { + stageIdTracker += 1 + stageIdTracker + } +} diff --git a/core/src/test/scala/org/apache/spark/status/LiveEntitySuite.scala b/core/src/test/scala/org/apache/spark/status/LiveEntitySuite.scala index bb2d2633001f0..35e8a62c93c99 100644 --- a/core/src/test/scala/org/apache/spark/status/LiveEntitySuite.scala +++ b/core/src/test/scala/org/apache/spark/status/LiveEntitySuite.scala @@ -17,8 +17,11 @@ package org.apache.spark.status +import java.util.Arrays + import org.apache.spark.SparkFunSuite -import org.apache.spark.status.api.v1.RDDPartitionInfo +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.{AccumulatorMetadata, CollectionAccumulator} class LiveEntitySuite extends SparkFunSuite { @@ -52,6 +55,17 @@ class LiveEntitySuite extends SparkFunSuite { assert(!seq.exists(_.blockName == items(5).blockName)) } + test("Only show few elements of CollectionAccumulator when converting to v1.AccumulableInfo") { + val acc = new CollectionAccumulator[Int]() + val value = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + acc.setValue(value) + acc.metadata = AccumulatorMetadata(0L, None, false) + val accuInfo = LiveEntityHelpers + .newAccumulatorInfos(Seq(acc.toInfo(Some(acc.value), Some(acc.value))))(0) + assert(accuInfo.update.get == "[1,2,3,4,5,... 5 more items]") + assert(accuInfo.value == "[1,2,3,4,5,... 
5 more items]") + } + private def checkSize(seq: Seq[_], expected: Int): Unit = { assert(seq.length === expected) var count = 0 @@ -60,8 +74,8 @@ class LiveEntitySuite extends SparkFunSuite { } private def newPartition(i: Int): LiveRDDPartition = { - val part = new LiveRDDPartition(i.toString) - part.update(Seq(i.toString), i.toString, i, i) + val part = new LiveRDDPartition(i.toString, StorageLevel.MEMORY_AND_DISK) + part.update(Seq(i.toString), i, i) part } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala index ff4755833a916..ef7b13875540f 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala @@ -22,13 +22,13 @@ import java.util.UUID import org.apache.spark.SparkFunSuite class BlockIdSuite extends SparkFunSuite { - def assertSame(id1: BlockId, id2: BlockId) { + def assertSame(id1: BlockId, id2: BlockId): Unit = { assert(id1.name === id2.name) assert(id1.hashCode === id2.hashCode) assert(id1 === id2) } - def assertDifferent(id1: BlockId, id2: BlockId) { + def assertDifferent(id1: BlockId, id2: BlockId): Unit = { assert(id1.name != id2.name) assert(id1.hashCode != id2.hashCode) assert(id1 != id2) @@ -64,6 +64,20 @@ class BlockIdSuite extends SparkFunSuite { assertSame(id, BlockId(id.toString)) } + test("shuffle batch") { + val id = ShuffleBlockBatchId(1, 2, 3, 4) + assertSame(id, ShuffleBlockBatchId(1, 2, 3, 4)) + assertDifferent(id, ShuffleBlockBatchId(2, 2, 3, 4)) + assert(id.name === "shuffle_1_2_3_4") + assert(id.asRDDId === None) + assert(id.shuffleId === 1) + assert(id.mapId === 2) + assert(id.startReduceId === 3) + assert(id.endReduceId === 4) + assert(id.isShuffle) + assertSame(id, BlockId(id.toString)) + } + test("shuffle data") { val id = ShuffleDataBlockId(4, 5, 6) assertSame(id, ShuffleDataBlockId(4, 5, 6)) diff --git 
a/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala index 49cbd66cccb86..01e3d6a46e709 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala @@ -31,7 +31,6 @@ class BlockManagerInfoSuite extends SparkFunSuite { val bmInfo = new BlockManagerInfo( BlockManagerId("executor0", "host", 1234, None), timeMs = 300, - Array(), maxOnHeapMem = 10000, maxOffHeapMem = 20000, slaveEndpoint = null, diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala index 05a9ac685e5e7..59ace850d0bd2 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.storage import java.util.Locale +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ import scala.language.implicitConversions @@ -97,9 +98,12 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite conf.set(STORAGE_CACHED_PEERS_TTL, 10) sc = new SparkContext("local", "test", conf) + val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]() master = new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager", new BlockManagerMasterEndpoint(rpcEnv, true, conf, - new LiveListenerBus(conf), None)), conf, true) + new LiveListenerBus(conf), None, blockManagerInfo)), + rpcEnv.setupEndpoint("blockmanagerHeartbeat", + new BlockManagerMasterHeartbeatEndpoint(rpcEnv, true, blockManagerInfo)), conf, true) allStores.clear() } @@ -308,7 +312,7 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite * is correct. 
Then it also drops the block from memory of each store (using LRU) and * again checks whether the master's knowledge gets updated. */ - protected def testReplication(maxReplication: Int, storageLevels: Seq[StorageLevel]) { + protected def testReplication(maxReplication: Int, storageLevels: Seq[StorageLevel]): Unit = { import org.apache.spark.storage.StorageLevel._ assert(maxReplication > 1, @@ -431,7 +435,7 @@ class BlockManagerProactiveReplicationSuite extends BlockManagerReplicationBehav } } - def testProactiveReplication(replicationFactor: Int) { + def testProactiveReplication(replicationFactor: Int): Unit = { val blockSize = 1000 val storeSize = 10000 val initialStores = (1 to 10).map { i => makeBlockManager(storeSize, s"store$i") } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 509d4efcab67a..8d06768a2b284 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -21,6 +21,7 @@ import java.io.File import java.nio.ByteBuffer import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.Future import scala.concurrent.duration._ @@ -28,9 +29,8 @@ import scala.language.implicitConversions import scala.reflect.ClassTag import org.apache.commons.lang3.RandomUtils -import org.mockito.{ArgumentMatchers => mc} -import org.mockito.Mockito.{doAnswer, mock, spy, times, verify, when} -import org.mockito.invocation.InvocationOnMock +import org.mockito.{ArgumentCaptor, ArgumentMatchers => mc} +import org.mockito.Mockito.{doAnswer, mock, never, spy, times, verify, when} import org.scalatest._ import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.concurrent.Eventually._ @@ -50,7 +50,7 @@ import org.apache.spark.network.server.{NoOpRpcHandler, 
TransportServer, Transpo import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager, ExecutorDiskUtils, ExternalBlockStoreClient} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, RegisterExecutor} import org.apache.spark.rpc.RpcEnv -import org.apache.spark.scheduler.LiveListenerBus +import org.apache.spark.scheduler.{LiveListenerBus, SparkListenerBlockUpdated} import org.apache.spark.security.{CryptoStreamUtils, EncryptionFunSuite} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, SerializerManager} import org.apache.spark.shuffle.sort.SortShuffleManager @@ -71,6 +71,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val allStores = ArrayBuffer[BlockManager]() var rpcEnv: RpcEnv = null var master: BlockManagerMaster = null + var liveListenerBus: LiveListenerBus = null val securityMgr = new SecurityManager(new SparkConf(false)) val bcastManager = new BroadcastManager(true, new SparkConf(false), securityMgr) val mapOutputTracker = new MapOutputTrackerMaster(new SparkConf(false), bcastManager, true) @@ -143,11 +144,16 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE // need to create a SparkContext is to initialize LiveListenerBus. 
sc = mock(classOf[SparkContext]) when(sc.conf).thenReturn(conf) - master = new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager", + + val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]() + liveListenerBus = spy(new LiveListenerBus(conf)) + master = spy(new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager", new BlockManagerMasterEndpoint(rpcEnv, true, conf, - new LiveListenerBus(conf), None)), conf, true) + liveListenerBus, None, blockManagerInfo)), + rpcEnv.setupEndpoint("blockmanagerHeartbeat", + new BlockManagerMasterHeartbeatEndpoint(rpcEnv, true, blockManagerInfo)), conf, true)) - val initialize = PrivateMethod[Unit]('initialize) + val initialize = PrivateMethod[Unit](Symbol("initialize")) SizeEstimator invokePrivate initialize() } @@ -160,6 +166,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE rpcEnv.awaitTermination() rpcEnv = null master = null + liveListenerBus = null } finally { super.afterEach() } @@ -289,14 +296,19 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE eventually(timeout(1.second), interval(10.milliseconds)) { assert(!store.hasLocalBlock("a1-to-remove")) master.getLocations("a1-to-remove") should have size 0 + assertUpdateBlockInfoReportedForRemovingBlock(store, "a1-to-remove", + removedFromMemory = true, removedFromDisk = false) } eventually(timeout(1.second), interval(10.milliseconds)) { assert(!store.hasLocalBlock("a2-to-remove")) master.getLocations("a2-to-remove") should have size 0 + assertUpdateBlockInfoReportedForRemovingBlock(store, "a2-to-remove", + removedFromMemory = true, removedFromDisk = false) } eventually(timeout(1.second), interval(10.milliseconds)) { assert(store.hasLocalBlock("a3-to-remove")) master.getLocations("a3-to-remove") should have size 0 + assertUpdateBlockInfoNotReported(store, "a3-to-remove") } eventually(timeout(1.second), interval(10.milliseconds)) { val memStatus = master.getMemoryStatus.head._2 @@ 
-375,16 +387,21 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE assert(!executorStore.hasLocalBlock(broadcast0BlockId)) assert(executorStore.hasLocalBlock(broadcast1BlockId)) assert(executorStore.hasLocalBlock(broadcast2BlockId)) + assertUpdateBlockInfoReportedForRemovingBlock(executorStore, broadcast0BlockId, + removedFromMemory = false, removedFromDisk = true) // nothing should be removed from the driver store assert(driverStore.hasLocalBlock(broadcast0BlockId)) assert(driverStore.hasLocalBlock(broadcast1BlockId)) assert(driverStore.hasLocalBlock(broadcast2BlockId)) + assertUpdateBlockInfoNotReported(driverStore, broadcast0BlockId) // remove broadcast 0 block from the driver as well master.removeBroadcast(0, removeFromMaster = true, blocking = true) assert(!driverStore.hasLocalBlock(broadcast0BlockId)) assert(driverStore.hasLocalBlock(broadcast1BlockId)) + assertUpdateBlockInfoReportedForRemovingBlock(driverStore, broadcast0BlockId, + removedFromMemory = false, removedFromDisk = true) // remove broadcast 1 block from both the stores asynchronously // and verify all broadcast 1 blocks have been removed @@ -392,6 +409,10 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE eventually(timeout(1.second), interval(10.milliseconds)) { assert(!driverStore.hasLocalBlock(broadcast1BlockId)) assert(!executorStore.hasLocalBlock(broadcast1BlockId)) + assertUpdateBlockInfoReportedForRemovingBlock(driverStore, broadcast1BlockId, + removedFromMemory = false, removedFromDisk = true) + assertUpdateBlockInfoReportedForRemovingBlock(executorStore, broadcast1BlockId, + removedFromMemory = false, removedFromDisk = true) } // remove broadcast 2 from both the stores asynchronously @@ -402,11 +423,46 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE assert(!driverStore.hasLocalBlock(broadcast2BlockId2)) assert(!executorStore.hasLocalBlock(broadcast2BlockId)) 
assert(!executorStore.hasLocalBlock(broadcast2BlockId2)) + assertUpdateBlockInfoReportedForRemovingBlock(driverStore, broadcast2BlockId, + removedFromMemory = false, removedFromDisk = true) + assertUpdateBlockInfoReportedForRemovingBlock(driverStore, broadcast2BlockId2, + removedFromMemory = false, removedFromDisk = true) + assertUpdateBlockInfoReportedForRemovingBlock(executorStore, broadcast2BlockId, + removedFromMemory = false, removedFromDisk = true) + assertUpdateBlockInfoReportedForRemovingBlock(executorStore, broadcast2BlockId2, + removedFromMemory = false, removedFromDisk = true) } executorStore.stop() driverStore.stop() } + private def assertUpdateBlockInfoReportedForRemovingBlock( + store: BlockManager, + blockId: BlockId, + removedFromMemory: Boolean, + removedFromDisk: Boolean): Unit = { + def assertSizeReported(captor: ArgumentCaptor[Long], expectRemoved: Boolean): Unit = { + assert(captor.getAllValues().size() === 1) + if (expectRemoved) { + assert(captor.getValue() > 0) + } else { + assert(captor.getValue() === 0) + } + } + + val memSizeCaptor = ArgumentCaptor.forClass(classOf[Long]).asInstanceOf[ArgumentCaptor[Long]] + val diskSizeCaptor = ArgumentCaptor.forClass(classOf[Long]).asInstanceOf[ArgumentCaptor[Long]] + verify(master).updateBlockInfo(mc.eq(store.blockManagerId), mc.eq(blockId), + mc.eq(StorageLevel.NONE), memSizeCaptor.capture(), diskSizeCaptor.capture()) + assertSizeReported(memSizeCaptor, removedFromMemory) + assertSizeReported(diskSizeCaptor, removedFromDisk) + } + + private def assertUpdateBlockInfoNotReported(store: BlockManager, blockId: BlockId): Unit = { + verify(master, never()).updateBlockInfo(mc.eq(store.blockManagerId), mc.eq(blockId), + mc.eq(StorageLevel.NONE), mc.anyInt(), mc.anyInt()) + } + test("reregistration on heart beat") { val store = makeBlockManager(2000) val a1 = new Array[Byte](400) @@ -419,7 +475,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE 
master.removeExecutor(store.blockManagerId.executorId) assert(master.getLocations("a1").size == 0, "a1 was not removed from master") - val reregister = !master.driverEndpoint.askSync[Boolean]( + val reregister = !master.driverHeartbeatEndPoint.askSync[Boolean]( BlockManagerHeartbeat(store.blockManagerId)) assert(reregister) } @@ -451,18 +507,18 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE for (i <- 1 to 100) { master.removeExecutor(store.blockManagerId.executorId) val t1 = new Thread { - override def run() { + override def run(): Unit = { store.putIterator( "a2", a2.iterator, StorageLevel.MEMORY_ONLY, tellMaster = true) } } val t2 = new Thread { - override def run() { + override def run(): Unit = { store.putSingle("a1", a1, StorageLevel.MEMORY_ONLY) } } val t3 = new Thread { - override def run() { + override def run(): Unit = { store.reregister() } } @@ -520,7 +576,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE when(bmMaster.getLocations(mc.any[BlockId])).thenReturn(Seq(bmId1, bmId2, bmId3)) val blockManager = makeBlockManager(128, "exec", bmMaster) - val sortLocations = PrivateMethod[Seq[BlockManagerId]]('sortLocations) + val sortLocations = PrivateMethod[Seq[BlockManagerId]](Symbol("sortLocations")) val locations = blockManager invokePrivate sortLocations(bmMaster.getLocations("test")) assert(locations.map(_.host) === Seq(localHost, localHost, otherHost)) } @@ -543,7 +599,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val blockManager = makeBlockManager(128, "exec", bmMaster) blockManager.blockManagerId = BlockManagerId(SparkContext.DRIVER_IDENTIFIER, localHost, 1, Some(localRack)) - val sortLocations = PrivateMethod[Seq[BlockManagerId]]('sortLocations) + val sortLocations = PrivateMethod[Seq[BlockManagerId]](Symbol("sortLocations")) val locations = blockManager invokePrivate sortLocations(bmMaster.getLocations("test")) assert(locations.map(_.host) 
=== Seq(localHost, localHost, otherHost, otherHost, otherHost)) assert(locations.flatMap(_.topologyInfo) @@ -601,7 +657,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE // check getRemoteBytes val bytesViaStore1 = cleanBm.getRemoteBytes(blockId) assert(bytesViaStore1.isDefined) - val expectedContent = sameHostBm.getBlockData(blockId).nioByteBuffer().array() + val expectedContent = sameHostBm.getLocalBlockData(blockId).nioByteBuffer().array() assert(bytesViaStore1.get.toArray === expectedContent) // check getRemoteValues @@ -1042,7 +1098,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val blockStatus = blockStatusOption.get assert((blockStatus.diskSize > 0) === !storageLevel.useMemory) assert((blockStatus.memSize > 0) === storageLevel.useMemory) - assert(blockManager.getBlockData(blockId).nioByteBuffer().array() === ser) + assert(blockManager.getLocalBlockData(blockId).nioByteBuffer().array() === ser) } Seq( @@ -1640,6 +1696,16 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE assert(locs(blockIds(0)) == expectedLocs) } + test("SPARK-30594: Do not post SparkListenerBlockUpdated when updateBlockInfo returns false") { + // update block info for non-existent block manager + val updateInfo = UpdateBlockInfo(BlockManagerId("1", "host1", 100), + BlockId("test_1"), StorageLevel.MEMORY_ONLY, 1, 1) + val result = master.driverEndpoint.askSync[Boolean](updateInfo) + + assert(!result) + verify(liveListenerBus, never()).post(SparkListenerBlockUpdated(BlockUpdatedInfo(updateInfo))) + } + class MockBlockTransferService(val maxFailures: Int) extends BlockTransferService { var numCalls = 0 var tempFileManager: DownloadFileManager = null diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala index 0c4f3c48ef802..ccc525e854838 100644 --- 
a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.storage import java.io.{File, FileWriter} -import java.util.UUID import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} @@ -33,14 +32,14 @@ class DiskBlockManagerSuite extends SparkFunSuite with BeforeAndAfterEach with B var diskBlockManager: DiskBlockManager = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() rootDir0 = Utils.createTempDir() rootDir1 = Utils.createTempDir() rootDirs = rootDir0.getAbsolutePath + "," + rootDir1.getAbsolutePath } - override def afterAll() { + override def afterAll(): Unit = { try { Utils.deleteRecursively(rootDir0) Utils.deleteRecursively(rootDir1) @@ -49,14 +48,14 @@ class DiskBlockManagerSuite extends SparkFunSuite with BeforeAndAfterEach with B } } - override def beforeEach() { + override def beforeEach(): Unit = { super.beforeEach() val conf = testConf.clone - conf.set("spark.local.dir", rootDirs) + conf.set("spark.local.dir", rootDirs).set("spark.diskStore.subDirectories", "1") diskBlockManager = new DiskBlockManager(conf, deleteFilesOnStop = true) } - override def afterEach() { + override def afterEach(): Unit = { try { diskBlockManager.stop() } finally { @@ -86,9 +85,50 @@ class DiskBlockManagerSuite extends SparkFunSuite with BeforeAndAfterEach with B assert(diskBlockManager.getAllBlocks().isEmpty) } - def writeToFile(file: File, numBytes: Int) { + def writeToFile(file: File, numBytes: Int): Unit = { val writer = new FileWriter(file, true) for (i <- 0 until numBytes) writer.write(i) writer.close() } + + test("temporary shuffle/local file should be able to handle disk failures") { + try { + // the following two lines pre-create subdirectories under each root dir of block manager + diskBlockManager.getFile("1") + diskBlockManager.getFile("2") + + val tempShuffleFile1 = 
diskBlockManager.createTempShuffleBlock()._2 + val tempLocalFile1 = diskBlockManager.createTempLocalBlock()._2 + assert(tempShuffleFile1.exists(), "There are no bad disks, so temp shuffle file exists") + assert(tempLocalFile1.exists(), "There are no bad disks, so temp local file exists") + + // partial disks damaged + rootDir0.setExecutable(false) + val tempShuffleFile2 = diskBlockManager.createTempShuffleBlock()._2 + val tempLocalFile2 = diskBlockManager.createTempLocalBlock()._2 + // It's possible that after 10 retries we still not able to find the healthy disk. we need to + // remove the flakiness of these two asserts + if (tempShuffleFile2.getParentFile.getParentFile.getParent === rootDir1.getAbsolutePath) { + assert(tempShuffleFile2.exists(), + "There is only one bad disk, so temp shuffle file should be created") + } + if (tempLocalFile2.getParentFile.getParentFile.getParent === rootDir1.getAbsolutePath) { + assert(tempLocalFile2.exists(), + "There is only one bad disk, so temp local file should be created") + } + + // all disks damaged + rootDir1.setExecutable(false) + val tempShuffleFile3 = diskBlockManager.createTempShuffleBlock()._2 + val tempLocalFile3 = diskBlockManager.createTempLocalBlock()._2 + assert(!tempShuffleFile3.exists(), + "All disks are broken, so there should be no temp shuffle file created") + assert(!tempLocalFile3.exists(), + "All disks are broken, so there should be no temp local file created") + } finally { + rootDir0.setExecutable(true) + rootDir1.setExecutable(true) + } + + } } diff --git a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala index a7231411e81de..ccd7e4b62ad9e 100644 --- a/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/MemoryStoreSuite.scala @@ -55,7 +55,7 @@ class MemoryStoreSuite super.beforeEach() // Set the arch to 64-bit and compressedOops to true to get a 
deterministic test-case System.setProperty("os.arch", "amd64") - val initialize = PrivateMethod[Unit]('initialize) + val initialize = PrivateMethod[Unit](Symbol("initialize")) SizeEstimator invokePrivate initialize() } diff --git a/core/src/test/scala/org/apache/spark/storage/PartiallySerializedBlockSuite.scala b/core/src/test/scala/org/apache/spark/storage/PartiallySerializedBlockSuite.scala index 3dbc1c4b457a8..8177ef6e140b2 100644 --- a/core/src/test/scala/org/apache/spark/storage/PartiallySerializedBlockSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/PartiallySerializedBlockSuite.scala @@ -43,9 +43,10 @@ class PartiallySerializedBlockSuite private val memoryStore = Mockito.mock(classOf[MemoryStore], Mockito.RETURNS_SMART_NULLS) private val serializerManager = new SerializerManager(new JavaSerializer(conf), conf) - private val getSerializationStream = PrivateMethod[SerializationStream]('serializationStream) + private val getSerializationStream = + PrivateMethod[SerializationStream](Symbol("serializationStream")) private val getRedirectableOutputStream = - PrivateMethod[RedirectableOutputStream]('redirectableOutputStream) + PrivateMethod[RedirectableOutputStream](Symbol("redirectableOutputStream")) override protected def beforeEach(): Unit = { super.beforeEach() diff --git a/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala index 56860b2e55709..74442c2966a72 100644 --- a/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/PartiallyUnrolledIteratorSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import org.mockito.ArgumentMatchers.{eq => meq} import org.mockito.Mockito._ -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.SparkFunSuite import 
org.apache.spark.memory.MemoryMode.ON_HEAP diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index ed402440e74f1..45f47c7c49bca 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.storage import java.io._ import java.nio.ByteBuffer import java.util.UUID -import java.util.concurrent.Semaphore +import java.util.concurrent.{CompletableFuture, Semaphore} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future @@ -33,7 +33,7 @@ import org.scalatest.PrivateMethodTester import org.apache.spark.{SparkFunSuite, TaskContext} import org.apache.spark.network._ import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} -import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager} +import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager, ExternalBlockStoreClient} import org.apache.spark.network.util.LimitedInputStream import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.util.Utils @@ -65,6 +65,29 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT transfer } + private def initHostLocalDirManager( + blockManager: BlockManager, + hostLocalDirs: Map[String, Array[String]]): Unit = { + val mockExternalBlockStoreClient = mock(classOf[ExternalBlockStoreClient]) + val hostLocalDirManager = new HostLocalDirManager( + futureExecutionContext = global, + cacheSize = 1, + externalBlockStoreClient = mockExternalBlockStoreClient, + host = "localhost", + externalShuffleServicePort = 7337) + + when(blockManager.hostLocalDirManager).thenReturn(Some(hostLocalDirManager)) + when(mockExternalBlockStoreClient.getHostLocalDirs(any(), 
any(), any(), any())) + .thenAnswer { invocation => + val completableFuture = invocation.getArguments()(3) + .asInstanceOf[CompletableFuture[java.util.Map[String, Array[String]]]] + import scala.collection.JavaConverters._ + completableFuture.complete(hostLocalDirs.asJava) + } + + blockManager.hostLocalDirManager = Some(hostLocalDirManager) + } + // Create a mock managed buffer for testing def createMockManagedBuffer(size: Int = 1): ManagedBuffer = { val mockManagedBuffer = mock(classOf[ManagedBuffer]) @@ -76,9 +99,24 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT mockManagedBuffer } - test("successful 3 local reads + 2 remote reads") { + def verifyBufferRelease(buffer: ManagedBuffer, inputStream: InputStream): Unit = { + // Note: ShuffleBlockFetcherIterator wraps input streams in a BufferReleasingInputStream + val wrappedInputStream = inputStream.asInstanceOf[BufferReleasingInputStream] + verify(buffer, times(0)).release() + val delegateAccess = PrivateMethod[InputStream](Symbol("delegate")) + + verify(wrappedInputStream.invokePrivate(delegateAccess()), times(0)).close() + wrappedInputStream.close() + verify(buffer, times(1)).release() + verify(wrappedInputStream.invokePrivate(delegateAccess()), times(1)).close() + wrappedInputStream.close() // close should be idempotent + verify(buffer, times(1)).release() + verify(wrappedInputStream.invokePrivate(delegateAccess()), times(1)).close() + } + + test("successful 3 local + 4 host local + 2 remote reads") { val blockManager = mock(classOf[BlockManager]) - val localBmId = BlockManagerId("test-client", "test-client", 1) + val localBmId = BlockManagerId("test-local-client", "test-local-host", 1) doReturn(localBmId).when(blockManager).blockManagerId // Make sure blockManager.getBlockData would return the blocks @@ -87,20 +125,38 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT ShuffleBlockId(0, 1, 0) -> createMockManagedBuffer(), ShuffleBlockId(0, 2, 
0) -> createMockManagedBuffer()) localBlocks.foreach { case (blockId, buf) => - doReturn(buf).when(blockManager).getBlockData(meq(blockId)) + doReturn(buf).when(blockManager).getLocalBlockData(meq(blockId)) } // Make sure remote blocks would return - val remoteBmId = BlockManagerId("test-client-1", "test-client-1", 2) + val remoteBmId = BlockManagerId("test-remote-client-1", "test-remote-host", 2) val remoteBlocks = Map[BlockId, ManagedBuffer]( ShuffleBlockId(0, 3, 0) -> createMockManagedBuffer(), ShuffleBlockId(0, 4, 0) -> createMockManagedBuffer()) val transfer = createMockTransfer(remoteBlocks) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (localBmId, localBlocks.keys.map(blockId => (blockId, 1.asInstanceOf[Long])).toSeq), - (remoteBmId, remoteBlocks.keys.map(blockId => (blockId, 1.asInstanceOf[Long])).toSeq) + // Create a block manager running on the same host (host-local) + val hostLocalBmId = BlockManagerId("test-host-local-client-1", "test-local-host", 3) + val hostLocalBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockId(0, 5, 0) -> createMockManagedBuffer(), + ShuffleBlockId(0, 6, 0) -> createMockManagedBuffer(), + ShuffleBlockId(0, 7, 0) -> createMockManagedBuffer(), + ShuffleBlockId(0, 8, 0) -> createMockManagedBuffer()) + + hostLocalBlocks.foreach { case (blockId, buf) => + doReturn(buf) + .when(blockManager) + .getHostLocalShuffleData(meq(blockId.asInstanceOf[ShuffleBlockId]), any()) + } + val hostLocalDirs = Map("test-host-local-client-1" -> Array("local-dir")) + // returning local dir for hostLocalBmId + initHostLocalDirManager(blockManager, hostLocalDirs) + + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (localBmId, localBlocks.keys.map(blockId => (blockId, 1L, 0)).toSeq), + (remoteBmId, remoteBlocks.keys.map(blockId => (blockId, 1L, 1)).toSeq), + (hostLocalBmId, hostLocalBlocks.keys.map(blockId => (blockId, 1L, 1)).toSeq) ).toIterator val taskContext = TaskContext.empty() @@ -117,35 +173,229 @@ 
class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, false, - metrics) + metrics, + false) // 3 local blocks fetched in initialization - verify(blockManager, times(3)).getBlockData(any()) + verify(blockManager, times(3)).getLocalBlockData(any()) - for (i <- 0 until 5) { - assert(iterator.hasNext, s"iterator should have 5 elements but actually has $i elements") + val allBlocks = localBlocks ++ remoteBlocks ++ hostLocalBlocks + for (i <- 0 until allBlocks.size) { + assert(iterator.hasNext, + s"iterator should have ${allBlocks.size} elements but actually has $i elements") val (blockId, inputStream) = iterator.next() // Make sure we release buffers when a wrapped input stream is closed. - val mockBuf = localBlocks.getOrElse(blockId, remoteBlocks(blockId)) - // Note: ShuffleBlockFetcherIterator wraps input streams in a BufferReleasingInputStream - val wrappedInputStream = inputStream.asInstanceOf[BufferReleasingInputStream] - verify(mockBuf, times(0)).release() - val delegateAccess = PrivateMethod[InputStream]('delegate) - - verify(wrappedInputStream.invokePrivate(delegateAccess()), times(0)).close() - wrappedInputStream.close() - verify(mockBuf, times(1)).release() - verify(wrappedInputStream.invokePrivate(delegateAccess()), times(1)).close() - wrappedInputStream.close() // close should be idempotent - verify(mockBuf, times(1)).release() - verify(wrappedInputStream.invokePrivate(delegateAccess()), times(1)).close() + val mockBuf = allBlocks(blockId) + verifyBufferRelease(mockBuf, inputStream) } - // 3 local blocks, and 2 remote blocks - // (but from the same block manager so one call to fetchBlocks) - verify(blockManager, times(3)).getBlockData(any()) + // 4 host-local locks fetched + verify(blockManager, times(4)) + .getHostLocalShuffleData(any(), meq(Array("local-dir"))) + + // 2 remote blocks are read from the same block manager verify(transfer, times(1)).fetchBlocks(any(), any(), any(), any(), any(), any()) + 
assert(blockManager.hostLocalDirManager.get.getCachedHostLocalDirs().size === 1) + } + + test("error during accessing host local dirs for executors") { + val blockManager = mock(classOf[BlockManager]) + val localBmId = BlockManagerId("test-local-client", "test-local-host", 1) + doReturn(localBmId).when(blockManager).blockManagerId + val hostLocalBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockId(0, 1, 0) -> createMockManagedBuffer()) + + hostLocalBlocks.foreach { case (blockId, buf) => + doReturn(buf) + .when(blockManager) + .getHostLocalShuffleData(meq(blockId.asInstanceOf[ShuffleBlockId]), any()) + } + val hostLocalBmId = BlockManagerId("test-host-local-client-1", "test-local-host", 3) + + val mockExternalBlockStoreClient = mock(classOf[ExternalBlockStoreClient]) + val hostLocalDirManager = new HostLocalDirManager( + futureExecutionContext = global, + cacheSize = 1, + externalBlockStoreClient = mockExternalBlockStoreClient, + host = "localhost", + externalShuffleServicePort = 7337) + + when(blockManager.hostLocalDirManager).thenReturn(Some(hostLocalDirManager)) + when(mockExternalBlockStoreClient.getHostLocalDirs(any(), any(), any(), any())) + .thenAnswer { invocation => + val completableFuture = invocation.getArguments()(3) + .asInstanceOf[CompletableFuture[java.util.Map[String, Array[String]]]] + completableFuture.completeExceptionally(new Throwable("failed fetch")) + } + + blockManager.hostLocalDirManager = Some(hostLocalDirManager) + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (hostLocalBmId, hostLocalBlocks.keys.map(blockId => (blockId, 1L, 1)).toSeq) + ).toIterator + + val transfer = createMockTransfer(Map()) + val taskContext = TaskContext.empty() + val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics() + val iterator = new ShuffleBlockFetcherIterator( + taskContext, + transfer, + blockManager, + blocksByAddress, + (_, in) => in, + 48 * 1024 * 1024, + Int.MaxValue, + Int.MaxValue, + Int.MaxValue, + true, + 
false, + metrics, + false) + intercept[FetchFailedException] { iterator.next() } + } + + test("fetch continuous blocks in batch successful 3 local + 4 host local + 2 remote reads") { + val blockManager = mock(classOf[BlockManager]) + val localBmId = BlockManagerId("test-client", "test-local-host", 1) + doReturn(localBmId).when(blockManager).blockManagerId + + // Make sure blockManager.getBlockData would return the merged block + val localBlocks = Seq[BlockId]( + ShuffleBlockId(0, 0, 0), + ShuffleBlockId(0, 0, 1), + ShuffleBlockId(0, 0, 2)) + val mergedLocalBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockBatchId(0, 0, 0, 3) -> createMockManagedBuffer()) + mergedLocalBlocks.foreach { case (blockId, buf) => + doReturn(buf).when(blockManager).getLocalBlockData(meq(blockId)) + } + + // Make sure remote blocks would return the merged block + val remoteBmId = BlockManagerId("test-client-1", "test-client-1", 2) + val remoteBlocks = Seq[BlockId]( + ShuffleBlockId(0, 3, 0), + ShuffleBlockId(0, 3, 1)) + val mergedRemoteBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockBatchId(0, 3, 0, 2) -> createMockManagedBuffer()) + val transfer = createMockTransfer(mergedRemoteBlocks) + + // Create a block manager running on the same host (host-local) + val hostLocalBmId = BlockManagerId("test-host-local-client-1", "test-local-host", 3) + val hostLocalBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockId(0, 4, 0) -> createMockManagedBuffer(), + ShuffleBlockId(0, 4, 1) -> createMockManagedBuffer(), + ShuffleBlockId(0, 4, 2) -> createMockManagedBuffer(), + ShuffleBlockId(0, 4, 3) -> createMockManagedBuffer()) + val mergedHostLocalBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockBatchId(0, 4, 0, 4) -> createMockManagedBuffer()) + + mergedHostLocalBlocks.foreach { case (blockId, buf) => + doReturn(buf) + .when(blockManager) + .getHostLocalShuffleData(meq(blockId.asInstanceOf[ShuffleBlockBatchId]), any()) + } + val hostLocalDirs = Map("test-host-local-client-1" -> Array("local-dir")) 
+ // returning local dir for hostLocalBmId + initHostLocalDirManager(blockManager, hostLocalDirs) + + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (localBmId, localBlocks.map(blockId => (blockId, 1L, 0))), + (remoteBmId, remoteBlocks.map(blockId => (blockId, 1L, 1))), + (hostLocalBmId, hostLocalBlocks.keys.map(blockId => (blockId, 1L, 1)).toSeq) + ).toIterator + + val taskContext = TaskContext.empty() + val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics() + val iterator = new ShuffleBlockFetcherIterator( + taskContext, + transfer, + blockManager, + blocksByAddress, + (_, in) => in, + 48 * 1024 * 1024, + Int.MaxValue, + Int.MaxValue, + Int.MaxValue, + true, + false, + metrics, + true) + + // 3 local blocks batch fetched in initialization + verify(blockManager, times(1)).getLocalBlockData(any()) + + val allBlocks = mergedLocalBlocks ++ mergedRemoteBlocks ++ mergedHostLocalBlocks + for (i <- 0 until 3) { + assert(iterator.hasNext, s"iterator should have 3 elements but actually has $i elements") + val (blockId, inputStream) = iterator.next() + verify(transfer, times(1)).fetchBlocks(any(), any(), any(), any(), any(), any()) + // Make sure we release buffers when a wrapped input stream is closed. 
+ val mockBuf = allBlocks(blockId) + verifyBufferRelease(mockBuf, inputStream) + } + + // 4 host-local locks fetched + verify(blockManager, times(1)) + .getHostLocalShuffleData(any(), meq(Array("local-dir"))) + + assert(blockManager.hostLocalDirManager.get.getCachedHostLocalDirs().size === 1) + } + + test("fetch continuous blocks in batch respects maxSize and maxBlocks") { + val blockManager = mock(classOf[BlockManager]) + val localBmId = BlockManagerId("test-client", "test-local-host", 1) + doReturn(localBmId).when(blockManager).blockManagerId + + // Make sure remote blocks would return the merged block + val remoteBmId = BlockManagerId("test-client-1", "test-client-1", 2) + val remoteBlocks = Seq[BlockId]( + ShuffleBlockId(0, 3, 0), + ShuffleBlockId(0, 3, 1), + ShuffleBlockId(0, 3, 2), + ShuffleBlockId(0, 4, 0), + ShuffleBlockId(0, 4, 1), + ShuffleBlockId(0, 5, 0), + ShuffleBlockId(0, 5, 1), + ShuffleBlockId(0, 5, 2)) + val mergedRemoteBlocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockBatchId(0, 3, 0, 3) -> createMockManagedBuffer(), + ShuffleBlockBatchId(0, 4, 0, 2) -> createMockManagedBuffer(), + ShuffleBlockBatchId(0, 5, 0, 3) -> createMockManagedBuffer()) + val transfer = createMockTransfer(mergedRemoteBlocks) + + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, remoteBlocks.map(blockId => (blockId, 1L, 1))) + ).toIterator + + val taskContext = TaskContext.empty() + val metrics = taskContext.taskMetrics.createTempShuffleReadMetrics() + val iterator = new ShuffleBlockFetcherIterator( + taskContext, + transfer, + blockManager, + blocksByAddress, + (_, in) => in, + 35, + Int.MaxValue, + 2, + Int.MaxValue, + true, + false, + metrics, + true) + + var numResults = 0 + while (iterator.hasNext) { + val (blockId, inputStream) = iterator.next() + // Make sure we release buffers when a wrapped input stream is closed. 
+ val mockBuf = mergedRemoteBlocks(blockId) + verifyBufferRelease(mockBuf, inputStream) + numResults += 1 + } + // The first 2 batch block ids are in the same fetch request as they don't exceed the max size + // and max blocks, so 2 requests in total. + verify(transfer, times(2)).fetchBlocks(any(), any(), any(), any(), any(), any()) + assert(numResults == 3) } test("release current unexhausted buffer in case the task completes early") { @@ -179,8 +429,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT } }) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (remoteBmId, blocks.keys.map(blockId => (blockId, 1.asInstanceOf[Long])).toSeq)).toIterator + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, blocks.keys.map(blockId => (blockId, 1L, 0)).toSeq)).toIterator val taskContext = TaskContext.empty() val iterator = new ShuffleBlockFetcherIterator( @@ -195,7 +445,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, false, - taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) verify(blocks(ShuffleBlockId(0, 0, 0)), times(0)).release() iterator.next()._2.close() // close() first block's input stream @@ -247,8 +498,9 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT } }) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (remoteBmId, blocks.keys.map(blockId => (blockId, 1.asInstanceOf[Long])).toSeq)).toIterator + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, blocks.keys.map(blockId => (blockId, 1L, 0)).toSeq)) + .toIterator val taskContext = TaskContext.empty() val iterator = new ShuffleBlockFetcherIterator( @@ -263,7 +515,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, false, - 
taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) // Continue only after the mock calls onBlockFetchFailure sem.acquire() @@ -336,8 +589,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT } }) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (remoteBmId, blocks.keys.map(blockId => (blockId, 1.asInstanceOf[Long])).toSeq)).toIterator + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, blocks.keys.map(blockId => (blockId, 1L, 0)).toSeq)).toIterator val taskContext = TaskContext.empty() val iterator = new ShuffleBlockFetcherIterator( @@ -352,7 +605,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, true, - taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) // Continue only after the mock calls onBlockFetchFailure sem.acquire() @@ -389,8 +643,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT val corruptBuffer1 = mockCorruptBuffer(streamLength, 0) val blockManagerId1 = BlockManagerId("remote-client-1", "remote-client-1", 1) val shuffleBlockId1 = ShuffleBlockId(0, 1, 0) - val blockLengths1 = Seq[Tuple2[BlockId, Long]]( - shuffleBlockId1 -> corruptBuffer1.size() + val blockLengths1 = Seq[Tuple3[BlockId, Long, Int]]( + (shuffleBlockId1, corruptBuffer1.size(), 1) ) val streamNotCorruptTill = 8 * 1024 @@ -398,13 +652,13 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT val corruptBuffer2 = mockCorruptBuffer(streamLength, streamNotCorruptTill) val blockManagerId2 = BlockManagerId("remote-client-2", "remote-client-2", 2) val shuffleBlockId2 = ShuffleBlockId(0, 2, 0) - val blockLengths2 = Seq[Tuple2[BlockId, Long]]( - shuffleBlockId2 -> corruptBuffer2.size() + val blockLengths2 = Seq[Tuple3[BlockId, Long, Int]]( + 
(shuffleBlockId2, corruptBuffer2.size(), 2) ) val transfer = createMockTransfer( Map(shuffleBlockId1 -> corruptBuffer1, shuffleBlockId2 -> corruptBuffer2)) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( (blockManagerId1, blockLengths1), (blockManagerId2, blockLengths2) ).toIterator @@ -422,7 +676,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, true, - taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) // We'll get back the block which has corruption after maxBytesInFlight/3 because the other // block will detect corruption on first fetch, and then get added to the queue again for @@ -464,12 +719,12 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT val blockManager = mock(classOf[BlockManager]) val localBmId = BlockManagerId("test-client", "test-client", 1) doReturn(localBmId).when(blockManager).blockManagerId - doReturn(managedBuffer).when(blockManager).getBlockData(ShuffleBlockId(0, 0, 0)) - val localBlockLengths = Seq[Tuple2[BlockId, Long]]( - ShuffleBlockId(0, 0, 0) -> 10000 + doReturn(managedBuffer).when(blockManager).getLocalBlockData(meq(ShuffleBlockId(0, 0, 0))) + val localBlockLengths = Seq[Tuple3[BlockId, Long, Int]]( + (ShuffleBlockId(0, 0, 0), 10000, 0) ) val transfer = createMockTransfer(Map(ShuffleBlockId(0, 0, 0) -> managedBuffer)) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( (localBmId, localBlockLengths) ).toIterator @@ -486,7 +741,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, true, - taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) val (id, st) = iterator.next() // 
Check that the test setup is correct -- make sure we have a concatenated stream. assert (st.asInstanceOf[BufferReleasingInputStream].delegate.isInstanceOf[SequenceInputStream]) @@ -531,8 +787,9 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT } }) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (remoteBmId, blocks.keys.map(blockId => (blockId, 1.asInstanceOf[Long])).toSeq)).toIterator + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, blocks.keys.map(blockId => (blockId, 1L, 0)).toSeq)) + .toIterator val taskContext = TaskContext.empty() val iterator = new ShuffleBlockFetcherIterator( @@ -547,7 +804,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, false, - taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) // Continue only after the mock calls onBlockFetchFailure sem.acquire() @@ -591,7 +849,7 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT }) def fetchShuffleBlock( - blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long)])]): Unit = { + blocksByAddress: Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])]): Unit = { // Set `maxBytesInFlight` and `maxReqsInFlight` to `Int.MaxValue`, so that during the // construction of `ShuffleBlockFetcherIterator`, all requests to fetch remote shuffle blocks // are issued. The `maxReqSizeShuffleToMem` is hard-coded as 200 here. 
@@ -608,18 +866,19 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT maxReqSizeShuffleToMem = 200, detectCorrupt = true, false, - taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) } - val blocksByAddress1 = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (remoteBmId, remoteBlocks.keys.map(blockId => (blockId, 100L)).toSeq)).toIterator + val blocksByAddress1 = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, remoteBlocks.keys.map(blockId => (blockId, 100L, 0)).toSeq)).toIterator fetchShuffleBlock(blocksByAddress1) // `maxReqSizeShuffleToMem` is 200, which is greater than the block size 100, so don't fetch // shuffle block to disk. assert(tempFileManager == null) - val blocksByAddress2 = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (remoteBmId, remoteBlocks.keys.map(blockId => (blockId, 300L)).toSeq)).toIterator + val blocksByAddress2 = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, remoteBlocks.keys.map(blockId => (blockId, 300L, 0)).toSeq)).toIterator fetchShuffleBlock(blocksByAddress2) // `maxReqSizeShuffleToMem` is 200, which is smaller than the block size 300, so fetch // shuffle block to disk. 
@@ -640,8 +899,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT val transfer = createMockTransfer(blocks.mapValues(_ => createMockManagedBuffer(0))) - val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long)])]( - (remoteBmId, blocks.keys.map(blockId => (blockId, 1.asInstanceOf[Long])).toSeq)) + val blocksByAddress = Seq[(BlockManagerId, Seq[(BlockId, Long, Int)])]( + (remoteBmId, blocks.keys.map(blockId => (blockId, 1L, 0)).toSeq)) val taskContext = TaskContext.empty() val iterator = new ShuffleBlockFetcherIterator( @@ -656,7 +915,8 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT Int.MaxValue, true, false, - taskContext.taskMetrics.createTempShuffleReadMetrics()) + taskContext.taskMetrics.createTempShuffleReadMetrics(), + false) // All blocks fetched return zero length and should trigger a receive-side error: val e = intercept[FetchFailedException] { iterator.next() } diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala index bd18e9e628da8..7711934cbe8a6 100644 --- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala @@ -27,6 +27,7 @@ import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.apache.spark._ import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.internal.config.Status._ +import org.apache.spark.resource.ResourceProfile import org.apache.spark.scheduler._ import org.apache.spark.status.AppStatusStore import org.apache.spark.status.api.v1.{AccumulableInfo => UIAccumulableInfo, StageData, StageStatus} @@ -131,7 +132,8 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext { val page = new StagePage(tab, statusStore) // Simulate a stage in job progress listener - val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details") + val stageInfo 
= new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness (1 to 2).foreach { taskId => diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index 1913b8d425519..9f0cdeac9ca39 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.ui -import java.net.{HttpURLConnection, URL} +import java.net.URL import java.util.Locale import javax.servlet.http.{HttpServletRequest, HttpServletResponse} @@ -31,8 +31,8 @@ import org.openqa.selenium.{By, WebDriver} import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest._ import org.scalatest.concurrent.Eventually._ -import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ +import org.scalatestplus.selenium.WebBrowser import org.w3c.css.sac.CSSParseException import org.apache.spark._ @@ -233,7 +233,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B test("spark.ui.killEnabled should properly control kill button display") { def hasKillLink: Boolean = find(className("kill-link")).isDefined - def runSlowJob(sc: SparkContext) { + def runSlowJob(sc: SparkContext): Unit = { sc.parallelize(1 to 10).map{x => Thread.sleep(10000); x}.countAsync() } @@ -316,10 +316,12 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B val env = SparkEnv.get val bmAddress = env.blockManager.blockManagerId val shuffleId = shuffleHandle.shuffleId - val mapId = 0 + val mapId = 0L + val mapIndex = 0 val reduceId = taskContext.partitionId() val message = "Simulated fetch failure" - throw new FetchFailedException(bmAddress, shuffleId, mapId, reduceId, message) + throw new FetchFailedException( + bmAddress, shuffleId, 
mapId, mapIndex, reduceId, message) } else { x } @@ -754,6 +756,22 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B } } + test("description for empty jobs") { + withSpark(newSparkContext()) { sc => + sc.emptyRDD[Int].collect + val description = "This is my job" + sc.setJobDescription(description) + sc.emptyRDD[Int].collect + + eventually(timeout(10.seconds), interval(50.milliseconds)) { + goToUi(sc, "/jobs") + val descriptions = findAll(className("description-input")).toArray + descriptions(0).text should be (description) + descriptions(1).text should include ("collect") + } + } + } + def goToUi(sc: SparkContext, path: String): Unit = { goToUi(sc.ui.get, path) } diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 34fd218437f87..2ad4a634cd9a7 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -32,7 +32,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.LocalSparkContext._ -import org.apache.spark.internal.config.UI.UI_ENABLED +import org.apache.spark.internal.config.UI import org.apache.spark.util.Utils class UISuite extends SparkFunSuite { @@ -45,7 +45,7 @@ class UISuite extends SparkFunSuite { val conf = new SparkConf() .setMaster("local") .setAppName("test") - .set(UI_ENABLED, true) + .set(UI.UI_ENABLED, true) val sc = new SparkContext(conf) assert(sc.ui.isDefined) sc @@ -273,7 +273,6 @@ class UISuite extends SparkFunSuite { val (_, testContext) = newContext("/test2") serverInfo.addHandler(testContext, securityMgr) - testContext.start() val httpPort = serverInfo.boundPort @@ -318,6 +317,54 @@ class UISuite extends SparkFunSuite { } } + test("redirect with proxy server support") { + val proxyRoot = "https://proxy.example.com:443/prefix" + val (conf, securityMgr, sslOptions) = sslDisabledConf() + conf.set(UI.PROXY_REDIRECT_URI, 
proxyRoot) + + val serverInfo = JettyUtils.startJettyServer("0.0.0.0", 0, sslOptions, conf) + try { + val serverAddr = s"http://localhost:${serverInfo.boundPort}" + + val (_, ctx) = newContext("/ctx1") + serverInfo.addHandler(ctx, securityMgr) + + val redirect = JettyUtils.createRedirectHandler("/src", "/dst") + serverInfo.addHandler(redirect, securityMgr) + + // Test Jetty's built-in redirect to add the trailing slash to the context path. + TestUtils.withHttpConnection(new URL(s"$serverAddr/ctx1")) { conn => + assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND) + val location = Option(conn.getHeaderFields().get("Location")) + .map(_.get(0)).orNull + assert(location === s"$proxyRoot/ctx1/") + } + + // Test with a URL handled by the added redirect handler, and also including a path prefix. + val headers = Seq("X-Forwarded-Context" -> "/prefix") + TestUtils.withHttpConnection( + new URL(s"$serverAddr/src/"), + headers = headers) { conn => + assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND) + val location = Option(conn.getHeaderFields().get("Location")) + .map(_.get(0)).orNull + assert(location === s"$proxyRoot/prefix/dst") + } + + // Not really used by Spark, but test with a relative redirect. + val relative = JettyUtils.createRedirectHandler("/rel", "root") + serverInfo.addHandler(relative, securityMgr) + TestUtils.withHttpConnection(new URL(s"$serverAddr/rel/")) { conn => + assert(conn.getResponseCode() === HttpServletResponse.SC_FOUND) + val location = Option(conn.getHeaderFields().get("Location")) + .map(_.get(0)).orNull + assert(location === s"$proxyRoot/rel/root") + } + } finally { + stopServer(serverInfo) + } + } + /** * Create a new context handler for the given path, with a single servlet that responds to * requests in `$path/root`. 
diff --git a/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala b/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala index de105b6f188f5..82773e3cc6860 100644 --- a/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UIUtilsSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.ui import scala.xml.{Node, Text} +import scala.xml.Utility.trim import org.apache.spark.SparkFunSuite @@ -129,6 +130,55 @@ class UIUtilsSuite extends SparkFunSuite { assert(decoded1 === decodeURLParameter(decoded1)) } + test("listingTable with tooltips") { + + def generateDataRowValue: String => Seq[Node] = row => {row} + val header = Seq("Header1", "Header2") + val data = Seq("Data1", "Data2") + val tooltip = Seq(None, Some("tooltip")) + + val generated = listingTable(header, generateDataRowValue, data, tooltipHeaders = tooltip) + + val expected: Node = +
    Pool NameMinimum SharePool Weight + Minimum Share + + Pool Weight + Active Stages Running TasksSchedulingModeScheduling Mode
    {errorSummary}{details}{failureReasonSummary}{details}
    + + + + + + {data.map(generateDataRowValue)} + +
    {header(0)} + + {header(1)} + +
    + + assert(trim(generated(0)) == trim(expected)) + } + + test("listingTable without tooltips") { + + def generateDataRowValue: String => Seq[Node] = row => {row} + val header = Seq("Header1", "Header2") + val data = Seq("Data1", "Data2") + + val generated = listingTable(header, generateDataRowValue, data) + + val expected = + + + + + + + {data.map(generateDataRowValue)} + +
    {header(0)}{header(1)}
    + + assert(trim(generated(0)) == trim(expected)) + } + private def verify( desc: String, expected: Node, diff --git a/core/src/test/scala/org/apache/spark/ui/storage/StoragePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/storage/StoragePageSuite.scala index 06f01a60868f9..f93ecd3b006b2 100644 --- a/core/src/test/scala/org/apache/spark/ui/storage/StoragePageSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/storage/StoragePageSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.ui.storage import javax.servlet.http.HttpServletRequest import org.mockito.Mockito._ +import scala.xml.{Node, Text} import org.apache.spark.SparkFunSuite import org.apache.spark.status.StreamBlockData @@ -74,7 +75,21 @@ class StoragePageSuite extends SparkFunSuite { "Fraction Cached", "Size in Memory", "Size on Disk") - assert((xmlNodes \\ "th").map(_.text) === headers) + + val headerRow: Seq[Node] = { + headers.view.zipWithIndex.map { x => + storagePage.tooltips(x._2) match { + case Some(tooltip) => + + + {Text(x._1)} + + + case None => {Text(x._1)} + } + }.toList + } + assert((xmlNodes \\ "th").map(_.text) === headerRow.map(_.text)) assert((xmlNodes \\ "tr").size === 3) assert(((xmlNodes \\ "tr")(0) \\ "td").map(_.text.trim) === diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala index 5e08a3dc1181d..b0520c7ab1b1f 100644 --- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala @@ -299,7 +299,7 @@ private object TestUserClosuresActuallyCleaned { rdd.aggregateByKey(0)({ case (_, _) => return; 1 }, { case (_, _) => return; 1 }).count() } def testFoldByKey(rdd: RDD[(Int, Int)]): Unit = { rdd.foldByKey(0) { case (_, _) => return; 1 } } - def testReduceByKey(rdd: RDD[(Int, Int)]): Unit = { rdd.reduceByKey { case (_, _) => return; 1 } } + def testReduceByKey(rdd: RDD[(Int, Int)]): Unit = 
{ rdd.reduceByKey { (_, _) => return; 1 } } def testReduceByKeyLocally(rdd: RDD[(Int, Int)]): Unit = { rdd.reduceByKeyLocally { case (_, _) => return; 1 } } diff --git a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala index f5f93ece660b8..21e69550785a4 100644 --- a/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/FileAppenderSuite.scala @@ -356,7 +356,7 @@ class FileAppenderSuite extends SparkFunSuite with BeforeAndAfter with Logging { } /** Delete all the generated rolled over files */ - def cleanup() { + def cleanup(): Unit = { testFile.getParentFile.listFiles.filter { file => file.getName.startsWith(testFile.getName) }.foreach { _.delete() } diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index e781c5f71faf4..edc0662a0f73e 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -32,8 +32,7 @@ import org.apache.spark._ import org.apache.spark.executor._ import org.apache.spark.metrics.ExecutorMetricType import org.apache.spark.rdd.RDDOperationScope -import org.apache.spark.resource.ResourceInformation -import org.apache.spark.resource.ResourceUtils +import org.apache.spark.resource.{ResourceInformation, ResourceProfile, ResourceUtils} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.shuffle.MetadataFetchFailedException @@ -179,7 +178,7 @@ class JsonProtocolSuite extends SparkFunSuite { testJobResult(jobFailed) // TaskEndReason - val fetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 18, 19, + val fetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 16L, 18, 19, "Some exception") val fetchMetadataFailed = new 
MetadataFetchFailedException(17, 19, "metadata Fetch failed exception").toTaskFailedReason @@ -296,12 +295,12 @@ class JsonProtocolSuite extends SparkFunSuite { test("FetchFailed backwards compatibility") { // FetchFailed in Spark 1.1.0 does not have a "Message" property. - val fetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 18, 19, + val fetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 16L, 18, 19, "ignored") val oldEvent = JsonProtocol.taskEndReasonToJson(fetchFailed) .removeField({ _._1 == "Message" }) - val expectedFetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 18, 19, - "Unknown reason") + val expectedFetchFailed = FetchFailed(BlockManagerId("With or", "without you", 15), 17, 16L, + 18, 19, "Unknown reason") assert(expectedFetchFailed === JsonProtocol.taskEndReasonFromJson(oldEvent)) } @@ -341,7 +340,8 @@ class JsonProtocolSuite extends SparkFunSuite { val stageIds = Seq[Int](1, 2, 3, 4) val stageInfos = stageIds.map(x => makeStageInfo(x, x * 200, x * 300, x * 400L, x * 500L)) val dummyStageInfos = - stageIds.map(id => new StageInfo(id, 0, "unknown", 0, Seq.empty, Seq.empty, "unknown")) + stageIds.map(id => new StageInfo(id, 0, "unknown", 0, Seq.empty, Seq.empty, "unknown", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID)) val jobStart = SparkListenerJobStart(10, jobSubmissionTime, stageInfos, properties) val oldEvent = JsonProtocol.jobStartToJson(jobStart).removeField({_._1 == "Stage Infos"}) val expectedJobStart = @@ -383,9 +383,11 @@ class JsonProtocolSuite extends SparkFunSuite { test("StageInfo backward compatibility (parent IDs)") { // Prior to Spark 1.4.0, StageInfo did not have the "Parent IDs" property - val stageInfo = new StageInfo(1, 1, "me-stage", 1, Seq.empty, Seq(1, 2, 3), "details") + val stageInfo = new StageInfo(1, 1, "me-stage", 1, Seq.empty, Seq(1, 2, 3), "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) val 
oldStageInfo = JsonProtocol.stageInfoToJson(stageInfo).removeField({ _._1 == "Parent IDs"}) - val expectedStageInfo = new StageInfo(1, 1, "me-stage", 1, Seq.empty, Seq.empty, "details") + val expectedStageInfo = new StageInfo(1, 1, "me-stage", 1, Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) assertEquals(expectedStageInfo, JsonProtocol.stageInfoFromJson(oldStageInfo)) } @@ -496,59 +498,59 @@ private[spark] object JsonProtocolSuite extends Assertions { private val nodeBlacklistedTime = 1421458952000L private val nodeUnblacklistedTime = 1421458962000L - private def testEvent(event: SparkListenerEvent, jsonString: String) { + private def testEvent(event: SparkListenerEvent, jsonString: String): Unit = { val actualJsonString = compact(render(JsonProtocol.sparkEventToJson(event))) val newEvent = JsonProtocol.sparkEventFromJson(parse(actualJsonString)) assertJsonStringEquals(jsonString, actualJsonString, event.getClass.getSimpleName) assertEquals(event, newEvent) } - private def testRDDInfo(info: RDDInfo) { + private def testRDDInfo(info: RDDInfo): Unit = { val newInfo = JsonProtocol.rddInfoFromJson(JsonProtocol.rddInfoToJson(info)) assertEquals(info, newInfo) } - private def testStageInfo(info: StageInfo) { + private def testStageInfo(info: StageInfo): Unit = { val newInfo = JsonProtocol.stageInfoFromJson(JsonProtocol.stageInfoToJson(info)) assertEquals(info, newInfo) } - private def testStorageLevel(level: StorageLevel) { + private def testStorageLevel(level: StorageLevel): Unit = { val newLevel = JsonProtocol.storageLevelFromJson(JsonProtocol.storageLevelToJson(level)) assertEquals(level, newLevel) } - private def testTaskMetrics(metrics: TaskMetrics) { + private def testTaskMetrics(metrics: TaskMetrics): Unit = { val newMetrics = JsonProtocol.taskMetricsFromJson(JsonProtocol.taskMetricsToJson(metrics)) assertEquals(metrics, newMetrics) } - private def testBlockManagerId(id: BlockManagerId) { + private def 
testBlockManagerId(id: BlockManagerId): Unit = { val newId = JsonProtocol.blockManagerIdFromJson(JsonProtocol.blockManagerIdToJson(id)) assert(id === newId) } - private def testTaskInfo(info: TaskInfo) { + private def testTaskInfo(info: TaskInfo): Unit = { val newInfo = JsonProtocol.taskInfoFromJson(JsonProtocol.taskInfoToJson(info)) assertEquals(info, newInfo) } - private def testJobResult(result: JobResult) { + private def testJobResult(result: JobResult): Unit = { val newResult = JsonProtocol.jobResultFromJson(JsonProtocol.jobResultToJson(result)) assertEquals(result, newResult) } - private def testTaskEndReason(reason: TaskEndReason) { + private def testTaskEndReason(reason: TaskEndReason): Unit = { val newReason = JsonProtocol.taskEndReasonFromJson(JsonProtocol.taskEndReasonToJson(reason)) assertEquals(reason, newReason) } - private def testBlockId(blockId: BlockId) { + private def testBlockId(blockId: BlockId): Unit = { val newBlockId = BlockId(blockId.toString) assert(blockId === newBlockId) } - private def testExecutorInfo(info: ExecutorInfo) { + private def testExecutorInfo(info: ExecutorInfo): Unit = { val newInfo = JsonProtocol.executorInfoFromJson(JsonProtocol.executorInfoToJson(info)) assertEquals(info, newInfo) } @@ -565,7 +567,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | Util methods for comparing events | * --------------------------------- */ - private[spark] def assertEquals(event1: SparkListenerEvent, event2: SparkListenerEvent) { + private[spark] def assertEquals(event1: SparkListenerEvent, event2: SparkListenerEvent): Unit = { (event1, event2) match { case (e1: SparkListenerStageSubmitted, e2: SparkListenerStageSubmitted) => assert(e1.properties === e2.properties) @@ -633,7 +635,7 @@ private[spark] object JsonProtocolSuite extends Assertions { } } - private def assertEquals(info1: StageInfo, info2: StageInfo) { + private def assertEquals(info1: StageInfo, info2: StageInfo): Unit = { assert(info1.stageId === info2.stageId) 
assert(info1.name === info2.name) assert(info1.numTasks === info2.numTasks) @@ -647,7 +649,7 @@ private[spark] object JsonProtocolSuite extends Assertions { assert(info1.details === info2.details) } - private def assertEquals(info1: RDDInfo, info2: RDDInfo) { + private def assertEquals(info1: RDDInfo, info2: RDDInfo): Unit = { assert(info1.id === info2.id) assert(info1.name === info2.name) assert(info1.numPartitions === info2.numPartitions) @@ -657,14 +659,14 @@ private[spark] object JsonProtocolSuite extends Assertions { assertEquals(info1.storageLevel, info2.storageLevel) } - private def assertEquals(level1: StorageLevel, level2: StorageLevel) { + private def assertEquals(level1: StorageLevel, level2: StorageLevel): Unit = { assert(level1.useDisk === level2.useDisk) assert(level1.useMemory === level2.useMemory) assert(level1.deserialized === level2.deserialized) assert(level1.replication === level2.replication) } - private def assertEquals(info1: TaskInfo, info2: TaskInfo) { + private def assertEquals(info1: TaskInfo, info2: TaskInfo): Unit = { assert(info1.taskId === info2.taskId) assert(info1.index === info2.index) assert(info1.attemptNumber === info2.attemptNumber) @@ -679,12 +681,12 @@ private[spark] object JsonProtocolSuite extends Assertions { assert(info1.accumulables === info2.accumulables) } - private def assertEquals(info1: ExecutorInfo, info2: ExecutorInfo) { + private def assertEquals(info1: ExecutorInfo, info2: ExecutorInfo): Unit = { assert(info1.executorHost == info2.executorHost) assert(info1.totalCores == info2.totalCores) } - private def assertEquals(metrics1: TaskMetrics, metrics2: TaskMetrics) { + private def assertEquals(metrics1: TaskMetrics, metrics2: TaskMetrics): Unit = { assert(metrics1.executorDeserializeTime === metrics2.executorDeserializeTime) assert(metrics1.executorDeserializeCpuTime === metrics2.executorDeserializeCpuTime) assert(metrics1.executorRunTime === metrics2.executorRunTime) @@ -700,23 +702,23 @@ private[spark] object 
JsonProtocolSuite extends Assertions { assertBlocksEquals(metrics1.updatedBlockStatuses, metrics2.updatedBlockStatuses) } - private def assertEquals(metrics1: ShuffleReadMetrics, metrics2: ShuffleReadMetrics) { + private def assertEquals(metrics1: ShuffleReadMetrics, metrics2: ShuffleReadMetrics): Unit = { assert(metrics1.remoteBlocksFetched === metrics2.remoteBlocksFetched) assert(metrics1.localBlocksFetched === metrics2.localBlocksFetched) assert(metrics1.fetchWaitTime === metrics2.fetchWaitTime) assert(metrics1.remoteBytesRead === metrics2.remoteBytesRead) } - private def assertEquals(metrics1: ShuffleWriteMetrics, metrics2: ShuffleWriteMetrics) { + private def assertEquals(metrics1: ShuffleWriteMetrics, metrics2: ShuffleWriteMetrics): Unit = { assert(metrics1.bytesWritten === metrics2.bytesWritten) assert(metrics1.writeTime === metrics2.writeTime) } - private def assertEquals(metrics1: InputMetrics, metrics2: InputMetrics) { + private def assertEquals(metrics1: InputMetrics, metrics2: InputMetrics): Unit = { assert(metrics1.bytesRead === metrics2.bytesRead) } - private def assertEquals(result1: JobResult, result2: JobResult) { + private def assertEquals(result1: JobResult, result2: JobResult): Unit = { (result1, result2) match { case (JobSucceeded, JobSucceeded) => case (r1: JobFailed, r2: JobFailed) => @@ -725,13 +727,14 @@ private[spark] object JsonProtocolSuite extends Assertions { } } - private def assertEquals(reason1: TaskEndReason, reason2: TaskEndReason) { + private def assertEquals(reason1: TaskEndReason, reason2: TaskEndReason): Unit = { (reason1, reason2) match { case (Success, Success) => case (Resubmitted, Resubmitted) => case (r1: FetchFailed, r2: FetchFailed) => assert(r1.shuffleId === r2.shuffleId) assert(r1.mapId === r2.mapId) + assert(r1.mapIndex === r2.mapIndex) assert(r1.reduceId === r2.reduceId) assert(r1.bmAddress === r2.bmAddress) assert(r1.message === r2.message) @@ -761,7 +764,7 @@ private[spark] object JsonProtocolSuite extends 
Assertions { private def assertEquals( details1: Map[String, Seq[(String, String)]], - details2: Map[String, Seq[(String, String)]]) { + details2: Map[String, Seq[(String, String)]]): Unit = { details1.zip(details2).foreach { case ((key1, values1: Seq[(String, String)]), (key2, values2: Seq[(String, String)])) => assert(key1 === key2) @@ -769,7 +772,7 @@ private[spark] object JsonProtocolSuite extends Assertions { } } - private def assertEquals(exception1: Exception, exception2: Exception) { + private def assertEquals(exception1: Exception, exception2: Exception): Unit = { assert(exception1.getMessage === exception2.getMessage) assertSeqEquals( exception1.getStackTrace, @@ -783,7 +786,7 @@ private[spark] object JsonProtocolSuite extends Assertions { } } - private def assertJsonStringEquals(expected: String, actual: String, metadata: String) { + private def assertJsonStringEquals(expected: String, actual: String, metadata: String): Unit = { val expectedJson = parse(expected) val actualJson = parse(actual) if (expectedJson != actualJson) { @@ -796,7 +799,7 @@ private[spark] object JsonProtocolSuite extends Assertions { } } - private def assertSeqEquals[T](seq1: Seq[T], seq2: Seq[T], assertEquals: (T, T) => Unit) { + private def assertSeqEquals[T](seq1: Seq[T], seq2: Seq[T], assertEquals: (T, T) => Unit): Unit = { assert(seq1.length === seq2.length) seq1.zip(seq2).foreach { case (t1, t2) => assertEquals(t1, t2) @@ -806,7 +809,7 @@ private[spark] object JsonProtocolSuite extends Assertions { private def assertOptionEquals[T]( opt1: Option[T], opt2: Option[T], - assertEquals: (T, T) => Unit) { + assertEquals: (T, T) => Unit): Unit = { if (opt1.isDefined) { assert(opt2.isDefined) assertEquals(opt1.get, opt2.get) @@ -825,11 +828,12 @@ private[spark] object JsonProtocolSuite extends Assertions { assertSeqEquals(blocks1, blocks2, assertBlockEquals) } - private def assertBlockEquals(b1: (BlockId, BlockStatus), b2: (BlockId, BlockStatus)) { + private def assertBlockEquals(b1: 
(BlockId, BlockStatus), b2: (BlockId, BlockStatus)): Unit = { assert(b1 === b2) } - private def assertStackTraceElementEquals(ste1: StackTraceElement, ste2: StackTraceElement) { + private def assertStackTraceElementEquals(ste1: StackTraceElement, + ste2: StackTraceElement): Unit = { // This mimics the equals() method from Java 8 and earlier. Java 9 adds checks for // class loader and module, which will cause them to be not equal, when we don't // care about those @@ -871,7 +875,8 @@ private[spark] object JsonProtocolSuite extends Assertions { private def makeStageInfo(a: Int, b: Int, c: Int, d: Long, e: Long) = { val rddInfos = (0 until a % 5).map { i => makeRddInfo(a + i, b + i, c + i, d + i, e + i) } - val stageInfo = new StageInfo(a, 0, "greetings", b, rddInfos, Seq(100, 200, 300), "details") + val stageInfo = new StageInfo(a, 0, "greetings", b, rddInfos, Seq(100, 200, 300), "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) val (acc1, acc2) = (makeAccumulableInfo(1), makeAccumulableInfo(2)) stageInfo.accumulables(acc1.id) = acc1 stageInfo.accumulables(acc2.id) = acc2 @@ -936,6 +941,7 @@ private[spark] object JsonProtocolSuite extends Assertions { t.setExecutorDeserializeCpuTime(a) t.setExecutorRunTime(b) t.setExecutorCpuTime(b) + t.setPeakExecutionMemory(c) t.setResultSize(c) t.setJvmGCTime(d) t.setResultSerializationTime(a + b) @@ -1241,6 +1247,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | "Executor Deserialize CPU Time": 300, | "Executor Run Time": 400, | "Executor CPU Time": 400, + | "Peak Execution Memory": 500, | "Result Size": 500, | "JVM GC Time": 600, | "Result Serialization Time": 700, @@ -1364,6 +1371,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | "Executor Deserialize CPU Time": 300, | "Executor Run Time": 400, | "Executor CPU Time": 400, + | "Peak Execution Memory": 500, | "Result Size": 500, | "JVM GC Time": 600, | "Result Serialization Time": 700, @@ -1487,6 +1495,7 @@ 
private[spark] object JsonProtocolSuite extends Assertions { | "Executor Deserialize CPU Time": 300, | "Executor Run Time": 400, | "Executor CPU Time": 400, + | "Peak Execution Memory": 500, | "Result Size": 500, | "JVM GC Time": 600, | "Result Serialization Time": 700, @@ -2050,7 +2059,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | { | "ID": 9, | "Name": "$PEAK_EXECUTION_MEMORY", - | "Update": 0, + | "Update": 500, | "Internal": true, | "Count Failed Values": true | }, diff --git a/core/src/test/scala/org/apache/spark/util/KeyLockSuite.scala b/core/src/test/scala/org/apache/spark/util/KeyLockSuite.scala index 2169a0e4d442f..6888e492a8d33 100644 --- a/core/src/test/scala/org/apache/spark/util/KeyLockSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/KeyLockSuite.scala @@ -49,7 +49,7 @@ class KeyLockSuite extends SparkFunSuite with TimeLimits { @volatile var e: Throwable = null val threads = (0 until numThreads).map { i => new Thread() { - override def run(): Unit = try { + override def run(): Unit = { latch.await(foreverMs, TimeUnit.MILLISECONDS) keyLock.withLock(keys(i)) { var cur = numThreadsHoldingLock.get() diff --git a/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala index 4b7164d8acbce..1efd399b5db68 100644 --- a/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/NextIteratorSuite.scala @@ -81,7 +81,7 @@ class NextIteratorSuite extends SparkFunSuite with Matchers { } } - override def close() { + override def close(): Unit = { closeCalled += 1 } } diff --git a/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala b/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala index 06c2ceb68bd79..f14ec175232be 100644 --- a/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/util/PeriodicRDDCheckpointerSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.util import org.apache.hadoop.fs.Path +import org.scalatest.Assertions._ import org.apache.spark.{SharedSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.rdd.RDD diff --git a/core/src/test/scala/org/apache/spark/util/PropertiesCloneBenchmark.scala b/core/src/test/scala/org/apache/spark/util/PropertiesCloneBenchmark.scala new file mode 100644 index 0000000000000..baacc7527a806 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/PropertiesCloneBenchmark.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util + +import java.util.Properties + +import scala.util.Random + +import org.apache.commons.lang3.SerializationUtils + +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} + + +/** + * Benchmark for Kryo Unsafe vs safe Serialization. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "core/test:runMain " + * 3. 
generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/test:runMain " + * Results will be written to "benchmarks/PropertiesCloneBenchmark-results.txt". + * }}} + */ +object PropertiesCloneBenchmark extends BenchmarkBase { + /** + * Benchmark various cases of cloning properties objects + */ + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + runBenchmark("Properties Cloning") { + def compareSerialization(name: String, props: Properties): Unit = { + val benchmark = new Benchmark(name, 1, output = output) + benchmark.addCase("SerializationUtils.clone") { _ => + SerializationUtils.clone(props) + } + benchmark.addCase("Utils.cloneProperties") { _ => + Utils.cloneProperties(props) + } + benchmark.run() + } + compareSerialization("Empty Properties", new Properties) + compareSerialization("System Properties", System.getProperties) + compareSerialization("Small Properties", makeRandomProps(10, 40, 100)) + compareSerialization("Medium Properties", makeRandomProps(50, 40, 100)) + compareSerialization("Large Properties", makeRandomProps(100, 40, 100)) + } + } + + def makeRandomProps(numProperties: Int, keySize: Int, valueSize: Int): Properties = { + val props = new Properties + for (_ <- 1 to numProperties) { + props.put( + Random.alphanumeric.take(keySize), + Random.alphanumeric.take(valueSize) + ) + } + props + } +} diff --git a/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala b/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala index 75e4504850679..0b1796540abbb 100644 --- a/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala +++ b/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala @@ -19,7 +19,6 @@ package org.apache.spark.util import java.util.Properties -import org.apache.commons.lang3.SerializationUtils import org.scalatest.{BeforeAndAfterEach, Suite} /** @@ -43,11 +42,11 @@ private[spark] trait ResetSystemProperties extends BeforeAndAfterEach { this: Su var 
oldProperties: Properties = null override def beforeEach(): Unit = { - // we need SerializationUtils.clone instead of `new Properties(System.getProperties())` because + // we need Utils.cloneProperties instead of `new Properties(System.getProperties())` because // the later way of creating a copy does not copy the properties but it initializes a new // Properties object with the given properties as defaults. They are not recognized at all // by standard Scala wrapper over Java Properties then. - oldProperties = SerializationUtils.clone(System.getProperties) + oldProperties = Utils.cloneProperties(System.getProperties) super.beforeEach() } diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 8bc62db81e4f9..d4f2053e0b2f4 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -73,7 +73,7 @@ class SizeEstimatorSuite with PrivateMethodTester with ResetSystemProperties { - override def beforeEach() { + override def beforeEach(): Unit = { // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case super.beforeEach() System.setProperty("os.arch", "amd64") @@ -180,7 +180,7 @@ class SizeEstimatorSuite test("32-bit arch") { System.setProperty("os.arch", "x86") - val initialize = PrivateMethod[Unit]('initialize) + val initialize = PrivateMethod[Unit](Symbol("initialize")) SizeEstimator invokePrivate initialize() assertResult(40)(SizeEstimator.estimate(DummyString(""))) @@ -194,7 +194,7 @@ class SizeEstimatorSuite test("64-bit arch with no compressed oops") { System.setProperty("os.arch", "amd64") System.setProperty(TEST_USE_COMPRESSED_OOPS_KEY, "false") - val initialize = PrivateMethod[Unit]('initialize) + val initialize = PrivateMethod[Unit](Symbol("initialize")) SizeEstimator invokePrivate initialize() 
assertResult(56)(SizeEstimator.estimate(DummyString(""))) @@ -220,7 +220,7 @@ class SizeEstimatorSuite test("check 64-bit detection for s390x arch") { System.setProperty("os.arch", "s390x") - val initialize = PrivateMethod[Unit]('initialize) + val initialize = PrivateMethod[Unit](Symbol("initialize")) SizeEstimator invokePrivate initialize() // Class should be 32 bytes on s390x if recognised as 64 bit platform assertResult(32)(SizeEstimator.estimate(new DummyClass7)) diff --git a/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala new file mode 100644 index 0000000000000..90741a6bde7f0 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.util + +import java.io.File + +import scala.util.Try + +import org.apache.spark.SparkFunSuite + +class SparkUncaughtExceptionHandlerSuite extends SparkFunSuite { + + private val sparkHome = + sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!")) + + Seq( + (ThrowableTypes.RuntimeException, true, SparkExitCode.UNCAUGHT_EXCEPTION), + (ThrowableTypes.RuntimeException, false, 0), + (ThrowableTypes.OutOfMemoryError, true, SparkExitCode.OOM), + (ThrowableTypes.OutOfMemoryError, false, SparkExitCode.OOM), + (ThrowableTypes.SparkFatalRuntimeException, true, SparkExitCode.UNCAUGHT_EXCEPTION), + (ThrowableTypes.SparkFatalRuntimeException, false, 0), + (ThrowableTypes.SparkFatalOutOfMemoryError, true, SparkExitCode.OOM), + (ThrowableTypes.SparkFatalOutOfMemoryError, false, SparkExitCode.OOM) + ).foreach { + case (throwable: ThrowableTypes.ThrowableTypesVal, + exitOnUncaughtException: Boolean, expectedExitCode) => + test(s"SPARK-30310: Test uncaught $throwable, " + + s"exitOnUncaughtException = $exitOnUncaughtException") { + + // creates a ThrowableThrower process via spark-class and verify the exit code + val process = Utils.executeCommand( + Seq(s"$sparkHome/bin/spark-class", + ThrowableThrower.getClass.getCanonicalName.dropRight(1), // drops the "$" at the end + throwable.name, + exitOnUncaughtException.toString), + new File(sparkHome), + Map("SPARK_TESTING" -> "1", "SPARK_HOME" -> sparkHome) + ) + assert(process.waitFor == expectedExitCode) + } + } +} + +// enumeration object for the Throwable types that SparkUncaughtExceptionHandler handles +object ThrowableTypes extends Enumeration { + + sealed case class ThrowableTypesVal(name: String, t: Throwable) extends Val(name) + + val RuntimeException = ThrowableTypesVal("RuntimeException", new RuntimeException) + val OutOfMemoryError = ThrowableTypesVal("OutOfMemoryError", new OutOfMemoryError) + val SparkFatalRuntimeException = 
ThrowableTypesVal("SparkFatalException(RuntimeException)", + new SparkFatalException(new RuntimeException)) + val SparkFatalOutOfMemoryError = ThrowableTypesVal("SparkFatalException(OutOfMemoryError)", + new SparkFatalException(new OutOfMemoryError)) + + // returns the actual Throwable by its name + def getThrowableByName(name: String): Throwable = { + super.withName(name).asInstanceOf[ThrowableTypesVal].t + } +} + +// Invoked by spark-class for throwing a Throwable +object ThrowableThrower { + + // a thread that uses SparkUncaughtExceptionHandler and throws a Throwable by name + class ThrowerThread(name: String, exitOnUncaughtException: Boolean) extends Thread { + override def run() { + Thread.setDefaultUncaughtExceptionHandler( + new SparkUncaughtExceptionHandler(exitOnUncaughtException)) + throw ThrowableTypes.getThrowableByName(name) + } + } + + // main() requires 2 args: + // - args(0): name of the Throwable defined in ThrowableTypes + // - args(1): exitOnUncaughtException (true/false) + // + // it exits with the exit code dictated by either: + // - SparkUncaughtExceptionHandler (SparkExitCode) + // - main() (0, or -1 when number of args is wrong) + def main(args: Array[String]): Unit = { + if (args.length == 2) { + val t = new ThrowerThread(args(0), + Try(args(1).toBoolean).getOrElse(false)) + t.start() + t.join() + System.exit(0) + } else { + System.exit(-1) + } + } +} diff --git a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala index aa3f062e582c3..ac36e537c75bb 100644 --- a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala @@ -132,7 +132,7 @@ class ThreadUtilsSuite extends SparkFunSuite { val t = new Thread() { setDaemon(true) - override def run() { + override def run(): Unit = { try { // "par" is uninterruptible. The following will keep running even if the thread is // interrupted. 
We should prefer to use "ThreadUtils.parmap". diff --git a/core/src/test/scala/org/apache/spark/util/TimeStampedHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/TimeStampedHashMapSuite.scala index 77a92e7e1eb43..1644540946839 100644 --- a/core/src/test/scala/org/apache/spark/util/TimeStampedHashMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/TimeStampedHashMapSuite.scala @@ -63,7 +63,7 @@ class TimeStampedHashMapSuite extends SparkFunSuite { } /** Test basic operations of a Scala mutable Map. */ - def testMap(hashMapConstructor: => mutable.Map[String, String]) { + def testMap(hashMapConstructor: => mutable.Map[String, String]): Unit = { def newMap() = hashMapConstructor val testMap1 = newMap() val testMap2 = newMap() @@ -134,7 +134,7 @@ class TimeStampedHashMapSuite extends SparkFunSuite { } /** Test thread safety of a Scala mutable map. */ - def testMapThreadSafety(hashMapConstructor: => mutable.Map[String, String]) { + def testMapThreadSafety(hashMapConstructor: => mutable.Map[String, String]): Unit = { def newMap() = hashMapConstructor val name = newMap().getClass.getSimpleName val testMap = newMap() @@ -150,7 +150,7 @@ class TimeStampedHashMapSuite extends SparkFunSuite { } val threads = (1 to 25).map(i => new Thread() { - override def run() { + override def run(): Unit = { try { for (j <- 1 to 1000) { Random.nextInt(3) match { diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 7e4a40b60aac5..8f8902e497d49 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -849,36 +849,6 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { assert(buffer.toString === "st circular test circular") } - test("nanSafeCompareDoubles") { - def shouldMatchDefaultOrder(a: Double, b: Double): Unit = { - assert(Utils.nanSafeCompareDoubles(a, b) === 
JDouble.compare(a, b)) - assert(Utils.nanSafeCompareDoubles(b, a) === JDouble.compare(b, a)) - } - shouldMatchDefaultOrder(0d, 0d) - shouldMatchDefaultOrder(0d, 1d) - shouldMatchDefaultOrder(Double.MinValue, Double.MaxValue) - assert(Utils.nanSafeCompareDoubles(Double.NaN, Double.NaN) === 0) - assert(Utils.nanSafeCompareDoubles(Double.NaN, Double.PositiveInfinity) === 1) - assert(Utils.nanSafeCompareDoubles(Double.NaN, Double.NegativeInfinity) === 1) - assert(Utils.nanSafeCompareDoubles(Double.PositiveInfinity, Double.NaN) === -1) - assert(Utils.nanSafeCompareDoubles(Double.NegativeInfinity, Double.NaN) === -1) - } - - test("nanSafeCompareFloats") { - def shouldMatchDefaultOrder(a: Float, b: Float): Unit = { - assert(Utils.nanSafeCompareFloats(a, b) === JFloat.compare(a, b)) - assert(Utils.nanSafeCompareFloats(b, a) === JFloat.compare(b, a)) - } - shouldMatchDefaultOrder(0f, 0f) - shouldMatchDefaultOrder(1f, 1f) - shouldMatchDefaultOrder(Float.MinValue, Float.MaxValue) - assert(Utils.nanSafeCompareFloats(Float.NaN, Float.NaN) === 0) - assert(Utils.nanSafeCompareFloats(Float.NaN, Float.PositiveInfinity) === 1) - assert(Utils.nanSafeCompareFloats(Float.NaN, Float.NegativeInfinity) === 1) - assert(Utils.nanSafeCompareFloats(Float.PositiveInfinity, Float.NaN) === -1) - assert(Utils.nanSafeCompareFloats(Float.NegativeInfinity, Float.NaN) === -1) - } - test("isDynamicAllocationEnabled") { val conf = new SparkConf() conf.set("spark.master", "yarn") diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala index 2b5993a352cb0..0b4e1494bf300 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala @@ -436,7 +436,7 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite val it = map.iterator 
assert(it.isInstanceOf[CompletionIterator[_, _]]) // org.apache.spark.util.collection.AppendOnlyMap.destructiveSortedIterator returns - // an instance of an annonymous Iterator class. + // an instance of an anonymous Iterator class. val underlyingMapRef = WeakReference(map.currentMap) diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala index 2bad56d7ff424..a6de64b6c68a0 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalSorterSuite.scala @@ -294,7 +294,7 @@ class ExternalSorterSuite extends SparkFunSuite with LocalSparkContext { | Helper methods that contain the test body | * =========================================== */ - private def emptyDataStream(conf: SparkConf) { + private def emptyDataStream(conf: SparkConf): Unit = { conf.set(SHUFFLE_MANAGER, "sort") sc = new SparkContext("local", "test", conf) val context = MemoryTestingUtils.fakeTaskContext(sc.env) @@ -327,7 +327,7 @@ class ExternalSorterSuite extends SparkFunSuite with LocalSparkContext { sorter4.stop() } - private def fewElementsPerPartition(conf: SparkConf) { + private def fewElementsPerPartition(conf: SparkConf): Unit = { conf.set(SHUFFLE_MANAGER, "sort") sc = new SparkContext("local", "test", conf) val context = MemoryTestingUtils.fakeTaskContext(sc.env) @@ -368,7 +368,7 @@ class ExternalSorterSuite extends SparkFunSuite with LocalSparkContext { sorter4.stop() } - private def emptyPartitionsWithSpilling(conf: SparkConf) { + private def emptyPartitionsWithSpilling(conf: SparkConf): Unit = { val size = 1000 conf.set(SHUFFLE_MANAGER, "sort") conf.set(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD, size / 2) @@ -393,7 +393,7 @@ class ExternalSorterSuite extends SparkFunSuite with LocalSparkContext { sorter.stop() } - private def testSpillingInLocalCluster(conf: SparkConf, 
numReduceTasks: Int) { + private def testSpillingInLocalCluster(conf: SparkConf, numReduceTasks: Int): Unit = { val size = 5000 conf.set(SHUFFLE_MANAGER, "sort") conf.set(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD, size / 4) @@ -517,7 +517,7 @@ class ExternalSorterSuite extends SparkFunSuite with LocalSparkContext { conf: SparkConf, withPartialAgg: Boolean, withOrdering: Boolean, - withSpilling: Boolean) { + withSpilling: Boolean): Unit = { val size = 1000 if (withSpilling) { conf.set(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD, size / 2) @@ -551,7 +551,7 @@ class ExternalSorterSuite extends SparkFunSuite with LocalSparkContext { assert(results === expected) } - private def sortWithoutBreakingSortingContracts(conf: SparkConf) { + private def sortWithoutBreakingSortingContracts(conf: SparkConf): Unit = { val size = 100000 val conf = createSparkConf(loadDefaults = true, kryo = false) conf.set(SHUFFLE_MANAGER, "sort") diff --git a/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala index 4759a830da4ca..8aa4be6c2ff8d 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/SizeTrackerSuite.scala @@ -71,7 +71,7 @@ class SizeTrackerSuite extends SparkFunSuite { testMap[String, Int](10000, i => (randString(0, 10000), i)) } - def testVector[T: ClassTag](numElements: Int, makeElement: Int => T) { + def testVector[T: ClassTag](numElements: Int, makeElement: Int => T): Unit = { val vector = new SizeTrackingVector[T] for (i <- 0 until numElements) { val item = makeElement(i) @@ -80,7 +80,7 @@ class SizeTrackerSuite extends SparkFunSuite { } } - def testMap[K, V](numElements: Int, makeElement: (Int) => (K, V)) { + def testMap[K, V](numElements: Int, makeElement: (Int) => (K, V)): Unit = { val map = new SizeTrackingAppendOnlyMap[K, V] for (i <- 0 until numElements) { val (k, v) 
= makeElement(i) @@ -89,7 +89,7 @@ class SizeTrackerSuite extends SparkFunSuite { } } - def expectWithinError(obj: AnyRef, estimatedSize: Long, error: Double) { + def expectWithinError(obj: AnyRef, estimatedSize: Long, error: Double): Unit = { val betterEstimatedSize = SizeEstimator.estimate(obj) assert(betterEstimatedSize * (1 - error) < estimatedSize, s"Estimated size $estimatedSize was less than expected size $betterEstimatedSize") diff --git a/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala index e80bd96c982df..bb03f0d3cdc20 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/SorterSuite.scala @@ -59,7 +59,7 @@ class SorterSuite extends SparkFunSuite with Logging { Arrays.sort(keys) new Sorter(new KVArraySortDataFormat[Double, Number]) - .sort(keyValueArray, 0, keys.length, Ordering.Double) + .sort(keyValueArray, 0, keys.length, (x, y) => java.lang.Double.compare(x, y)) keys.zipWithIndex.foreach { case (k, i) => assert(k === keyValueArray(2 * i)) @@ -311,12 +311,13 @@ abstract class AbstractIntArraySortDataFormat[K] extends SortDataFormat[K, Array data(pos1) = tmp } - override def copyElement(src: Array[Int], srcPos: Int, dst: Array[Int], dstPos: Int) { + override def copyElement(src: Array[Int], srcPos: Int, dst: Array[Int], dstPos: Int): Unit = { dst(dstPos) = src(srcPos) } /** Copy a range of elements starting at src(srcPos) to dest, starting at destPos. 
*/ - override def copyRange(src: Array[Int], srcPos: Int, dst: Array[Int], dstPos: Int, length: Int) { + override def copyRange(src: Array[Int], srcPos: Int, + dst: Array[Int], dstPos: Int, length: Int): Unit = { System.arraycopy(src, srcPos, dst, dstPos, length) } @@ -334,13 +335,13 @@ abstract class AbstractByteArraySortDataFormat[K] extends SortDataFormat[K, Arra data(pos1) = tmp } - override def copyElement(src: Array[Byte], srcPos: Int, dst: Array[Byte], dstPos: Int) { + override def copyElement(src: Array[Byte], srcPos: Int, dst: Array[Byte], dstPos: Int): Unit = { dst(dstPos) = src(srcPos) } /** Copy a range of elements starting at src(srcPos) to dest, starting at destPos. */ override def copyRange(src: Array[Byte], - srcPos: Int, dst: Array[Byte], dstPos: Int, length: Int) { + srcPos: Int, dst: Array[Byte], dstPos: Int, length: Int): Unit = { System.arraycopy(src, srcPos, dst, dstPos, length) } diff --git a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala index 38cb37c524594..a55004f664a54 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala @@ -20,12 +20,12 @@ package org.apache.spark.util.collection.unsafe.sort import java.nio.charset.StandardCharsets import com.google.common.primitives.UnsignedBytes -import org.scalatest.prop.PropertyChecks +import org.scalatestplus.scalacheck.ScalaCheckPropertyChecks import org.apache.spark.SparkFunSuite import org.apache.spark.unsafe.types.UTF8String -class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks { +class PrefixComparatorsSuite extends SparkFunSuite with ScalaCheckPropertyChecks { test("String prefix comparator") { diff --git 
a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala index a3c006b43d8e4..9ae6a8ef879f3 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/RadixSortSuite.scala @@ -108,7 +108,8 @@ class RadixSortSuite extends SparkFunSuite with Logging { } } - private def referenceKeyPrefixSort(buf: LongArray, lo: Long, hi: Long, refCmp: PrefixComparator) { + private def referenceKeyPrefixSort(buf: LongArray, lo: Long, hi: Long, + refCmp: PrefixComparator): Unit = { val sortBuffer = new LongArray(MemoryBlock.fromLongArray(new Array[Long](buf.size().toInt))) new Sorter(new UnsafeSortDataFormat(sortBuffer)).sort( buf, Ints.checkedCast(lo), Ints.checkedCast(hi), diff --git a/core/src/test/scala/org/apache/spark/util/logging/DriverLoggerSuite.scala b/core/src/test/scala/org/apache/spark/util/logging/DriverLoggerSuite.scala index 973f71cdeb755..bd7ec242a9317 100644 --- a/core/src/test/scala/org/apache/spark/util/logging/DriverLoggerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/logging/DriverLoggerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.util.logging -import java.io.{BufferedInputStream, File, FileInputStream} +import java.io.File import org.apache.commons.io.FileUtils diff --git a/dev/.rat-excludes b/dev/.rat-excludes index e12dc994b0842..73f461255de43 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -118,3 +118,4 @@ announce.tmpl vote.tmpl SessionManager.java SessionHandler.java +GangliaReporter.java diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index d33a107cc86a5..1658cca6050bc 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -81,7 +81,7 @@ if (!(Test-Path $tools)) { # ========================== Maven Push-Location 
$tools -$mavenVer = "3.6.2" +$mavenVer = "3.6.3" Start-FileDownload "https://archive.apache.org/dist/maven/maven-3/$mavenVer/binaries/apache-maven-$mavenVer-bin.zip" "maven.zip" # extract @@ -90,7 +90,7 @@ Invoke-Expression "7z.exe x maven.zip" # add maven to environment variables $env:PATH = "$tools\apache-maven-$mavenVer\bin;" + $env:PATH $env:M2_HOME = "$tools\apache-maven-$mavenVer" -$env:MAVEN_OPTS = "-Xmx2g -XX:ReservedCodeCacheSize=512m" +$env:MAVEN_OPTS = "-Xmx2g -XX:ReservedCodeCacheSize=1g" Pop-Location @@ -115,7 +115,7 @@ $env:Path += ";$env:HADOOP_HOME\bin" Pop-Location # ========================== R -$rVer = "3.6.1" +$rVer = "3.6.2" $rToolsVer = "3.5.1" InstallR diff --git a/dev/change-scala-version.sh b/dev/change-scala-version.sh index 4054d530d065e..06411b9b12a0d 100755 --- a/dev/change-scala-version.sh +++ b/dev/change-scala-version.sh @@ -19,7 +19,7 @@ set -e -VALID_VERSIONS=( 2.12 ) +VALID_VERSIONS=( 2.12 2.13 ) usage() { echo "Usage: $(basename $0) [-h|--help] diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml index 945686de49967..804a178a5fe28 100644 --- a/dev/checkstyle-suppressions.xml +++ b/dev/checkstyle-suppressions.xml @@ -30,6 +30,8 @@ + + + + + + @@ -91,10 +96,6 @@ - - - - diff --git a/dev/create-release/do-release-docker.sh b/dev/create-release/do-release-docker.sh index c1a122ebfb12e..694a87bf78084 100755 --- a/dev/create-release/do-release-docker.sh +++ b/dev/create-release/do-release-docker.sh @@ -127,6 +127,7 @@ GPG_KEY=$GPG_KEY ASF_PASSWORD=$ASF_PASSWORD GPG_PASSPHRASE=$GPG_PASSPHRASE RELEASE_STEP=$RELEASE_STEP +USER=$USER EOF JAVA_VOL= @@ -135,9 +136,6 @@ if [ -n "$JAVA" ]; then JAVA_VOL="--volume $JAVA:/opt/spark-java" fi -# SPARK-24530: Sphinx must work with python 3 to generate doc correctly. 
-echo "SPHINXPYTHON=/opt/p35/bin/python" >> $ENVFILE - echo "Building $RELEASE_TAG; output will be at $WORKDIR/output" docker run -ti \ --env-file "$ENVFILE" \ diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index f35bc4f48652b..022d3af95c05d 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -138,7 +138,8 @@ fi # Hive-specific profiles for some builds HIVE_PROFILES="-Phive -Phive-thriftserver" # Profiles for publishing snapshots and release to Maven Central -PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl" +# We use Apache Hive 2.3 for publishing +PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Phive-2.3 -Pspark-ganglia-lgpl -Pkinesis-asl" # Profiles for building binary releases BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr" @@ -164,7 +165,6 @@ DEST_DIR_NAME="$SPARK_PACKAGE_VERSION" git clean -d -f -x rm .gitignore -rm -rf .git cd .. if [[ "$1" == "package" ]]; then @@ -179,7 +179,7 @@ if [[ "$1" == "package" ]]; then rm -r spark-$SPARK_VERSION/licenses-binary fi - tar cvzf spark-$SPARK_VERSION.tgz spark-$SPARK_VERSION + tar cvzf spark-$SPARK_VERSION.tgz --exclude spark-$SPARK_VERSION/.git spark-$SPARK_VERSION echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour --output spark-$SPARK_VERSION.tgz.asc \ --detach-sig spark-$SPARK_VERSION.tgz echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \ @@ -220,7 +220,7 @@ if [[ "$1" == "package" ]]; then # Write out the VERSION to PySpark version info we rewrite the - into a . and SNAPSHOT # to dev0 to be closer to PEP440. 
- PYSPARK_VERSION=`echo "$SPARK_VERSION" | sed -r "s/-/./" | sed -r "s/SNAPSHOT/dev0/"` + PYSPARK_VERSION=`echo "$SPARK_VERSION" | sed -e "s/-/./" -e "s/SNAPSHOT/dev0/" -e "s/preview/dev/"` echo "__version__='$PYSPARK_VERSION'" > python/pyspark/version.py # Get maven home set by MVN @@ -281,6 +281,9 @@ if [[ "$1" == "package" ]]; then BINARY_PKGS_ARGS["without-hadoop"]="-Phadoop-provided" if [[ $SPARK_VERSION < "3.0." ]]; then BINARY_PKGS_ARGS["hadoop2.6"]="-Phadoop-2.6 $HIVE_PROFILES" + else + BINARY_PKGS_ARGS["hadoop2.7-hive1.2"]="-Phadoop-2.7 -Phive-1.2 $HIVE_PROFILES" + BINARY_PKGS_ARGS["hadoop3.2"]="-Phadoop-3.2 $HIVE_PROFILES" fi fi @@ -413,13 +416,13 @@ if [[ "$1" == "publish-release" ]]; then # TODO: revisit for Scala 2.13 support - if ! is_dry_run && [[ $PUBLISH_SCALA_2_11 = 1 ]]; then + if [[ $PUBLISH_SCALA_2_11 = 1 ]]; then ./dev/change-scala-version.sh 2.11 $MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -DskipTests \ $SCALA_2_11_PROFILES $PUBLISH_PROFILES clean install fi - if ! 
is_dry_run && [[ $PUBLISH_SCALA_2_12 = 1 ]]; then + if [[ $PUBLISH_SCALA_2_12 = 1 ]]; then ./dev/change-scala-version.sh 2.12 $MVN -DzincPort=$((ZINC_PORT + 2)) -Dmaven.repo.local=$tmp_repo -DskipTests \ $SCALA_2_11_PROFILES $PUBLISH_PROFILES clean install diff --git a/dev/create-release/release-tag.sh b/dev/create-release/release-tag.sh index 8024440759eb5..39856a9955955 100755 --- a/dev/create-release/release-tag.sh +++ b/dev/create-release/release-tag.sh @@ -73,8 +73,12 @@ git config user.email $GIT_EMAIL # Create release version $MVN versions:set -DnewVersion=$RELEASE_VERSION | grep -v "no value" # silence logs -# Set the release version in R/pkg/DESCRIPTION -sed -i".tmp1" 's/Version.*$/Version: '"$RELEASE_VERSION"'/g' R/pkg/DESCRIPTION +if [[ $RELEASE_VERSION != *"preview"* ]]; then + # Set the release version in R/pkg/DESCRIPTION + sed -i".tmp1" 's/Version.*$/Version: '"$RELEASE_VERSION"'/g' R/pkg/DESCRIPTION +else + sed -i".tmp1" 's/-SNAPSHOT/'"-$(cut -d "-" -f 2 <<< $RELEASE_VERSION)"'/g' R/pkg/R/sparkR.R +fi # Set the release version in docs sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml @@ -104,7 +108,11 @@ git commit -a -m "Preparing development version $NEXT_VERSION" if ! is_dry_run; then # Push changes git push origin $RELEASE_TAG - git push origin HEAD:$GIT_BRANCH + if [[ $RELEASE_VERSION != *"preview"* ]]; then + git push origin HEAD:$GIT_BRANCH + else + echo "It's preview release. We only push $RELEASE_TAG to remote." + fi cd .. 
rm -rf spark diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 4bfecedbf0406..63451687ee8c2 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -20,7 +20,7 @@ # Includes: # * Java 8 # * Ivy -# * Python/PyPandoc (2.7.15/3.6.7) +# * Python (2.7.15/3.6.7) # * R-base/R-base-dev (3.6.1) # * Ruby 2.3 build utilities @@ -33,8 +33,8 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true # These arguments are just for reuse and not really meant to be customized. ARG APT_INSTALL="apt-get install --no-install-recommends -y" -ARG BASE_PIP_PKGS="setuptools wheel virtualenv" -ARG PIP_PKGS="pyopenssl pypandoc numpy pygments sphinx" +ARG BASE_PIP_PKGS="setuptools wheel" +ARG PIP_PKGS="pyopenssl numpy sphinx" # Install extra needed repos and refresh. # - CRAN repo @@ -62,14 +62,13 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ curl -sL https://deb.nodesource.com/setup_11.x | bash && \ $APT_INSTALL nodejs && \ # Install needed python packages. Use pip for installing packages (for consistency). - $APT_INSTALL libpython2.7-dev libpython3-dev python-pip python3-pip && \ - pip install $BASE_PIP_PKGS && \ - pip install $PIP_PKGS && \ - cd && \ - virtualenv -p python3 /opt/p35 && \ - . /opt/p35/bin/activate && \ - pip install $BASE_PIP_PKGS && \ - pip install $PIP_PKGS && \ + $APT_INSTALL libpython3-dev python3-pip && \ + # Change default python version to python3. + update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ + update-alternatives --set python /usr/bin/python3.6 && \ + pip3 install $BASE_PIP_PKGS && \ + pip3 install $PIP_PKGS && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). 
$APT_INSTALL r-base r-base-dev && \ @@ -79,8 +78,8 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Install tools needed to build the documentation. $APT_INSTALL ruby2.3 ruby2.3-dev mkdocs && \ gem install jekyll --no-rdoc --no-ri -v 3.8.6 && \ - gem install jekyll-redirect-from && \ - gem install pygments.rb + gem install jekyll-redirect-from -v 0.15.0 && \ + gem install rouge WORKDIR /opt/spark-rm/output diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 deleted file mode 100644 index 775fb3c0a22e8..0000000000000 --- a/dev/deps/spark-deps-hadoop-2.7 +++ /dev/null @@ -1,202 +0,0 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.7.45.jar -ST4-4.0.4.jar -activation-1.1.1.jar -aircompressor-0.10.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar -antlr4-runtime-4.7.1.jar -aopalliance-1.0.jar -aopalliance-repackaged-2.5.0.jar -apache-log4j-extras-1.2.17.jar -apacheds-i18n-2.0.0-M15.jar -apacheds-kerberos-codec-2.0.0-M15.jar -api-asn1-api-1.0.0-M20.jar -api-util-1.0.0-M20.jar -arpack_combined_all-0.1.jar -arrow-format-0.12.0.jar -arrow-memory-0.12.0.jar -arrow-vector-0.12.0.jar -automaton-1.11-8.jar -avro-1.8.2.jar -avro-ipc-1.8.2.jar -avro-mapred-1.8.2-hadoop2.jar -bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12-0.13.2.jar -breeze_2.12-0.13.2.jar -chill-java-0.9.3.jar -chill_2.12-0.9.3.jar -commons-beanutils-1.9.3.jar -commons-cli-1.2.jar -commons-codec-1.10.jar -commons-collections-3.2.2.jar -commons-compiler-3.0.15.jar -commons-compress-1.8.1.jar -commons-configuration-1.6.jar -commons-crypto-1.0.0.jar -commons-dbcp-1.4.jar -commons-digester-1.8.jar -commons-httpclient-3.1.jar -commons-io-2.4.jar -commons-lang-2.6.jar -commons-lang3-3.8.1.jar -commons-logging-1.1.3.jar -commons-math3-3.4.1.jar -commons-net-3.1.jar -commons-pool-1.5.4.jar -commons-text-1.6.jar -compress-lzf-1.0.3.jar -core-1.1.2.jar -curator-client-2.7.1.jar -curator-framework-2.7.1.jar -curator-recipes-2.7.1.jar -datanucleus-api-jdo-3.2.6.jar 
-datanucleus-core-3.2.10.jar -datanucleus-rdbms-3.2.9.jar -derby-10.12.1.1.jar -flatbuffers-java-1.9.0.jar -generex-1.0.2.jar -gson-2.2.4.jar -guava-14.0.1.jar -guice-3.0.jar -guice-servlet-3.0.jar -hadoop-annotations-2.7.4.jar -hadoop-auth-2.7.4.jar -hadoop-client-2.7.4.jar -hadoop-common-2.7.4.jar -hadoop-hdfs-2.7.4.jar -hadoop-mapreduce-client-app-2.7.4.jar -hadoop-mapreduce-client-common-2.7.4.jar -hadoop-mapreduce-client-core-2.7.4.jar -hadoop-mapreduce-client-jobclient-2.7.4.jar -hadoop-mapreduce-client-shuffle-2.7.4.jar -hadoop-yarn-api-2.7.4.jar -hadoop-yarn-client-2.7.4.jar -hadoop-yarn-common-2.7.4.jar -hadoop-yarn-server-common-2.7.4.jar -hadoop-yarn-server-web-proxy-2.7.4.jar -hk2-api-2.5.0.jar -hk2-locator-2.5.0.jar -hk2-utils-2.5.0.jar -hppc-0.7.2.jar -htrace-core-3.1.0-incubating.jar -httpclient-4.5.6.jar -httpcore-4.4.10.jar -istack-commons-runtime-3.0.8.jar -ivy-2.4.0.jar -jackson-annotations-2.9.9.jar -jackson-core-2.9.9.jar -jackson-core-asl-1.9.13.jar -jackson-databind-2.9.9.3.jar -jackson-dataformat-yaml-2.9.9.jar -jackson-jaxrs-1.9.13.jar -jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations-2.9.9.jar -jackson-module-paranamer-2.9.9.jar -jackson-module-scala_2.12-2.9.9.jar -jackson-xc-1.9.13.jar -jakarta.annotation-api-1.3.4.jar -jakarta.inject-2.5.0.jar -jakarta.ws.rs-api-2.1.5.jar -jakarta.xml.bind-api-2.3.2.jar -janino-3.0.15.jar -javassist-3.22.0-CR2.jar -javax.el-3.0.1-b11.jar -javax.inject-1.jar -javax.servlet-api-3.1.0.jar -javolution-5.5.1.jar -jaxb-api-2.2.2.jar -jaxb-runtime-2.3.2.jar -jcl-over-slf4j-1.7.16.jar -jdo-api-3.0.1.jar -jersey-client-2.29.jar -jersey-common-2.29.jar -jersey-container-servlet-2.29.jar -jersey-container-servlet-core-2.29.jar -jersey-hk2-2.29.jar -jersey-media-jaxb-2.29.jar -jersey-server-2.29.jar -jetty-6.1.26.jar -jetty-sslengine-6.1.26.jar -jetty-util-6.1.26.jar -jline-2.14.6.jar -joda-time-2.9.3.jar -jodd-core-3.5.2.jar -jpam-1.1.jar -json4s-ast_2.12-3.6.6.jar -json4s-core_2.12-3.6.6.jar 
-json4s-jackson_2.12-3.6.6.jar -json4s-scalap_2.12-3.6.6.jar -jsp-api-2.1.jar -jsr305-3.0.0.jar -jta-1.1.jar -jtransforms-2.4.0.jar -jul-to-slf4j-1.7.16.jar -kryo-shaded-4.0.2.jar -kubernetes-client-4.4.2.jar -kubernetes-model-4.4.2.jar -kubernetes-model-common-4.4.2.jar -leveldbjni-all-1.8.jar -libfb303-0.9.3.jar -libthrift-0.12.0.jar -log4j-1.2.17.jar -logging-interceptor-3.12.0.jar -lz4-java-1.6.0.jar -machinist_2.12-0.6.1.jar -macro-compat_2.12-1.1.1.jar -mesos-1.4.0-shaded-protobuf.jar -metrics-core-3.1.5.jar -metrics-graphite-3.1.5.jar -metrics-json-3.1.5.jar -metrics-jvm-3.1.5.jar -minlog-1.3.0.jar -netty-all-4.1.30.Final.jar -objenesis-2.5.1.jar -okapi-shade-0.4.2.jar -okhttp-3.8.1.jar -okio-1.13.0.jar -opencsv-2.3.jar -orc-core-1.5.5-nohive.jar -orc-mapreduce-1.5.5-nohive.jar -orc-shims-1.5.5.jar -oro-2.0.8.jar -osgi-resource-locator-1.0.3.jar -paranamer-2.8.jar -parquet-column-1.10.1.jar -parquet-common-1.10.1.jar -parquet-encoding-1.10.1.jar -parquet-format-2.4.0.jar -parquet-hadoop-1.10.1.jar -parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.10.1.jar -protobuf-java-2.5.0.jar -py4j-0.10.8.1.jar -pyrolite-4.30.jar -scala-compiler-2.12.8.jar -scala-library-2.12.8.jar -scala-parser-combinators_2.12-1.1.0.jar -scala-reflect-2.12.8.jar -scala-xml_2.12-1.2.0.jar -shapeless_2.12-2.3.2.jar -shims-0.7.45.jar -slf4j-api-1.7.16.jar -slf4j-log4j12-1.7.16.jar -snakeyaml-1.23.jar -snappy-0.2.jar -snappy-java-1.1.7.3.jar -spire-macros_2.12-0.13.0.jar -spire_2.12-0.13.0.jar -stax-api-1.0-2.jar -stax-api-1.0.1.jar -stream-2.9.6.jar -stringtemplate-3.2.1.jar -super-csv-2.2.0.jar -univocity-parsers-2.7.3.jar -validation-api-2.0.1.Final.jar -xbean-asm7-shaded-4.14.jar -xercesImpl-2.9.1.jar -xmlenc-0.52.jar -xz-1.5.jar -zjsonpatch-0.3.0.jar -zookeeper-3.4.6.jar -zstd-jni-1.4.2-1.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 new file mode 100644 index 0000000000000..534ac39e0c46e --- /dev/null +++ 
b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 @@ -0,0 +1,209 @@ +JLargeArrays/1.5//JLargeArrays-1.5.jar +JTransforms/3.1//JTransforms-3.1.jar +JavaEWAH/0.3.2//JavaEWAH-0.3.2.jar +RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar +ST4/4.0.4//ST4-4.0.4.jar +activation/1.1.1//activation-1.1.1.jar +aircompressor/0.10//aircompressor-0.10.jar +algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +antlr-runtime/3.4//antlr-runtime-3.4.jar +antlr/2.7.7//antlr-2.7.7.jar +antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar +aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar +aopalliance/1.0//aopalliance-1.0.jar +apache-log4j-extras/1.2.17//apache-log4j-extras-1.2.17.jar +apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar +apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar +api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar +api-util/1.0.0-M20//api-util-1.0.0-M20.jar +arpack_combined_all/0.1//arpack_combined_all-0.1.jar +arrow-format/0.15.1//arrow-format-0.15.1.jar +arrow-memory/0.15.1//arrow-memory-0.15.1.jar +arrow-vector/0.15.1//arrow-vector-0.15.1.jar +audience-annotations/0.5.0//audience-annotations-0.5.0.jar +automaton/1.11-8//automaton-1.11-8.jar +avro-ipc/1.8.2//avro-ipc-1.8.2.jar +avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar +avro/1.8.2//avro-1.8.2.jar +bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar +breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar +breeze_2.12/1.0//breeze_2.12-1.0.jar +cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar +chill-java/0.9.5//chill-java-0.9.5.jar +chill_2.12/0.9.5//chill_2.12-0.9.5.jar +commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar +commons-cli/1.2//commons-cli-1.2.jar +commons-codec/1.10//commons-codec-1.10.jar +commons-collections/3.2.2//commons-collections-3.2.2.jar +commons-compiler/3.0.15//commons-compiler-3.0.15.jar +commons-compress/1.8.1//commons-compress-1.8.1.jar +commons-configuration/1.6//commons-configuration-1.6.jar +commons-crypto/1.0.0//commons-crypto-1.0.0.jar 
+commons-dbcp/1.4//commons-dbcp-1.4.jar +commons-digester/1.8//commons-digester-1.8.jar +commons-httpclient/3.1//commons-httpclient-3.1.jar +commons-io/2.4//commons-io-2.4.jar +commons-lang/2.6//commons-lang-2.6.jar +commons-lang3/3.9//commons-lang3-3.9.jar +commons-logging/1.1.3//commons-logging-1.1.3.jar +commons-math3/3.4.1//commons-math3-3.4.1.jar +commons-net/3.1//commons-net-3.1.jar +commons-pool/1.5.4//commons-pool-1.5.4.jar +commons-text/1.6//commons-text-1.6.jar +compress-lzf/1.0.3//compress-lzf-1.0.3.jar +core/1.1.2//core-1.1.2.jar +curator-client/2.7.1//curator-client-2.7.1.jar +curator-framework/2.7.1//curator-framework-2.7.1.jar +curator-recipes/2.7.1//curator-recipes-2.7.1.jar +datanucleus-api-jdo/3.2.6//datanucleus-api-jdo-3.2.6.jar +datanucleus-core/3.2.10//datanucleus-core-3.2.10.jar +datanucleus-rdbms/3.2.9//datanucleus-rdbms-3.2.9.jar +derby/10.12.1.1//derby-10.12.1.1.jar +flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar +generex/1.0.2//generex-1.0.2.jar +gson/2.2.4//gson-2.2.4.jar +guava/14.0.1//guava-14.0.1.jar +guice-servlet/3.0//guice-servlet-3.0.jar +guice/3.0//guice-3.0.jar +hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar +hadoop-auth/2.7.4//hadoop-auth-2.7.4.jar +hadoop-client/2.7.4//hadoop-client-2.7.4.jar +hadoop-common/2.7.4//hadoop-common-2.7.4.jar +hadoop-hdfs/2.7.4//hadoop-hdfs-2.7.4.jar +hadoop-mapreduce-client-app/2.7.4//hadoop-mapreduce-client-app-2.7.4.jar +hadoop-mapreduce-client-common/2.7.4//hadoop-mapreduce-client-common-2.7.4.jar +hadoop-mapreduce-client-core/2.7.4//hadoop-mapreduce-client-core-2.7.4.jar +hadoop-mapreduce-client-jobclient/2.7.4//hadoop-mapreduce-client-jobclient-2.7.4.jar +hadoop-mapreduce-client-shuffle/2.7.4//hadoop-mapreduce-client-shuffle-2.7.4.jar +hadoop-yarn-api/2.7.4//hadoop-yarn-api-2.7.4.jar +hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar +hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar +hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar 
+hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar +hk2-api/2.6.1//hk2-api-2.6.1.jar +hk2-locator/2.6.1//hk2-locator-2.6.1.jar +hk2-utils/2.6.1//hk2-utils-2.6.1.jar +htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar +httpclient/4.5.6//httpclient-4.5.6.jar +httpcore/4.4.12//httpcore-4.4.12.jar +istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar +ivy/2.4.0//ivy-2.4.0.jar +jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar +jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar +jackson-core/2.10.0//jackson-core-2.10.0.jar +jackson-databind/2.10.0//jackson-databind-2.10.0.jar +jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar +jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar +jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar +jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar +jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar +jackson-xc/1.9.13//jackson-xc-1.9.13.jar +jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar +jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar +jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar +jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar +jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar +jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar +janino/3.0.15//janino-3.0.15.jar +javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.inject/1//javax.inject-1.jar +javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar +javolution/5.5.1//javolution-5.5.1.jar +jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar +jcl-over-slf4j/1.7.16//jcl-over-slf4j-1.7.16.jar +jdo-api/3.0.1//jdo-api-3.0.1.jar +jersey-client/2.30//jersey-client-2.30.jar +jersey-common/2.30//jersey-common-2.30.jar +jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar 
+jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar +jersey-hk2/2.30//jersey-hk2-2.30.jar +jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar +jersey-server/2.30//jersey-server-2.30.jar +jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar +jetty-util/6.1.26//jetty-util-6.1.26.jar +jetty/6.1.26//jetty-6.1.26.jar +jline/2.14.6//jline-2.14.6.jar +joda-time/2.10.5//joda-time-2.10.5.jar +jodd-core/3.5.2//jodd-core-3.5.2.jar +jpam/1.1//jpam-1.1.jar +json4s-ast_2.12/3.6.6//json4s-ast_2.12-3.6.6.jar +json4s-core_2.12/3.6.6//json4s-core_2.12-3.6.6.jar +json4s-jackson_2.12/3.6.6//json4s-jackson_2.12-3.6.6.jar +json4s-scalap_2.12/3.6.6//json4s-scalap_2.12-3.6.6.jar +jsp-api/2.1//jsp-api-2.1.jar +jsr305/3.0.0//jsr305-3.0.0.jar +jta/1.1//jta-1.1.jar +jul-to-slf4j/1.7.16//jul-to-slf4j-1.7.16.jar +kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar +kubernetes-client/4.7.1//kubernetes-client-4.7.1.jar +kubernetes-model-common/4.7.1//kubernetes-model-common-4.7.1.jar +kubernetes-model/4.7.1//kubernetes-model-4.7.1.jar +leveldbjni-all/1.8//leveldbjni-all-1.8.jar +libfb303/0.9.3//libfb303-0.9.3.jar +libthrift/0.12.0//libthrift-0.12.0.jar +log4j/1.2.17//log4j-1.2.17.jar +logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +lz4-java/1.7.1//lz4-java-1.7.1.jar +machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar +macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar +mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar +metrics-core/4.1.1//metrics-core-4.1.1.jar +metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar +metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar +metrics-json/4.1.1//metrics-json-4.1.1.jar +metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar +minlog/1.3.0//minlog-1.3.0.jar +netty-all/4.1.42.Final//netty-all-4.1.42.Final.jar +objenesis/2.5.1//objenesis-2.5.1.jar +okhttp/3.12.6//okhttp-3.12.6.jar +okio/1.15.0//okio-1.15.0.jar +opencsv/2.3//opencsv-2.3.jar +orc-core/1.5.9/nohive/orc-core-1.5.9-nohive.jar +orc-mapreduce/1.5.9/nohive/orc-mapreduce-1.5.9-nohive.jar 
+orc-shims/1.5.9//orc-shims-1.5.9.jar +oro/2.0.8//oro-2.0.8.jar +osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar +paranamer/2.8//paranamer-2.8.jar +parquet-column/1.10.1//parquet-column-1.10.1.jar +parquet-common/1.10.1//parquet-common-1.10.1.jar +parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar +parquet-format/2.4.0//parquet-format-2.4.0.jar +parquet-hadoop-bundle/1.6.0//parquet-hadoop-bundle-1.6.0.jar +parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar +parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar +protobuf-java/2.5.0//protobuf-java-2.5.0.jar +py4j/0.10.8.1//py4j-0.10.8.1.jar +pyrolite/4.30//pyrolite-4.30.jar +scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar +scala-compiler/2.12.10//scala-compiler-2.12.10.jar +scala-library/2.12.10//scala-library-2.12.10.jar +scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar +scala-reflect/2.12.10//scala-reflect-2.12.10.jar +scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar +shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar +shims/0.7.45//shims-0.7.45.jar +slf4j-api/1.7.16//slf4j-api-1.7.16.jar +slf4j-log4j12/1.7.16//slf4j-log4j12-1.7.16.jar +snakeyaml/1.24//snakeyaml-1.24.jar +snappy-java/1.1.7.3//snappy-java-1.1.7.3.jar +snappy/0.2//snappy-0.2.jar +spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar +spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar +spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar +spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar +stax-api/1.0-2//stax-api-1.0-2.jar +stax-api/1.0.1//stax-api-1.0.1.jar +stream/2.9.6//stream-2.9.6.jar +stringtemplate/3.2.1//stringtemplate-3.2.1.jar +super-csv/2.2.0//super-csv-2.2.0.jar +threeten-extra/1.5.0//threeten-extra-1.5.0.jar +univocity-parsers/2.8.3//univocity-parsers-2.8.3.jar +xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar +xercesImpl/2.9.1//xercesImpl-2.9.1.jar +xmlenc/0.52//xmlenc-0.52.jar +xz/1.5//xz-1.5.jar +zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar 
+zookeeper/3.4.14//zookeeper-3.4.14.jar +zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 new file mode 100644 index 0000000000000..c50cf96dc9065 --- /dev/null +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -0,0 +1,223 @@ +HikariCP/2.5.1//HikariCP-2.5.1.jar +JLargeArrays/1.5//JLargeArrays-1.5.jar +JTransforms/3.1//JTransforms-3.1.jar +RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar +ST4/4.0.4//ST4-4.0.4.jar +activation/1.1.1//activation-1.1.1.jar +aircompressor/0.10//aircompressor-0.10.jar +algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar +antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar +aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar +aopalliance/1.0//aopalliance-1.0.jar +apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar +apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar +api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar +api-util/1.0.0-M20//api-util-1.0.0-M20.jar +arpack_combined_all/0.1//arpack_combined_all-0.1.jar +arrow-format/0.15.1//arrow-format-0.15.1.jar +arrow-memory/0.15.1//arrow-memory-0.15.1.jar +arrow-vector/0.15.1//arrow-vector-0.15.1.jar +audience-annotations/0.5.0//audience-annotations-0.5.0.jar +automaton/1.11-8//automaton-1.11-8.jar +avro-ipc/1.8.2//avro-ipc-1.8.2.jar +avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar +avro/1.8.2//avro-1.8.2.jar +bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar +breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar +breeze_2.12/1.0//breeze_2.12-1.0.jar +cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar +chill-java/0.9.5//chill-java-0.9.5.jar +chill_2.12/0.9.5//chill_2.12-0.9.5.jar +commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar +commons-cli/1.2//commons-cli-1.2.jar +commons-codec/1.10//commons-codec-1.10.jar +commons-collections/3.2.2//commons-collections-3.2.2.jar +commons-compiler/3.0.15//commons-compiler-3.0.15.jar 
+commons-compress/1.8.1//commons-compress-1.8.1.jar +commons-configuration/1.6//commons-configuration-1.6.jar +commons-crypto/1.0.0//commons-crypto-1.0.0.jar +commons-dbcp/1.4//commons-dbcp-1.4.jar +commons-digester/1.8//commons-digester-1.8.jar +commons-httpclient/3.1//commons-httpclient-3.1.jar +commons-io/2.4//commons-io-2.4.jar +commons-lang/2.6//commons-lang-2.6.jar +commons-lang3/3.9//commons-lang3-3.9.jar +commons-logging/1.1.3//commons-logging-1.1.3.jar +commons-math3/3.4.1//commons-math3-3.4.1.jar +commons-net/3.1//commons-net-3.1.jar +commons-pool/1.5.4//commons-pool-1.5.4.jar +commons-text/1.6//commons-text-1.6.jar +compress-lzf/1.0.3//compress-lzf-1.0.3.jar +core/1.1.2//core-1.1.2.jar +curator-client/2.7.1//curator-client-2.7.1.jar +curator-framework/2.7.1//curator-framework-2.7.1.jar +curator-recipes/2.7.1//curator-recipes-2.7.1.jar +datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar +datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar +datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar +derby/10.12.1.1//derby-10.12.1.1.jar +dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar +flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar +generex/1.0.2//generex-1.0.2.jar +gson/2.2.4//gson-2.2.4.jar +guava/14.0.1//guava-14.0.1.jar +guice-servlet/3.0//guice-servlet-3.0.jar +guice/3.0//guice-3.0.jar +hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar +hadoop-auth/2.7.4//hadoop-auth-2.7.4.jar +hadoop-client/2.7.4//hadoop-client-2.7.4.jar +hadoop-common/2.7.4//hadoop-common-2.7.4.jar +hadoop-hdfs/2.7.4//hadoop-hdfs-2.7.4.jar +hadoop-mapreduce-client-app/2.7.4//hadoop-mapreduce-client-app-2.7.4.jar +hadoop-mapreduce-client-common/2.7.4//hadoop-mapreduce-client-common-2.7.4.jar +hadoop-mapreduce-client-core/2.7.4//hadoop-mapreduce-client-core-2.7.4.jar +hadoop-mapreduce-client-jobclient/2.7.4//hadoop-mapreduce-client-jobclient-2.7.4.jar 
+hadoop-mapreduce-client-shuffle/2.7.4//hadoop-mapreduce-client-shuffle-2.7.4.jar +hadoop-yarn-api/2.7.4//hadoop-yarn-api-2.7.4.jar +hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar +hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar +hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar +hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar +hive-beeline/2.3.6//hive-beeline-2.3.6.jar +hive-cli/2.3.6//hive-cli-2.3.6.jar +hive-common/2.3.6//hive-common-2.3.6.jar +hive-exec/2.3.6/core/hive-exec-2.3.6-core.jar +hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar +hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar +hive-metastore/2.3.6//hive-metastore-2.3.6.jar +hive-serde/2.3.6//hive-serde-2.3.6.jar +hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar +hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar +hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar +hive-shims/2.3.6//hive-shims-2.3.6.jar +hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar +hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar +hk2-api/2.6.1//hk2-api-2.6.1.jar +hk2-locator/2.6.1//hk2-locator-2.6.1.jar +hk2-utils/2.6.1//hk2-utils-2.6.1.jar +htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar +httpclient/4.5.6//httpclient-4.5.6.jar +httpcore/4.4.12//httpcore-4.4.12.jar +istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar +ivy/2.4.0//ivy-2.4.0.jar +jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar +jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar +jackson-core/2.10.0//jackson-core-2.10.0.jar +jackson-databind/2.10.0//jackson-databind-2.10.0.jar +jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar +jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar +jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar +jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar 
+jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar +jackson-xc/1.9.13//jackson-xc-1.9.13.jar +jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar +jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar +jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar +jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar +jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar +jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar +janino/3.0.15//janino-3.0.15.jar +javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.inject/1//javax.inject-1.jar +javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar +javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar +javolution/5.5.1//javolution-5.5.1.jar +jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar +jcl-over-slf4j/1.7.16//jcl-over-slf4j-1.7.16.jar +jdo-api/3.0.1//jdo-api-3.0.1.jar +jersey-client/2.30//jersey-client-2.30.jar +jersey-common/2.30//jersey-common-2.30.jar +jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar +jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar +jersey-hk2/2.30//jersey-hk2-2.30.jar +jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar +jersey-server/2.30//jersey-server-2.30.jar +jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar +jetty-util/6.1.26//jetty-util-6.1.26.jar +jetty/6.1.26//jetty-6.1.26.jar +jline/2.14.6//jline-2.14.6.jar +joda-time/2.10.5//joda-time-2.10.5.jar +jodd-core/3.5.2//jodd-core-3.5.2.jar +jpam/1.1//jpam-1.1.jar +json/1.8//json-1.8.jar +json4s-ast_2.12/3.6.6//json4s-ast_2.12-3.6.6.jar +json4s-core_2.12/3.6.6//json4s-core_2.12-3.6.6.jar +json4s-jackson_2.12/3.6.6//json4s-jackson_2.12-3.6.6.jar +json4s-scalap_2.12/3.6.6//json4s-scalap_2.12-3.6.6.jar +jsp-api/2.1//jsp-api-2.1.jar +jsr305/3.0.0//jsr305-3.0.0.jar +jta/1.1//jta-1.1.jar +jul-to-slf4j/1.7.16//jul-to-slf4j-1.7.16.jar +kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar +kubernetes-client/4.7.1//kubernetes-client-4.7.1.jar 
+kubernetes-model-common/4.7.1//kubernetes-model-common-4.7.1.jar +kubernetes-model/4.7.1//kubernetes-model-4.7.1.jar +leveldbjni-all/1.8//leveldbjni-all-1.8.jar +libfb303/0.9.3//libfb303-0.9.3.jar +libthrift/0.12.0//libthrift-0.12.0.jar +log4j/1.2.17//log4j-1.2.17.jar +logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +lz4-java/1.7.1//lz4-java-1.7.1.jar +machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar +macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar +mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar +metrics-core/4.1.1//metrics-core-4.1.1.jar +metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar +metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar +metrics-json/4.1.1//metrics-json-4.1.1.jar +metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar +minlog/1.3.0//minlog-1.3.0.jar +netty-all/4.1.42.Final//netty-all-4.1.42.Final.jar +objenesis/2.5.1//objenesis-2.5.1.jar +okhttp/3.12.6//okhttp-3.12.6.jar +okio/1.15.0//okio-1.15.0.jar +opencsv/2.3//opencsv-2.3.jar +orc-core/1.5.9//orc-core-1.5.9.jar +orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar +orc-shims/1.5.9//orc-shims-1.5.9.jar +oro/2.0.8//oro-2.0.8.jar +osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar +paranamer/2.8//paranamer-2.8.jar +parquet-column/1.10.1//parquet-column-1.10.1.jar +parquet-common/1.10.1//parquet-common-1.10.1.jar +parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar +parquet-format/2.4.0//parquet-format-2.4.0.jar +parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar +parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar +protobuf-java/2.5.0//protobuf-java-2.5.0.jar +py4j/0.10.8.1//py4j-0.10.8.1.jar +pyrolite/4.30//pyrolite-4.30.jar +scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar +scala-compiler/2.12.10//scala-compiler-2.12.10.jar +scala-library/2.12.10//scala-library-2.12.10.jar +scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar +scala-reflect/2.12.10//scala-reflect-2.12.10.jar +scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar 
+shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar +shims/0.7.45//shims-0.7.45.jar +slf4j-api/1.7.16//slf4j-api-1.7.16.jar +slf4j-log4j12/1.7.16//slf4j-log4j12-1.7.16.jar +snakeyaml/1.24//snakeyaml-1.24.jar +snappy-java/1.1.7.3//snappy-java-1.1.7.3.jar +spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar +spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar +spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar +spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar +stax-api/1.0-2//stax-api-1.0-2.jar +stax-api/1.0.1//stax-api-1.0.1.jar +stream/2.9.6//stream-2.9.6.jar +super-csv/2.2.0//super-csv-2.2.0.jar +threeten-extra/1.5.0//threeten-extra-1.5.0.jar +transaction-api/1.1//transaction-api-1.1.jar +univocity-parsers/2.8.3//univocity-parsers-2.8.3.jar +velocity/1.5//velocity-1.5.jar +xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar +xercesImpl/2.9.1//xercesImpl-2.9.1.jar +xmlenc/0.52//xmlenc-0.52.jar +xz/1.5//xz-1.5.jar +zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar +zookeeper/3.4.14//zookeeper-3.4.14.jar +zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar diff --git a/dev/deps/spark-deps-hadoop-3.2 b/dev/deps/spark-deps-hadoop-3.2 deleted file mode 100644 index de046634eefbb..0000000000000 --- a/dev/deps/spark-deps-hadoop-3.2 +++ /dev/null @@ -1,221 +0,0 @@ -JavaEWAH-0.3.2.jar -RoaringBitmap-0.7.45.jar -ST4-4.0.4.jar -accessors-smart-1.2.jar -activation-1.1.1.jar -aircompressor-0.10.jar -antlr-2.7.7.jar -antlr-runtime-3.4.jar -antlr4-runtime-4.7.1.jar -aopalliance-1.0.jar -aopalliance-repackaged-2.5.0.jar -apache-log4j-extras-1.2.17.jar -arpack_combined_all-0.1.jar -arrow-format-0.12.0.jar -arrow-memory-0.12.0.jar -arrow-vector-0.12.0.jar -audience-annotations-0.5.0.jar -automaton-1.11-8.jar -avro-1.8.2.jar -avro-ipc-1.8.2.jar -avro-mapred-1.8.2-hadoop2.jar -bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12-0.13.2.jar -breeze_2.12-0.13.2.jar -chill-java-0.9.3.jar -chill_2.12-0.9.3.jar -commons-beanutils-1.9.3.jar -commons-cli-1.2.jar -commons-codec-1.10.jar 
-commons-collections-3.2.2.jar -commons-compiler-3.0.15.jar -commons-compress-1.8.1.jar -commons-configuration2-2.1.1.jar -commons-crypto-1.0.0.jar -commons-daemon-1.0.13.jar -commons-dbcp-1.4.jar -commons-httpclient-3.1.jar -commons-io-2.4.jar -commons-lang-2.6.jar -commons-lang3-3.8.1.jar -commons-logging-1.1.3.jar -commons-math3-3.4.1.jar -commons-net-3.1.jar -commons-pool-1.5.4.jar -commons-text-1.6.jar -compress-lzf-1.0.3.jar -core-1.1.2.jar -curator-client-2.13.0.jar -curator-framework-2.13.0.jar -curator-recipes-2.13.0.jar -datanucleus-api-jdo-3.2.6.jar -datanucleus-core-4.1.17.jar -datanucleus-rdbms-3.2.9.jar -derby-10.12.1.1.jar -dnsjava-2.1.7.jar -ehcache-3.3.1.jar -flatbuffers-java-1.9.0.jar -generex-1.0.2.jar -geronimo-jcache_1.0_spec-1.0-alpha-1.jar -gson-2.2.4.jar -guava-14.0.1.jar -guice-4.0.jar -guice-servlet-4.0.jar -hadoop-annotations-3.2.0.jar -hadoop-auth-3.2.0.jar -hadoop-client-3.2.0.jar -hadoop-common-3.2.0.jar -hadoop-hdfs-client-3.2.0.jar -hadoop-mapreduce-client-common-3.2.0.jar -hadoop-mapreduce-client-core-3.2.0.jar -hadoop-mapreduce-client-jobclient-3.2.0.jar -hadoop-yarn-api-3.2.0.jar -hadoop-yarn-client-3.2.0.jar -hadoop-yarn-common-3.2.0.jar -hadoop-yarn-registry-3.2.0.jar -hadoop-yarn-server-common-3.2.0.jar -hadoop-yarn-server-web-proxy-3.2.0.jar -hive-storage-api-2.6.0.jar -hk2-api-2.5.0.jar -hk2-locator-2.5.0.jar -hk2-utils-2.5.0.jar -hppc-0.7.2.jar -htrace-core4-4.1.0-incubating.jar -httpclient-4.5.6.jar -httpcore-4.4.10.jar -istack-commons-runtime-3.0.8.jar -ivy-2.4.0.jar -jackson-annotations-2.9.9.jar -jackson-core-2.9.9.jar -jackson-core-asl-1.9.13.jar -jackson-databind-2.9.9.3.jar -jackson-dataformat-yaml-2.9.9.jar -jackson-jaxrs-base-2.9.5.jar -jackson-jaxrs-json-provider-2.9.5.jar -jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations-2.9.9.jar -jackson-module-paranamer-2.9.9.jar -jackson-module-scala_2.12-2.9.9.jar -jakarta.annotation-api-1.3.4.jar -jakarta.inject-2.5.0.jar -jakarta.ws.rs-api-2.1.5.jar 
-jakarta.xml.bind-api-2.3.2.jar -janino-3.0.15.jar -javassist-3.22.0-CR2.jar -javax.el-3.0.1-b11.jar -javax.inject-1.jar -javax.servlet-api-3.1.0.jar -javolution-5.5.1.jar -jaxb-api-2.2.11.jar -jaxb-runtime-2.3.2.jar -jcip-annotations-1.0-1.jar -jcl-over-slf4j-1.7.16.jar -jdo-api-3.0.1.jar -jersey-client-2.29.jar -jersey-common-2.29.jar -jersey-container-servlet-2.29.jar -jersey-container-servlet-core-2.29.jar -jersey-hk2-2.29.jar -jersey-media-jaxb-2.29.jar -jersey-server-2.29.jar -jetty-webapp-9.4.18.v20190429.jar -jetty-xml-9.4.18.v20190429.jar -jline-2.14.6.jar -joda-time-2.9.3.jar -jodd-core-3.5.2.jar -jpam-1.1.jar -json-smart-2.3.jar -json4s-ast_2.12-3.6.6.jar -json4s-core_2.12-3.6.6.jar -json4s-jackson_2.12-3.6.6.jar -json4s-scalap_2.12-3.6.6.jar -jsp-api-2.1.jar -jsr305-3.0.0.jar -jta-1.1.jar -jtransforms-2.4.0.jar -jul-to-slf4j-1.7.16.jar -kerb-admin-1.0.1.jar -kerb-client-1.0.1.jar -kerb-common-1.0.1.jar -kerb-core-1.0.1.jar -kerb-crypto-1.0.1.jar -kerb-identity-1.0.1.jar -kerb-server-1.0.1.jar -kerb-simplekdc-1.0.1.jar -kerb-util-1.0.1.jar -kerby-asn1-1.0.1.jar -kerby-config-1.0.1.jar -kerby-pkix-1.0.1.jar -kerby-util-1.0.1.jar -kerby-xdr-1.0.1.jar -kryo-shaded-4.0.2.jar -kubernetes-client-4.4.2.jar -kubernetes-model-4.4.2.jar -kubernetes-model-common-4.4.2.jar -leveldbjni-all-1.8.jar -libfb303-0.9.3.jar -libthrift-0.12.0.jar -log4j-1.2.17.jar -logging-interceptor-3.12.0.jar -lz4-java-1.6.0.jar -machinist_2.12-0.6.1.jar -macro-compat_2.12-1.1.1.jar -mesos-1.4.0-shaded-protobuf.jar -metrics-core-3.1.5.jar -metrics-graphite-3.1.5.jar -metrics-json-3.1.5.jar -metrics-jvm-3.1.5.jar -minlog-1.3.0.jar -mssql-jdbc-6.2.1.jre7.jar -netty-all-4.1.30.Final.jar -nimbus-jose-jwt-4.41.1.jar -objenesis-2.5.1.jar -okapi-shade-0.4.2.jar -okhttp-2.7.5.jar -okhttp-3.8.1.jar -okio-1.13.0.jar -opencsv-2.3.jar -orc-core-1.5.5-nohive.jar -orc-mapreduce-1.5.5-nohive.jar -orc-shims-1.5.5.jar -oro-2.0.8.jar -osgi-resource-locator-1.0.3.jar -paranamer-2.8.jar 
-parquet-column-1.10.1.jar -parquet-common-1.10.1.jar -parquet-encoding-1.10.1.jar -parquet-format-2.4.0.jar -parquet-hadoop-1.10.1.jar -parquet-jackson-1.10.1.jar -protobuf-java-2.5.0.jar -py4j-0.10.8.1.jar -pyrolite-4.30.jar -re2j-1.1.jar -scala-compiler-2.12.8.jar -scala-library-2.12.8.jar -scala-parser-combinators_2.12-1.1.0.jar -scala-reflect-2.12.8.jar -scala-xml_2.12-1.2.0.jar -shapeless_2.12-2.3.2.jar -shims-0.7.45.jar -slf4j-api-1.7.16.jar -slf4j-log4j12-1.7.16.jar -snakeyaml-1.23.jar -snappy-0.2.jar -snappy-java-1.1.7.3.jar -spire-macros_2.12-0.13.0.jar -spire_2.12-0.13.0.jar -stax-api-1.0.1.jar -stax2-api-3.1.4.jar -stream-2.9.6.jar -stringtemplate-3.2.1.jar -super-csv-2.2.0.jar -token-provider-1.0.1.jar -univocity-parsers-2.7.3.jar -validation-api-2.0.1.Final.jar -woodstox-core-5.0.3.jar -xbean-asm7-shaded-4.14.jar -xz-1.5.jar -zjsonpatch-0.3.0.jar -zookeeper-3.4.13.jar -zstd-jni-1.4.2-1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 new file mode 100644 index 0000000000000..c37ce7fab36f6 --- /dev/null +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -0,0 +1,239 @@ +HikariCP/2.5.1//HikariCP-2.5.1.jar +JLargeArrays/1.5//JLargeArrays-1.5.jar +JTransforms/3.1//JTransforms-3.1.jar +RoaringBitmap/0.7.45//RoaringBitmap-0.7.45.jar +ST4/4.0.4//ST4-4.0.4.jar +accessors-smart/1.2//accessors-smart-1.2.jar +activation/1.1.1//activation-1.1.1.jar +aircompressor/0.10//aircompressor-0.10.jar +algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar +antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar +aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar +aopalliance/1.0//aopalliance-1.0.jar +arpack_combined_all/0.1//arpack_combined_all-0.1.jar +arrow-format/0.15.1//arrow-format-0.15.1.jar +arrow-memory/0.15.1//arrow-memory-0.15.1.jar +arrow-vector/0.15.1//arrow-vector-0.15.1.jar +audience-annotations/0.5.0//audience-annotations-0.5.0.jar 
+automaton/1.11-8//automaton-1.11-8.jar +avro-ipc/1.8.2//avro-ipc-1.8.2.jar +avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar +avro/1.8.2//avro-1.8.2.jar +bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar +breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar +breeze_2.12/1.0//breeze_2.12-1.0.jar +cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar +chill-java/0.9.5//chill-java-0.9.5.jar +chill_2.12/0.9.5//chill_2.12-0.9.5.jar +commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar +commons-cli/1.2//commons-cli-1.2.jar +commons-codec/1.10//commons-codec-1.10.jar +commons-collections/3.2.2//commons-collections-3.2.2.jar +commons-compiler/3.0.15//commons-compiler-3.0.15.jar +commons-compress/1.8.1//commons-compress-1.8.1.jar +commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar +commons-crypto/1.0.0//commons-crypto-1.0.0.jar +commons-daemon/1.0.13//commons-daemon-1.0.13.jar +commons-dbcp/1.4//commons-dbcp-1.4.jar +commons-httpclient/3.1//commons-httpclient-3.1.jar +commons-io/2.4//commons-io-2.4.jar +commons-lang/2.6//commons-lang-2.6.jar +commons-lang3/3.9//commons-lang3-3.9.jar +commons-logging/1.1.3//commons-logging-1.1.3.jar +commons-math3/3.4.1//commons-math3-3.4.1.jar +commons-net/3.1//commons-net-3.1.jar +commons-pool/1.5.4//commons-pool-1.5.4.jar +commons-text/1.6//commons-text-1.6.jar +compress-lzf/1.0.3//compress-lzf-1.0.3.jar +core/1.1.2//core-1.1.2.jar +curator-client/2.13.0//curator-client-2.13.0.jar +curator-framework/2.13.0//curator-framework-2.13.0.jar +curator-recipes/2.13.0//curator-recipes-2.13.0.jar +datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar +datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar +datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar +derby/10.12.1.1//derby-10.12.1.1.jar +dnsjava/2.1.7//dnsjava-2.1.7.jar +dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar +ehcache/3.3.1//ehcache-3.3.1.jar +flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar 
+generex/1.0.2//generex-1.0.2.jar +geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar +gson/2.2.4//gson-2.2.4.jar +guava/14.0.1//guava-14.0.1.jar +guice-servlet/4.0//guice-servlet-4.0.jar +guice/4.0//guice-4.0.jar +hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar +hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar +hadoop-client/3.2.0//hadoop-client-3.2.0.jar +hadoop-common/3.2.0//hadoop-common-3.2.0.jar +hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar +hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar +hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar +hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar +hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar +hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar +hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar +hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar +hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar +hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar +hive-beeline/2.3.6//hive-beeline-2.3.6.jar +hive-cli/2.3.6//hive-cli-2.3.6.jar +hive-common/2.3.6//hive-common-2.3.6.jar +hive-exec/2.3.6/core/hive-exec-2.3.6-core.jar +hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar +hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar +hive-metastore/2.3.6//hive-metastore-2.3.6.jar +hive-serde/2.3.6//hive-serde-2.3.6.jar +hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar +hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar +hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar +hive-shims/2.3.6//hive-shims-2.3.6.jar +hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar +hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar +hk2-api/2.6.1//hk2-api-2.6.1.jar +hk2-locator/2.6.1//hk2-locator-2.6.1.jar +hk2-utils/2.6.1//hk2-utils-2.6.1.jar +htrace-core4/4.1.0-incubating//htrace-core4-4.1.0-incubating.jar +httpclient/4.5.6//httpclient-4.5.6.jar 
+httpcore/4.4.12//httpcore-4.4.12.jar +istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar +ivy/2.4.0//ivy-2.4.0.jar +jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar +jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar +jackson-core/2.10.0//jackson-core-2.10.0.jar +jackson-databind/2.10.0//jackson-databind-2.10.0.jar +jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar +jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar +jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar +jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar +jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar +jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar +jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar +jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar +jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar +jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar +jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar +jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar +janino/3.0.15//janino-3.0.15.jar +javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.inject/1//javax.inject-1.jar +javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar +javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar +javolution/5.5.1//javolution-5.5.1.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar +jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar +jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar +jcl-over-slf4j/1.7.16//jcl-over-slf4j-1.7.16.jar +jdo-api/3.0.1//jdo-api-3.0.1.jar +jersey-client/2.30//jersey-client-2.30.jar +jersey-common/2.30//jersey-common-2.30.jar +jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar +jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar +jersey-hk2/2.30//jersey-hk2-2.30.jar +jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar 
+jersey-server/2.30//jersey-server-2.30.jar +jline/2.14.6//jline-2.14.6.jar +joda-time/2.10.5//joda-time-2.10.5.jar +jodd-core/3.5.2//jodd-core-3.5.2.jar +jpam/1.1//jpam-1.1.jar +json-smart/2.3//json-smart-2.3.jar +json/1.8//json-1.8.jar +json4s-ast_2.12/3.6.6//json4s-ast_2.12-3.6.6.jar +json4s-core_2.12/3.6.6//json4s-core_2.12-3.6.6.jar +json4s-jackson_2.12/3.6.6//json4s-jackson_2.12-3.6.6.jar +json4s-scalap_2.12/3.6.6//json4s-scalap_2.12-3.6.6.jar +jsp-api/2.1//jsp-api-2.1.jar +jsr305/3.0.0//jsr305-3.0.0.jar +jta/1.1//jta-1.1.jar +jul-to-slf4j/1.7.16//jul-to-slf4j-1.7.16.jar +kerb-admin/1.0.1//kerb-admin-1.0.1.jar +kerb-client/1.0.1//kerb-client-1.0.1.jar +kerb-common/1.0.1//kerb-common-1.0.1.jar +kerb-core/1.0.1//kerb-core-1.0.1.jar +kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar +kerb-identity/1.0.1//kerb-identity-1.0.1.jar +kerb-server/1.0.1//kerb-server-1.0.1.jar +kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar +kerb-util/1.0.1//kerb-util-1.0.1.jar +kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar +kerby-config/1.0.1//kerby-config-1.0.1.jar +kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar +kerby-util/1.0.1//kerby-util-1.0.1.jar +kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar +kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar +kubernetes-client/4.7.1//kubernetes-client-4.7.1.jar +kubernetes-model-common/4.7.1//kubernetes-model-common-4.7.1.jar +kubernetes-model/4.7.1//kubernetes-model-4.7.1.jar +leveldbjni-all/1.8//leveldbjni-all-1.8.jar +libfb303/0.9.3//libfb303-0.9.3.jar +libthrift/0.12.0//libthrift-0.12.0.jar +log4j/1.2.17//log4j-1.2.17.jar +logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +lz4-java/1.7.1//lz4-java-1.7.1.jar +machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar +macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar +mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar +metrics-core/4.1.1//metrics-core-4.1.1.jar +metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar +metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar +metrics-json/4.1.1//metrics-json-4.1.1.jar 
+metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar +minlog/1.3.0//minlog-1.3.0.jar +mssql-jdbc/6.2.1.jre7//mssql-jdbc-6.2.1.jre7.jar +netty-all/4.1.42.Final//netty-all-4.1.42.Final.jar +nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar +objenesis/2.5.1//objenesis-2.5.1.jar +okhttp/2.7.5//okhttp-2.7.5.jar +okhttp/3.12.6//okhttp-3.12.6.jar +okio/1.15.0//okio-1.15.0.jar +opencsv/2.3//opencsv-2.3.jar +orc-core/1.5.9//orc-core-1.5.9.jar +orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar +orc-shims/1.5.9//orc-shims-1.5.9.jar +oro/2.0.8//oro-2.0.8.jar +osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar +paranamer/2.8//paranamer-2.8.jar +parquet-column/1.10.1//parquet-column-1.10.1.jar +parquet-common/1.10.1//parquet-common-1.10.1.jar +parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar +parquet-format/2.4.0//parquet-format-2.4.0.jar +parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar +parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar +protobuf-java/2.5.0//protobuf-java-2.5.0.jar +py4j/0.10.8.1//py4j-0.10.8.1.jar +pyrolite/4.30//pyrolite-4.30.jar +re2j/1.1//re2j-1.1.jar +scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar +scala-compiler/2.12.10//scala-compiler-2.12.10.jar +scala-library/2.12.10//scala-library-2.12.10.jar +scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar +scala-reflect/2.12.10//scala-reflect-2.12.10.jar +scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar +shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar +shims/0.7.45//shims-0.7.45.jar +slf4j-api/1.7.16//slf4j-api-1.7.16.jar +slf4j-log4j12/1.7.16//slf4j-log4j12-1.7.16.jar +snakeyaml/1.24//snakeyaml-1.24.jar +snappy-java/1.1.7.3//snappy-java-1.1.7.3.jar +spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar +spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar +spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar +spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar +stax-api/1.0.1//stax-api-1.0.1.jar +stax2-api/3.1.4//stax2-api-3.1.4.jar 
+stream/2.9.6//stream-2.9.6.jar +super-csv/2.2.0//super-csv-2.2.0.jar +threeten-extra/1.5.0//threeten-extra-1.5.0.jar +token-provider/1.0.1//token-provider-1.0.1.jar +transaction-api/1.1//transaction-api-1.1.jar +univocity-parsers/2.8.3//univocity-parsers-2.8.3.jar +velocity/1.5//velocity-1.5.jar +woodstox-core/5.0.3//woodstox-core-5.0.3.jar +xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar +xz/1.5//xz-1.5.jar +zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar +zookeeper/3.4.14//zookeeper-3.4.14.jar +zstd-jni/1.4.4-3//zstd-jni-1.4.4-3.jar diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py index fa1736163d4c6..b444b74d4027c 100755 --- a/dev/github_jira_sync.py +++ b/dev/github_jira_sync.py @@ -116,7 +116,8 @@ def build_pr_component_dic(jira_prs): dic = {} for issue, pr in jira_prs: print(issue) - jira_components = [c.name.upper() for c in jira_client.issue(issue).fields.components] + page = get_json(get_url(JIRA_API_BASE + "/rest/api/2/issue/" + issue)) + jira_components = [c['name'].upper() for c in page['fields']['components']] if pr['number'] in dic: dic[pr['number']][1].update(jira_components) else: @@ -163,7 +164,8 @@ def reset_pr_labels(pr_num, jira_components): url = pr['html_url'] title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login']) try: - existing_links = map(lambda l: l.raw['object']['url'], jira_client.remote_links(issue)) + page = get_json(get_url(JIRA_API_BASE + "/rest/api/2/issue/" + issue + "/remotelink")) + existing_links = map(lambda l: l['object']['url'], page) except: print("Failure reading JIRA %s (does it exist?)" % issue) print(sys.exc_info()[0]) diff --git a/dev/lint-python b/dev/lint-python index 06816932e754a..24f0d8fb6ea36 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -27,6 +27,8 @@ MINIMUM_PYCODESTYLE="2.4.0" SPHINX_BUILD="sphinx-build" +PYTHON_EXECUTABLE="python3" + function compile_python_test { local COMPILE_STATUS= local COMPILE_REPORT= @@ -36,9 +38,9 @@ function compile_python_test { exit 1; fi - # 
compileall: https://docs.python.org/2/library/compileall.html + # compileall: https://docs.python.org/3/library/compileall.html echo "starting python compilation test..." - COMPILE_REPORT=$( (python -B -mcompileall -q -l $1) 2>&1) + COMPILE_REPORT=$( ("$PYTHON_EXECUTABLE" -B -mcompileall -q -l -x "[/\\\\][.]git" $1) 2>&1) COMPILE_STATUS=$? if [ $COMPILE_STATUS -ne 0 ]; then @@ -70,7 +72,7 @@ function pycodestyle_test { RUN_LOCAL_PYCODESTYLE="False" if hash "$PYCODESTYLE_BUILD" 2> /dev/null; then VERSION=$( $PYCODESTYLE_BUILD --version 2> /dev/null) - EXPECTED_PYCODESTYLE=$( (python -c 'from distutils.version import LooseVersion; + EXPECTED_PYCODESTYLE=$( ("$PYTHON_EXECUTABLE" -c 'from distutils.version import LooseVersion; print(LooseVersion("""'${VERSION[0]}'""") >= LooseVersion("""'$MINIMUM_PYCODESTYLE'"""))')\ 2> /dev/null) @@ -96,7 +98,7 @@ function pycodestyle_test { fi echo "starting pycodestyle test..." - PYCODESTYLE_REPORT=$( (python "$PYCODESTYLE_SCRIPT_PATH" --config=dev/tox.ini $1) 2>&1) + PYCODESTYLE_REPORT=$( ("$PYTHON_EXECUTABLE" "$PYCODESTYLE_SCRIPT_PATH" --config=dev/tox.ini $1) 2>&1) PYCODESTYLE_STATUS=$? 
else # we have the right version installed, so run locally @@ -130,7 +132,7 @@ function flake8_test { FLAKE8_VERSION="$($FLAKE8_BUILD --version 2> /dev/null)" VERSION=($FLAKE8_VERSION) - EXPECTED_FLAKE8=$( (python -c 'from distutils.version import LooseVersion; + EXPECTED_FLAKE8=$( ("$PYTHON_EXECUTABLE" -c 'from distutils.version import LooseVersion; print(LooseVersion("""'${VERSION[0]}'""") >= LooseVersion("""'$MINIMUM_FLAKE8'"""))') \ 2> /dev/null) @@ -175,7 +177,7 @@ function pydocstyle_test { fi PYDOCSTYLE_VERSION="$($PYDOCSTYLEBUILD --version 2> /dev/null)" - EXPECTED_PYDOCSTYLE=$(python -c 'from distutils.version import LooseVersion; \ + EXPECTED_PYDOCSTYLE=$("$PYTHON_EXECUTABLE" -c 'from distutils.version import LooseVersion; \ print(LooseVersion("""'$PYDOCSTYLE_VERSION'""") >= LooseVersion("""'$MINIMUM_PYDOCSTYLE'"""))' \ 2> /dev/null) diff --git a/dev/lint-r b/dev/lint-r index bfda0bca15eb7..b08f5efecd5d3 100755 --- a/dev/lint-r +++ b/dev/lint-r @@ -17,6 +17,9 @@ # limitations under the License. # +set -o pipefail +set -e + SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" SPARK_ROOT_DIR="$(dirname $SCRIPT_DIR)" LINT_R_REPORT_FILE_NAME="$SPARK_ROOT_DIR/dev/lint-r-report.log" @@ -24,7 +27,7 @@ LINT_R_REPORT_FILE_NAME="$SPARK_ROOT_DIR/dev/lint-r-report.log" if ! type "Rscript" > /dev/null; then echo "ERROR: You should install R" - exit + exit 1 fi `which Rscript` --vanilla "$SPARK_ROOT_DIR/dev/lint-r.R" "$SPARK_ROOT_DIR" | tee "$LINT_R_REPORT_FILE_NAME" diff --git a/dev/lint-r.R b/dev/lint-r.R index a4261d266bbc0..7e165319e316a 100644 --- a/dev/lint-r.R +++ b/dev/lint-r.R @@ -27,7 +27,7 @@ if (! library(SparkR, lib.loc = LOCAL_LIB_LOC, logical.return = TRUE)) { # Installs lintr from Github in a local directory. # NOTE: The CRAN's version is too old to adapt to our rules. 
if ("lintr" %in% row.names(installed.packages()) == FALSE) { - devtools::install_github("jimhester/lintr@5431140") + devtools::install_github("jimhester/lintr@v2.0.0") } library(lintr) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index a550af93feecd..0b30eec76bb53 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -160,7 +160,7 @@ fi # Build uber fat JAR cd "$SPARK_HOME" -export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:ReservedCodeCacheSize=512m}" +export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:ReservedCodeCacheSize=1g}" # Store the command as an array because $MVN variable might have spaces in it. # Normal quoting tricks don't work. @@ -233,7 +233,7 @@ if [ "$MAKE_PIP" == "true" ]; then pushd "$SPARK_HOME/python" > /dev/null # Delete the egg info file if it exists, this can cache older setup files. rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" - python setup.py sdist + python3 setup.py sdist popd > /dev/null else echo "Skipping building python distribution package" diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index fa3d50b8989f1..967cdace60dc9 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -97,9 +97,9 @@ def fail(msg): def run_cmd(cmd): print(cmd) if isinstance(cmd, list): - return subprocess.check_output(cmd).decode(sys.getdefaultencoding()) + return subprocess.check_output(cmd).decode('utf-8') else: - return subprocess.check_output(cmd.split(" ")).decode(sys.getdefaultencoding()) + return subprocess.check_output(cmd.split(" ")).decode('utf-8') def continue_maybe(prompt): diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py index 4171f28684d59..e9f10233b12b7 100644 --- a/dev/pip-sanity-check.py +++ b/dev/pip-sanity-check.py @@ -15,8 +15,6 @@ # limitations under the License. 
# -from __future__ import print_function - from pyspark.sql import SparkSession from pyspark.mllib.linalg import * import sys diff --git a/dev/requirements.txt b/dev/requirements.txt index 3fdd3425ffcc2..baea9213dbc97 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -2,5 +2,4 @@ flake8==3.5.0 jira==1.0.3 PyGithub==1.26.0 Unidecode==0.04.19 -pypandoc==1.3.3 sphinx diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 60cf4d8209416..470f21e69d46a 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -39,21 +39,16 @@ PYTHON_EXECS=() # Some systems don't have pip or virtualenv - in those cases our tests won't work. if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then echo "virtualenv installed - using. Note if this is a conda virtual env you may wish to set USE_CONDA" - # Figure out which Python execs we should test pip installation with - if hash python2 2>/dev/null; then - # We do this since we are testing with virtualenv and the default virtual env python - # is in /usr/bin/python - PYTHON_EXECS+=('python2') - elif hash python 2>/dev/null; then - # If python2 isn't installed fallback to python if available - PYTHON_EXECS+=('python') - fi + # test only against python3 if hash python3 2>/dev/null; then - PYTHON_EXECS+=('python3') + PYTHON_EXECS=('python3') + else + echo "Python3 not installed on system, skipping pip installability tests" + exit 0 fi elif hash conda 2>/dev/null; then echo "Using conda virtual environments" - PYTHON_EXECS=('3.5') + PYTHON_EXECS=('3.6') USE_CONDA=1 else echo "Missing virtualenv & conda, skipping pip installability tests" @@ -90,14 +85,14 @@ for python in "${PYTHON_EXECS[@]}"; do fi # Upgrade pip & friends if using virtual env if [ ! -n "$USE_CONDA" ]; then - pip install --upgrade pip pypandoc wheel numpy + pip install --upgrade pip wheel numpy fi echo "Creating pip installable source dist" cd "$FWDIR"/python # Delete the egg info file if it exists, this can cache the setup file. 
rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" - python setup.py sdist + python3 setup.py sdist echo "Installing dist into virtual env" @@ -117,9 +112,9 @@ for python in "${PYTHON_EXECS[@]}"; do echo "Run basic sanity check on pip installed version with spark-submit" spark-submit "$FWDIR"/dev/pip-sanity-check.py echo "Run basic sanity check with import based" - python "$FWDIR"/dev/pip-sanity-check.py + python3 "$FWDIR"/dev/pip-sanity-check.py echo "Run the tests for context.py" - python "$FWDIR"/python/pyspark/context.py + python3 "$FWDIR"/python/pyspark/context.py cd "$FWDIR" diff --git a/dev/run-tests b/dev/run-tests index 9cf93d000d0ea..143d78ec63731 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -20,10 +20,10 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (2, 7, 0))') +PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then - echo "Python versions prior to 2.7 are not supported." + echo "Python versions prior to 3.6 are not supported." exit -1 fi -exec python -u ./dev/run-tests.py "$@" +exec python3 -u ./dev/run-tests.py "$@" diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index 5bc03e41d1f2d..c3adc696a5122 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -25,10 +25,12 @@ FWDIR="$( cd "$( dirname "$0" )/.." && pwd )" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (2, 7, 0))') +export PATH=/home/anaconda/envs/py36/bin:$PATH + +PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then - echo "Python versions prior to 2.7 are not supported." + echo "Python versions prior to 3.6 are not supported." 
exit -1 fi -exec python -u ./dev/run-tests-jenkins.py "$@" +exec python3 -u ./dev/run-tests-jenkins.py "$@" diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index e9b0b327603be..72e32d4e16e14 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,7 +17,6 @@ # limitations under the License. # -from __future__ import print_function import os import sys import json @@ -177,12 +176,15 @@ def main(): if "test-maven" in ghprb_pull_title: os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven" # Switch the Hadoop profile based on the PR title: - if "test-hadoop2.6" in ghprb_pull_title: - os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.6" if "test-hadoop2.7" in ghprb_pull_title: os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.7" if "test-hadoop3.2" in ghprb_pull_title: os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2" + # Switch the Hive profile based on the PR title: + if "test-hive1.2" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2" + if "test-hive2.3" in ghprb_pull_title: + os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3" build_display_name = os.environ["BUILD_DISPLAY_NAME"] build_url = os.environ["BUILD_URL"] diff --git a/dev/run-tests.py b/dev/run-tests.py index ea515708124db..5255a77ec2081 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,7 +17,6 @@ # limitations under the License. # -from __future__ import print_function import itertools from argparse import ArgumentParser import os @@ -44,15 +43,20 @@ def determine_modules_for_files(filenames): """ Given a list of filenames, return the set of modules that contain those files. 
If a file is not associated with a more specific submodule, then this method will consider that - file to belong to the 'root' module. + file to belong to the 'root' module. GitHub Action and Appveyor files are ignored. >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"])) ['pyspark-core', 'sql'] >>> [x.name for x in determine_modules_for_files(["file_not_matched_by_any_subproject"])] ['root'] + >>> [x.name for x in determine_modules_for_files( \ + [".github/workflows/master.yml", "appveyor.yml"])] + [] """ changed_modules = set() for filename in filenames: + if filename in (".github/workflows/master.yml", "appveyor.yml"): + continue matched_at_least_one_module = False for module in modules.all_modules: if module.contains_file(filename): @@ -175,7 +179,8 @@ def run_apache_rat_checks(): run_cmd([os.path.join(SPARK_HOME, "dev", "check-license")]) -def run_scala_style_checks(build_profiles): +def run_scala_style_checks(extra_profiles): + build_profiles = extra_profiles + modules.root.build_profile_flags set_title_and_block("Running Scala style checks", "BLOCK_SCALA_STYLE") profiles = " ".join(build_profiles) print("[info] Checking Scala style using SBT with these profiles: ", profiles) @@ -265,7 +270,7 @@ def exec_sbt(sbt_args=()): echo_proc.wait() for line in iter(sbt_proc.stdout.readline, b''): if not sbt_output_filter.match(line): - print(line, end='') + print(line.decode('utf-8'), end='') retcode = sbt_proc.wait() if retcode != 0: @@ -291,9 +296,28 @@ def get_hadoop_profiles(hadoop_version): sys.exit(int(os.environ.get("CURRENT_BLOCK", 255))) -def build_spark_maven(hadoop_version): +def get_hive_profiles(hive_version): + """ + For the given Hive version tag, return a list of Maven/SBT profile flags for + building and testing against that Hive version. 
+ """ + + sbt_maven_hive_profiles = { + "hive1.2": ["-Phive-1.2"], + "hive2.3": ["-Phive-2.3"], + } + + if hive_version in sbt_maven_hive_profiles: + return sbt_maven_hive_profiles[hive_version] + else: + print("[error] Could not find", hive_version, "in the list. Valid options", + " are", sbt_maven_hive_profiles.keys()) + sys.exit(int(os.environ.get("CURRENT_BLOCK", 255))) + + +def build_spark_maven(extra_profiles): # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags mvn_goals = ["clean", "package", "-DskipTests"] profiles_and_goals = build_profiles + mvn_goals @@ -302,9 +326,9 @@ def build_spark_maven(hadoop_version): exec_maven(profiles_and_goals) -def build_spark_sbt(hadoop_version): +def build_spark_sbt(extra_profiles): # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags sbt_goals = ["test:package", # Build test jars as some tests depend on them "streaming-kinesis-asl-assembly/assembly"] profiles_and_goals = build_profiles + sbt_goals @@ -314,10 +338,10 @@ def build_spark_sbt(hadoop_version): exec_sbt(profiles_and_goals) -def build_spark_unidoc_sbt(hadoop_version): +def build_spark_unidoc_sbt(extra_profiles): set_title_and_block("Building Unidoc API Documentation", "BLOCK_DOCUMENTATION") # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags sbt_goals = ["unidoc"] profiles_and_goals = build_profiles + sbt_goals @@ -327,9 +351,9 @@ def build_spark_unidoc_sbt(hadoop_version): exec_sbt(profiles_and_goals) -def build_spark_assembly_sbt(hadoop_version, checkstyle=False): +def build_spark_assembly_sbt(extra_profiles, 
checkstyle=False): # Enable all of the profiles for the build: - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags + build_profiles = extra_profiles + modules.root.build_profile_flags sbt_goals = ["assembly/package"] profiles_and_goals = build_profiles + sbt_goals print("[info] Building Spark assembly using SBT with these arguments: ", @@ -339,25 +363,25 @@ def build_spark_assembly_sbt(hadoop_version, checkstyle=False): if checkstyle: run_java_style_checks(build_profiles) - build_spark_unidoc_sbt(hadoop_version) + build_spark_unidoc_sbt(extra_profiles) -def build_apache_spark(build_tool, hadoop_version): - """Will build Spark against Hive v1.2.1 given the passed in build tool (either `sbt` or - `maven`). Defaults to using `sbt`.""" +def build_apache_spark(build_tool, extra_profiles): + """Will build Spark with the extra profiles and the passed in build tool + (either `sbt` or `maven`). Defaults to using `sbt`.""" set_title_and_block("Building Spark", "BLOCK_BUILD") rm_r("lib_managed") if build_tool == "maven": - build_spark_maven(hadoop_version) + build_spark_maven(extra_profiles) else: - build_spark_sbt(hadoop_version) + build_spark_sbt(extra_profiles) -def detect_binary_inop_with_mima(hadoop_version): - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags +def detect_binary_inop_with_mima(extra_profiles): + build_profiles = extra_profiles + modules.root.build_profile_flags set_title_and_block("Detecting binary incompatibilities with MiMa", "BLOCK_MIMA") profiles = " ".join(build_profiles) print("[info] Detecting binary incompatibilities with MiMa using SBT with these profiles: ", @@ -391,14 +415,14 @@ def run_scala_tests_sbt(test_modules, test_profiles): exec_sbt(profiles_and_goals) -def run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags): +def run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags): """Function to properly execute all tests passed in as 
a set from the `determine_test_suites` function""" set_title_and_block("Running Spark unit tests", "BLOCK_SPARK_UNIT_TESTS") test_modules = set(test_modules) - test_profiles = get_hadoop_profiles(hadoop_version) + \ + test_profiles = extra_profiles + \ list(set(itertools.chain.from_iterable(m.build_profile_flags for m in test_modules))) if excluded_tags: @@ -551,6 +575,7 @@ def main(): # to reflect the environment settings build_tool = os.environ.get("AMPLAB_JENKINS_BUILD_TOOL", "sbt") hadoop_version = os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE", "hadoop2.7") + hive_version = os.environ.get("AMPLAB_JENKINS_BUILD_HIVE_PROFILE", "hive2.3") test_env = "amplab_jenkins" # add path for Python3 in Jenkins if we're calling from a Jenkins machine # TODO(sknapp): after all builds are ported to the ubuntu workers, change this to be: @@ -560,10 +585,12 @@ def main(): # else we're running locally and can use local settings build_tool = "sbt" hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7") + hive_version = os.environ.get("HIVE_PROFILE", "hive2.3") test_env = "local" print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version, - "under environment", test_env) + "and Hive profile", hive_version, "under environment", test_env) + extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version) changed_modules = None changed_files = None @@ -597,8 +624,7 @@ def main(): if not changed_files or any(f.endswith(".scala") or f.endswith("scalastyle-config.xml") for f in changed_files): - build_profiles = get_hadoop_profiles(hadoop_version) + modules.root.build_profile_flags - run_scala_style_checks(build_profiles) + run_scala_style_checks(extra_profiles) should_run_java_style_checks = False if not changed_files or any(f.endswith(".java") or f.endswith("checkstyle.xml") @@ -626,18 +652,18 @@ def main(): run_build_tests() # spark build - build_apache_spark(build_tool, hadoop_version) + build_apache_spark(build_tool, 
extra_profiles) # backwards compatibility checks if build_tool == "sbt": # Note: compatibility tests only supported in sbt for now - detect_binary_inop_with_mima(hadoop_version) + detect_binary_inop_with_mima(extra_profiles) # Since we did not build assembly/package before running dev/mima, we need to # do it here because the tests still rely on it; see SPARK-13294 for details. - build_spark_assembly_sbt(hadoop_version, should_run_java_style_checks) + build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks) # run the test suites - run_scala_tests(build_tool, hadoop_version, test_modules, excluded_tags) + run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags) modules_with_python_tests = [m for m in test_modules if m.python_test_goals] if modules_with_python_tests: diff --git a/dev/scalafmt b/dev/scalafmt index 76f688a2f5b88..3f69bc98f51c7 100755 --- a/dev/scalafmt +++ b/dev/scalafmt @@ -17,7 +17,6 @@ # limitations under the License. # -# by default, format only files that differ from git master -params="${@:---diff}" +VERSION="${@:-2.12}" +./build/mvn -Pscala-$VERSION mvn-scalafmt_$VERSION:format -Dscalafmt.skip=false -./build/mvn mvn-scalafmt_2.12:format -Dscalafmt.skip=false -Dscalafmt.parameters="$params" diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 0f6dbf2f99a97..391e4bbe1b1f0 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -329,6 +329,7 @@ def __hash__(self): "pyspark.tests.test_join", "pyspark.tests.test_profiler", "pyspark.tests.test_rdd", + "pyspark.tests.test_rddbarrier", "pyspark.tests.test_readwrite", "pyspark.tests.test_serializers", "pyspark.tests.test_shuffle", @@ -361,8 +362,14 @@ def __hash__(self): "pyspark.sql.udf", "pyspark.sql.window", "pyspark.sql.avro.functions", + "pyspark.sql.pandas.conversion", + "pyspark.sql.pandas.map_ops", + "pyspark.sql.pandas.group_ops", + "pyspark.sql.pandas.types", + "pyspark.sql.pandas.serializers", + 
"pyspark.sql.pandas.typehints", + "pyspark.sql.pandas.utils", # unittests - "pyspark.sql.tests.test_appsubmit", "pyspark.sql.tests.test_arrow", "pyspark.sql.tests.test_catalog", "pyspark.sql.tests.test_column", @@ -372,10 +379,13 @@ def __hash__(self): "pyspark.sql.tests.test_datasources", "pyspark.sql.tests.test_functions", "pyspark.sql.tests.test_group", + "pyspark.sql.tests.test_pandas_cogrouped_map", + "pyspark.sql.tests.test_pandas_grouped_map", + "pyspark.sql.tests.test_pandas_map", "pyspark.sql.tests.test_pandas_udf", "pyspark.sql.tests.test_pandas_udf_grouped_agg", - "pyspark.sql.tests.test_pandas_udf_grouped_map", "pyspark.sql.tests.test_pandas_udf_scalar", + "pyspark.sql.tests.test_pandas_udf_typehints", "pyspark.sql.tests.test_pandas_udf_window", "pyspark.sql.tests.test_readwriter", "pyspark.sql.tests.test_serde", @@ -459,6 +469,7 @@ def __hash__(self): "pyspark.ml.evaluation", "pyspark.ml.feature", "pyspark.ml.fpm", + "pyspark.ml.functions", "pyspark.ml.image", "pyspark.ml.linalg.__init__", "pyspark.ml.recommendation", diff --git a/dev/sparktestsupport/shellutils.py b/dev/sparktestsupport/shellutils.py index ec6ea86269f5e..d9cb8aa45c8d2 100644 --- a/dev/sparktestsupport/shellutils.py +++ b/dev/sparktestsupport/shellutils.py @@ -15,14 +15,12 @@ # limitations under the License. 
# -from __future__ import print_function import os import shutil import subprocess import sys subprocess_check_output = subprocess.check_output -subprocess_check_call = subprocess.check_call def exit_from_command_with_retcode(cmd, retcode): @@ -55,9 +53,9 @@ def run_cmd(cmd, return_output=False): cmd = cmd.split() try: if return_output: - return subprocess_check_output(cmd).decode(sys.getdefaultencoding()) + return subprocess_check_output(cmd).decode('utf-8') else: - return subprocess_check_call(cmd) + return subprocess.run(cmd, universal_newlines=True, check=True) except subprocess.CalledProcessError as e: exit_from_command_with_retcode(e.cmd, e.returncode) diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh index 54574f6097e26..936ac00f6b9e7 100755 --- a/dev/test-dependencies.sh +++ b/dev/test-dependencies.sh @@ -31,9 +31,10 @@ export LC_ALL=C # NOTE: These should match those in the release publishing script HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pkubernetes -Pyarn -Phive" MVN="build/mvn" -HADOOP_PROFILES=( - hadoop-2.7 - hadoop-3.2 +HADOOP_HIVE_PROFILES=( + hadoop-2.7-hive-1.2 + hadoop-2.7-hive-2.3 + hadoop-3.2-hive-2.3 ) # We'll switch the version to a temp. 
one, publish POMs using that new version, then switch back to @@ -66,19 +67,45 @@ trap reset_version EXIT $MVN -q versions:set -DnewVersion=$TEMP_VERSION -DgenerateBackupPoms=false > /dev/null # Generate manifests for each Hadoop profile: -for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do - echo "Performing Maven install for $HADOOP_PROFILE" - $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar install:install clean -q +for HADOOP_HIVE_PROFILE in "${HADOOP_HIVE_PROFILES[@]}"; do + if [[ $HADOOP_HIVE_PROFILE == **hadoop-3.2-hive-2.3** ]]; then + HADOOP_PROFILE=hadoop-3.2 + HIVE_PROFILE=hive-2.3 + elif [[ $HADOOP_HIVE_PROFILE == **hadoop-2.7-hive-2.3** ]]; then + HADOOP_PROFILE=hadoop-2.7 + HIVE_PROFILE=hive-2.3 + else + HADOOP_PROFILE=hadoop-2.7 + HIVE_PROFILE=hive-1.2 + fi + echo "Performing Maven install for $HADOOP_HIVE_PROFILE" + $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE -P$HIVE_PROFILE jar:jar jar:test-jar install:install clean -q - echo "Performing Maven validate for $HADOOP_PROFILE" - $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE validate -q + echo "Performing Maven validate for $HADOOP_HIVE_PROFILE" + $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE -P$HIVE_PROFILE validate -q - echo "Generating dependency manifest for $HADOOP_PROFILE" + echo "Generating dependency manifest for $HADOOP_HIVE_PROFILE" mkdir -p dev/pr-deps - $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE dependency:build-classpath -pl assembly \ + $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE -P$HIVE_PROFILE dependency:build-classpath -pl assembly -am \ | grep "Dependencies classpath:" -A 1 \ - | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \ - | grep -v spark > dev/pr-deps/spark-deps-$HADOOP_PROFILE + | tail -n 1 | tr ":" "\n" | awk -F '/' '{ + # For each dependency classpath, we fetch the last three parts split by "/": artifact id, version, and jar name. 
+ # Since classifier, if exists, always sits between "artifact_id-version-" and ".jar" suffix in the jar name, + # we extract classifier and put it right before the jar name explicitly. + # For example, `orc-core/1.5.5/nohive/orc-core-1.5.5-nohive.jar` + # ^^^^^^ + # extracted classifier + # `okio/1.15.0//okio-1.15.0.jar` + # ^ + # empty for dependencies without classifier + artifact_id=$(NF-2); + version=$(NF-1); + jar_name=$NF; + classifier_start_index=length(artifact_id"-"version"-") + 1; + classifier_end_index=index(jar_name, ".jar") - 1; + classifier=substr(jar_name, classifier_start_index, classifier_end_index - classifier_start_index + 1); + print artifact_id"/"version"/"classifier"/"jar_name + }' | sort | grep -v spark > dev/pr-deps/spark-deps-$HADOOP_HIVE_PROFILE done if [[ $@ == **replace-manifest** ]]; then @@ -88,13 +115,13 @@ if [[ $@ == **replace-manifest** ]]; then exit 0 fi -for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do +for HADOOP_HIVE_PROFILE in "${HADOOP_HIVE_PROFILES[@]}"; do set +e dep_diff="$( git diff \ --no-index \ - dev/deps/spark-deps-$HADOOP_PROFILE \ - dev/pr-deps/spark-deps-$HADOOP_PROFILE \ + dev/deps/spark-deps-$HADOOP_HIVE_PROFILE \ + dev/pr-deps/spark-deps-$HADOOP_HIVE_PROFILE \ )" set -e if [ "$dep_diff" != "" ]; then diff --git a/dev/tox.ini b/dev/tox.ini index 11b1b040035b0..54f65692c8303 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -16,6 +16,6 @@ [pycodestyle] ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504 max-line-length=100 -exclude=cloudpickle.py,heapq3.py,shared.py,python/docs/conf.py,work/*/*.py,python/.eggs/*,dist/* +exclude=cloudpickle.py,heapq3.py,shared.py,python/docs/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* [pydocstyle] ignore=D100,D101,D102,D103,D104,D105,D106,D107,D200,D201,D202,D203,D204,D205,D206,D207,D208,D209,D210,D211,D212,D213,D214,D215,D300,D301,D302,D400,D401,D402,D403,D404,D405,D406,D407,D408,D409,D410,D411,D412,D413,D414 diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 
index 0000000000000..2260493b46ab3 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +sql-configs.html diff --git a/docs/README.md b/docs/README.md index da531321aa5da..22039871cf63d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -6,9 +6,9 @@ license: | The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 - + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -36,19 +36,31 @@ You need to have [Ruby](https://www.ruby-lang.org/en/documentation/installation/ installed. Also install the following libraries: ```sh -$ sudo gem install jekyll jekyll-redirect-from pygments.rb -$ sudo pip install Pygments -# Following is needed only for generating API docs -$ sudo pip install sphinx pypandoc mkdocs -$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "rmarkdown"), repos="https://cloud.r-project.org/")' -$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "5.0.1", repos="https://cloud.r-project.org/")' -$ sudo Rscript -e 'devtools::install_version("testthat", version = "1.0.2", repos="https://cloud.r-project.org/")' +$ sudo gem install jekyll jekyll-redirect-from rouge ``` Note: If you are on a system with both Ruby 1.9 and Ruby 2.0 you may need to replace gem with gem2.0. 
+### R Documentation + +If you'd like to generate R documentation, you'll need to [install Pandoc](https://pandoc.org/installing.html) +and install these libraries: + +```sh +$ sudo Rscript -e 'install.packages(c("knitr", "devtools", "testthat", "rmarkdown"), repos="https://cloud.r-project.org/")' +$ sudo Rscript -e 'devtools::install_version("roxygen2", version = "5.0.1", repos="https://cloud.r-project.org/")' +``` + Note: Other versions of roxygen2 might work in SparkR documentation generation but `RoxygenNote` field in `$SPARK_HOME/R/pkg/DESCRIPTION` is 5.0.1, which is updated if the version is mismatched. +### API Documentation + +To generate API docs for any language, you'll need to install these libraries: + +```sh +$ sudo pip install sphinx mkdocs numpy +``` + ## Generating the Documentation HTML We include the Spark documentation as part of the source (as opposed to using a hosted wiki, such as @@ -103,3 +115,17 @@ using [MkDocs](https://www.mkdocs.org/). NOTE: To skip the step of building and copying over the Scala, Java, Python, R and SQL API docs, run `SKIP_API=1 jekyll build`. In addition, `SKIP_SCALADOC=1`, `SKIP_PYTHONDOC=1`, `SKIP_RDOC=1` and `SKIP_SQLDOC=1` can be used to skip a single step of the corresponding language. `SKIP_SCALADOC` indicates skipping both the Scala and Java docs. + +### Automatically Rebuilding API Docs + +`jekyll serve --watch` will only watch what's in `docs/`, and it won't follow symlinks. That means it won't monitor your API docs under `python/docs` or elsewhere. + +To work around this limitation for Python, install [`entr`](http://eradman.com/entrproject/) and run the following in a separate shell: + +```sh +cd "$SPARK_HOME/python/docs" +find .. -type f -name '*.py' \ +| entr -s 'make html && cp -r _build/html/. ../../docs/api/python' +``` + +Whenever there is a change to your Python code, `entr` will automatically rebuild the Python API docs and copy them to `docs/`, thus triggering a Jekyll update. 
diff --git a/docs/_config.yml b/docs/_config.yml index 146c90fcff6e5..a888620139207 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -1,4 +1,4 @@ -highlighter: pygments +highlighter: rouge markdown: kramdown gems: - jekyll-redirect-from @@ -17,7 +17,7 @@ include: SPARK_VERSION: 3.0.0-SNAPSHOT SPARK_VERSION_SHORT: 3.0.0 SCALA_BINARY_VERSION: "2.12" -SCALA_VERSION: "2.12.8" +SCALA_VERSION: "2.12.10" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark diff --git a/docs/_data/menu-migration.yaml b/docs/_data/menu-migration.yaml new file mode 100644 index 0000000000000..1d8b311dd64fb --- /dev/null +++ b/docs/_data/menu-migration.yaml @@ -0,0 +1,12 @@ +- text: Spark Core + url: core-migration-guide.html +- text: SQL, Datasets and DataFrame + url: sql-migration-guide.html +- text: Structured Streaming + url: ss-migration-guide.html +- text: MLlib (Machine Learning) + url: ml-migration-guide.html +- text: PySpark (Python on Spark) + url: pyspark-migration-guide.html +- text: SparkR (R on Spark) + url: sparkr-migration-guide.html diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 717911b5a4645..38a5cf61245a6 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -15,6 +15,8 @@ url: sql-getting-started.html#creating-datasets - text: Interoperating with RDDs url: sql-getting-started.html#interoperating-with-rdds + - text: Scalar Functions + url: sql-getting-started.html#scalar-functions - text: Aggregations url: sql-getting-started.html#aggregations - text: Data Sources @@ -22,6 +24,8 @@ subitems: - text: "Generic Load/Save Functions" url: sql-data-sources-load-save-functions.html + - text: "Generic File Source Options" + url: sql-data-sources-generic-options.html - text: Parquet Files url: sql-data-sources-parquet.html - text: ORC Files @@ -34,6 +38,8 @@ url: sql-data-sources-jdbc.html - text: Avro Files url: sql-data-sources-avro.html + - text: 
Whole Binary Files + url: sql-data-sources-binaryFile.html - text: Troubleshooting url: sql-data-sources-troubleshooting.html - text: Performance Tuning @@ -43,8 +49,8 @@ url: sql-performance-tuning.html#caching-data-in-memory - text: Other Configuration Options url: sql-performance-tuning.html#other-configuration-options - - text: Broadcast Hint for SQL Queries - url: sql-performance-tuning.html#broadcast-hint-for-sql-queries + - text: Join Strategy Hints for SQL Queries + url: sql-performance-tuning.html#join-strategy-hints-for-sql-queries - text: Distributed SQL Engine url: sql-distributed-sql-engine.html subitems: @@ -64,22 +70,25 @@ - text: Usage Notes url: sql-pyspark-pandas-with-arrow.html#usage-notes - text: Migration Guide - url: sql-migration-guide.html - subitems: - - text: Spark SQL Upgrading Guide - url: sql-migration-guide-upgrade.html - - text: Compatibility with Apache Hive - url: sql-migration-guide-hive-compatibility.html - - text: SQL Reserved/Non-Reserved Keywords - url: sql-reserved-and-non-reserved-keywords.html - + url: sql-migration-old.html - text: SQL Reference url: sql-ref.html subitems: - text: Data Types url: sql-ref-datatypes.html + - text: Null Semantics + url: sql-ref-null-semantics.html - text: NaN Semantics url: sql-ref-nan-semantics.html + - text: ANSI Compliance + url: sql-ref-ansi-compliance.html + subitems: + - text: Arithmetic Operations + url: sql-ref-ansi-compliance.html#arithmetic-operations + - text: Type Conversion + url: sql-ref-ansi-compliance.html#type-conversion + - text: SQL Keywords + url: sql-ref-ansi-compliance.html#sql-keywords - text: SQL Syntax url: sql-ref-syntax.html subitems: @@ -125,43 +134,35 @@ - text: SELECT url: sql-ref-syntax-qry-select.html subitems: - - text: DISTINCT Clause - url: sql-ref-syntax-qry-select-distinct.html - - text: Joins - url: sql-ref-syntax-qry-select-join.html - - text: ORDER BY Clause - url: sql-ref-syntax-qry-select-orderby.html + - text: WHERE Clause + url: 
sql-ref-syntax-qry-select-where.html - text: GROUP BY Clause url: sql-ref-syntax-qry-select-groupby.html - text: HAVING Clause url: sql-ref-syntax-qry-select-having.html + - text: ORDER BY Clause + url: sql-ref-syntax-qry-select-orderby.html + - text: SORT BY Clause + url: sql-ref-syntax-qry-select-sortby.html + - text: CLUSTER BY Clause + url: sql-ref-syntax-qry-select-clusterby.html + - text: DISTRIBUTE BY Clause + url: sql-ref-syntax-qry-select-distribute-by.html - text: LIMIT Clause url: sql-ref-syntax-qry-select-limit.html - - text: Set operations - url: sql-ref-syntax-qry-select-setops.html - - text: Common Table Expression(CTE) - url: sql-ref-syntax-qry-select-cte.html - - text: Subqueries - url: sql-ref-syntax-qry-select-subqueries.html - - text: Query hints - url: sql-ref-syntax-qry-select-hints.html - - text: SAMPLING - url: sql-ref-syntax-qry-sampling.html - - text: WINDOWING ANALYTIC FUNCTIONS - url: sql-ref-syntax-qry-window.html - - text: AGGREGATION (CUBE/ROLLUP/GROUPING) - url: sql-ref-syntax-qry-aggregation.html + - text: USE database + url: sql-ref-syntax-qry-select-usedb.html - text: EXPLAIN url: sql-ref-syntax-qry-explain.html - - text: Auxilarry Statements + - text: Auxiliary Statements url: sql-ref-syntax-aux.html subitems: - - text: Analyze statement + - text: ANALYZE url: sql-ref-syntax-aux-analyze.html subitems: - text: ANALYZE TABLE url: sql-ref-syntax-aux-analyze-table.html - - text: Caching statements + - text: CACHE url: sql-ref-syntax-aux-cache.html subitems: - text: CACHE TABLE @@ -170,7 +171,11 @@ url: sql-ref-syntax-aux-cache-uncache-table.html - text: CLEAR CACHE url: sql-ref-syntax-aux-cache-clear-cache.html - - text: Describe Commands + - text: REFRESH TABLE + url: sql-ref-syntax-aux-refresh-table.html + - text: REFRESH + url: sql-ref-syntax-aux-cache-refresh.md + - text: DESCRIBE url: sql-ref-syntax-aux-describe.html subitems: - text: DESCRIBE DATABASE @@ -181,7 +186,7 @@ url: sql-ref-syntax-aux-describe-function.html - text: 
DESCRIBE QUERY url: sql-ref-syntax-aux-describe-query.html - - text: Show commands + - text: SHOW url: sql-ref-syntax-aux-show.html subitems: - text: SHOW COLUMNS @@ -200,36 +205,21 @@ url: sql-ref-syntax-aux-show-partitions.html - text: SHOW CREATE TABLE url: sql-ref-syntax-aux-show-create-table.html - - text: Configuration Management Commands + - text: CONFIGURATION MANAGEMENT url: sql-ref-syntax-aux-conf-mgmt.html subitems: - text: SET url: sql-ref-syntax-aux-conf-mgmt-set.html - text: RESET url: sql-ref-syntax-aux-conf-mgmt-reset.html - - text: Resource Management Commands + - text: RESOURCE MANAGEMENT url: sql-ref-syntax-aux-resource-mgmt.html subitems: - text: ADD FILE url: sql-ref-syntax-aux-resource-mgmt-add-file.html - text: ADD JAR url: sql-ref-syntax-aux-resource-mgmt-add-jar.html - - text: Functions - url: sql-ref-functions.html - subitems: - - text: Builtin Functions - url: sql-ref-functions-builtin.html - subitems: - - text: Scalar functions - url: sql-ref-functions-builtin-scalar.html - - text: Aggregate functions - url: sql-ref-functions-builtin-aggregate.html - - text: User defined Functions - url: sql-ref-functions-udf.html - subitems: - - text: Scalar functions - url: sql-ref-functions-udf-scalar.html - - text: Aggregate functions - url: sql-ref-functions-udf-aggregate.html - - text: Arthmetic operations - url: sql-ref-arithmetic-ops.html + - text: LIST FILE + url: sql-ref-syntax-aux-resource-mgmt-list-file.html + - text: LIST JAR + url: sql-ref-syntax-aux-resource-mgmt-list-jar.html diff --git a/docs/_includes/nav-left-wrapper-migration.html b/docs/_includes/nav-left-wrapper-migration.html new file mode 100644 index 0000000000000..4318a324a9475 --- /dev/null +++ b/docs/_includes/nav-left-wrapper-migration.html @@ -0,0 +1,6 @@ +
    +
    +

    Migration Guide

    + {% include nav-left.html nav=include.nav-migration %} +
    +
    diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 8ea15dc71d541..d05ac6bbe129d 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -82,7 +82,7 @@
  • `mergeSchema` (default is the value specified in `spark.sql.parquet.mergeSchema`): sets * whether we should merge schemas collected from all Parquet part-files. This will override * `spark.sql.parquet.mergeSchema`.
  • + *
  • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
  • + *
  • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
  • * + * * @since 1.4.0 */ @scala.annotation.varargs @@ -688,6 +710,18 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { /** * Loads ORC files and returns the result as a `DataFrame`. * + * You can set the following ORC-specific option(s) for reading ORC files: + *
      + *
    • `mergeSchema` (default is the value specified in `spark.sql.orc.mergeSchema`): sets whether + * we should merge schemas collected from all ORC part-files. This will override + * `spark.sql.orc.mergeSchema`.
    • + *
    • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
    • + *
    • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
    • + *
    + * * @param paths input paths * @since 2.0.0 */ @@ -736,6 +770,11 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * *
  • `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator * that should be used for parsing.
  • + *
  • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
  • + *
  • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
  • * * * @param paths input paths @@ -771,13 +810,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * spark.read().textFile("/path/to/spark/README.md") * }}} * - * You can set the following textFile-specific option(s) for reading text files: - *
      - *
    • `wholetext` (default `false`): If true, read a file as a single row and not split by "\n". - *
    • - *
    • `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator - * that should be used for parsing.
    • - *
    + * You can set the text-specific options as specified in `DataFrameReader.text`. * * @param paths input path * @since 2.0.0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index c782e5012d8d7..fff1f4b636dea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -17,28 +17,26 @@ package org.apache.spark.sql -import java.util.{Locale, Properties, UUID} +import java.util.{Locale, Properties} import scala.collection.JavaConverters._ import org.apache.spark.annotation.Stable -import org.apache.spark.sql.catalog.v2.{CatalogPlugin, Identifier, TableCatalog} -import org.apache.spark.sql.catalog.v2.expressions._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, NoSuchTableException, UnresolvedRelation} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, CreateTableAsSelect, InsertIntoTable, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, CreateTableAsSelect, InsertIntoStatement, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, CatalogV2Implicits, CatalogV2Util, Identifier, SupportsCatalogOptions, SupportsWrite, Table, TableCatalog, TableProvider, V1Table} +import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, IdentityTransform, LiteralValue, Transform} import org.apache.spark.sql.execution.SQLExecution import 
org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, DataSourceUtils, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2._ import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.sources.BaseRelation -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.TableCapability._ -import org.apache.spark.sql.sources.v2.internal.V1Table import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -68,7 +66,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * @since 1.4.0 */ def mode(saveMode: SaveMode): DataFrameWriter[T] = { - this.mode = Some(saveMode) + this.mode = saveMode this } @@ -88,10 +86,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { case "overwrite" => mode(SaveMode.Overwrite) case "append" => mode(SaveMode.Append) case "ignore" => mode(SaveMode.Ignore) - case "error" | "errorifexists" => mode(SaveMode.ErrorIfExists) - case "default" => this - case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. " + - "Accepted save modes are 'overwrite', 'append', 'ignore', 'error', 'errorifexists'.") + case "error" | "errorifexists" | "default" => mode(SaveMode.ErrorIfExists) + case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. 
Accepted " + + "save modes are 'overwrite', 'append', 'ignore', 'error', 'errorifexists', 'default'.") } } @@ -254,44 +251,98 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val maybeV2Provider = lookupV2Provider() if (maybeV2Provider.isDefined) { - if (partitioningColumns.nonEmpty) { - throw new AnalysisException( - "Cannot write data to TableProvider implementation if partition columns are specified.") - } - val provider = maybeV2Provider.get val sessionOptions = DataSourceV2Utils.extractSessionConfigs( provider, df.sparkSession.sessionState.conf) val options = sessionOptions ++ extraOptions val dsOptions = new CaseInsensitiveStringMap(options.asJava) + def getTable: Table = { + // For file source, it's expensive to infer schema/partition at each write. Here we pass + // the schema of input query and the user-specified partitioning to `getTable`. If the + // query schema is not compatible with the existing data, the write can still success but + // following reads would fail. 
+ if (provider.isInstanceOf[FileDataSourceV2]) { + provider.getTable( + df.schema.asNullable, + partitioningAsV2.toArray, + dsOptions.asCaseSensitiveMap()) + } else { + DataSourceV2Utils.getTableFromProvider(provider, dsOptions, userSpecifiedSchema = None) + } + } + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ - provider.getTable(dsOptions) match { - case table: SupportsWrite if table.supports(BATCH_WRITE) => - lazy val relation = DataSourceV2Relation.create(table, dsOptions) - modeForDSV2 match { - case SaveMode.Append => - runCommand(df.sparkSession, "save") { - AppendData.byName(relation, df.logicalPlan) + val catalogManager = df.sparkSession.sessionState.catalogManager + mode match { + case SaveMode.Append | SaveMode.Overwrite => + val (table, catalog, ident) = provider match { + case supportsExtract: SupportsCatalogOptions => + val ident = supportsExtract.extractIdentifier(dsOptions) + val catalog = CatalogV2Util.getTableProviderCatalog( + supportsExtract, catalogManager, dsOptions) + + (catalog.loadTable(ident), Some(catalog), Some(ident)) + case _: TableProvider => + val t = getTable + if (t.supports(BATCH_WRITE)) { + (t, None, None) + } else { + // Streaming also uses the data source V2 API. So it may be that the data source + // implements v2, but has no v2 implementation for batch writes. In that case, we + // fall back to saving as though it's a V1 source. + return saveToV1Source() } + } + + val relation = DataSourceV2Relation.create(table, catalog, ident, dsOptions) + checkPartitioningMatchesV2Table(table) + if (mode == SaveMode.Append) { + runCommand(df.sparkSession, "save") { + AppendData.byName(relation, df.logicalPlan, extraOptions.toMap) + } + } else { + // Truncate the table. 
TableCapabilityCheck will throw a nice exception if this + // isn't supported + runCommand(df.sparkSession, "save") { + OverwriteByExpression.byName( + relation, df.logicalPlan, Literal(true), extraOptions.toMap) + } + } + + case createMode => + provider match { + case supportsExtract: SupportsCatalogOptions => + val ident = supportsExtract.extractIdentifier(dsOptions) + val catalog = CatalogV2Util.getTableProviderCatalog( + supportsExtract, catalogManager, dsOptions) + + val location = Option(dsOptions.get("path")).map(TableCatalog.PROP_LOCATION -> _) - case SaveMode.Overwrite if table.supportsAny(TRUNCATE, OVERWRITE_BY_FILTER) => - // truncate the table runCommand(df.sparkSession, "save") { - OverwriteByExpression.byName(relation, df.logicalPlan, Literal(true)) + CreateTableAsSelect( + catalog, + ident, + partitioningAsV2, + df.queryExecution.analyzed, + Map(TableCatalog.PROP_PROVIDER -> source) ++ location, + extraOptions.toMap, + ignoreIfExists = createMode == SaveMode.Ignore) + } + case _: TableProvider => + if (getTable.supports(BATCH_WRITE)) { + throw new AnalysisException(s"TableProvider implementation $source cannot be " + + s"written with $createMode mode, please use Append or Overwrite " + + "modes instead.") + } else { + // Streaming also uses the data source V2 API. So it may be that the data source + // implements v2, but has no v2 implementation for batch writes. In that case, we + // fallback to saving as though it's a V1 source. + saveToV1Source() } - - case other => - throw new AnalysisException(s"TableProvider implementation $source cannot be " + - s"written with $other mode, please use Append or Overwrite " + - "modes instead.") } - - // Streaming also uses the data source V2 API. So it may be that the data source implements - // v2, but has no v2 implementation for batch writes. In that case, we fall back to saving - // as though it's a V1 source. 
- case _ => saveToV1Source() } + } else { saveToV1Source() } @@ -309,7 +360,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { sparkSession = df.sparkSession, className = source, partitionColumns = partitioningColumns.getOrElse(Nil), - options = extraOptions.toMap).planForWriting(modeForDSV1, df.logicalPlan) + options = extraOptions.toMap).planForWriting(mode, df.logicalPlan) } } @@ -320,6 +371,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * @note Unlike `saveAsTable`, `insertInto` ignores the column names and just uses position-based * resolution. For example: * + * @note SaveMode.ErrorIfExists and SaveMode.Ignore behave as SaveMode.Append in `insertInto` as + * `insertInto` is not a table creating operation. + * * {{{ * scala> Seq((1, 2)).toDF("i", "j").write.mode("overwrite").saveAsTable("t1") * scala> Seq((3, 4)).toDF("j", "i").write.insertInto("t1") @@ -339,8 +393,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * @since 1.4.0 */ def insertInto(tableName: String): Unit = { - import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, CatalogObjectIdentifier} - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ + import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, NonSessionCatalogAndIdentifier, SessionCatalogAndIdentifier} + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Util._ assertNotBucketed("insertInto") @@ -354,15 +409,14 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val session = df.sparkSession val canUseV2 = lookupV2Provider().isDefined - val sessionCatalogOpt = session.sessionState.analyzer.sessionCatalog session.sessionState.sqlParser.parseMultipartIdentifier(tableName) match { - case CatalogObjectIdentifier(Some(catalog), ident) => + case NonSessionCatalogAndIdentifier(catalog, ident) => insertInto(catalog, ident) - case CatalogObjectIdentifier(None, ident) - if canUseV2 && 
sessionCatalogOpt.isDefined && ident.namespace().length <= 1 => - insertInto(sessionCatalogOpt.get, ident) + case SessionCatalogAndIdentifier(catalog, ident) + if canUseV2 && ident.namespace().length <= 1 => + insertInto(catalog, ident) case AsTableIdentifier(tableIdentifier) => insertInto(tableIdentifier) @@ -373,18 +427,18 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } private def insertInto(catalog: CatalogPlugin, ident: Identifier): Unit = { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ val table = catalog.asTableCatalog.loadTable(ident) match { case _: V1Table => return insertInto(TableIdentifier(ident.name(), ident.namespace().headOption)) case t => - DataSourceV2Relation.create(t) + DataSourceV2Relation.create(t, Some(catalog), Some(ident)) } - val command = modeForDSV2 match { - case SaveMode.Append => - AppendData.byPosition(table, df.logicalPlan) + val command = mode match { + case SaveMode.Append | SaveMode.ErrorIfExists | SaveMode.Ignore => + AppendData.byPosition(table, df.logicalPlan, extraOptions.toMap) case SaveMode.Overwrite => val conf = df.sparkSession.sessionState.conf @@ -392,14 +446,10 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { conf.partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC if (dynamicPartitionOverwrite) { - OverwritePartitionsDynamic.byPosition(table, df.logicalPlan) + OverwritePartitionsDynamic.byPosition(table, df.logicalPlan, extraOptions.toMap) } else { - OverwriteByExpression.byPosition(table, df.logicalPlan, Literal(true)) + OverwriteByExpression.byPosition(table, df.logicalPlan, Literal(true), extraOptions.toMap) } - - case other => - throw new AnalysisException(s"insertInto does not support $other mode, " + - s"please use Append or Overwrite mode instead.") } runCommand(df.sparkSession, "insertInto") { @@ -409,11 +459,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { private 
def insertInto(tableIdent: TableIdentifier): Unit = { runCommand(df.sparkSession, "insertInto") { - InsertIntoTable( + InsertIntoStatement( table = UnresolvedRelation(tableIdent), - partition = Map.empty[String, Option[String]], + partitionSpec = Map.empty[String, Option[String]], query = df.logicalPlan, - overwrite = modeForDSV1 == SaveMode.Overwrite, + overwrite = mode == SaveMode.Overwrite, ifPartitionNotExists = false) } } @@ -483,22 +533,19 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * @since 1.4.0 */ def saveAsTable(tableName: String): Unit = { - import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, CatalogObjectIdentifier} - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ + import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, NonSessionCatalogAndIdentifier, SessionCatalogAndIdentifier} + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ val session = df.sparkSession val canUseV2 = lookupV2Provider().isDefined - val sessionCatalogOpt = session.sessionState.analyzer.sessionCatalog session.sessionState.sqlParser.parseMultipartIdentifier(tableName) match { - case CatalogObjectIdentifier(Some(catalog), ident) => - saveAsTable(catalog.asTableCatalog, ident, modeForDSV2) + case NonSessionCatalogAndIdentifier(catalog, ident) => + saveAsTable(catalog.asTableCatalog, ident) - case CatalogObjectIdentifier(None, ident) - if canUseV2 && sessionCatalogOpt.isDefined && ident.namespace().length <= 1 => - // We pass in the modeForDSV1, as using the V2 session catalog should maintain compatibility - // for now. 
- saveAsTable(sessionCatalogOpt.get.asTableCatalog, ident, modeForDSV1) + case SessionCatalogAndIdentifier(catalog, ident) + if canUseV2 && ident.namespace().length <= 1 => + saveAsTable(catalog.asTableCatalog, ident) case AsTableIdentifier(tableIdentifier) => saveAsTable(tableIdentifier) @@ -510,38 +557,32 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { } - private def saveAsTable(catalog: TableCatalog, ident: Identifier, mode: SaveMode): Unit = { - val partitioning = partitioningColumns.map { colNames => - colNames.map(name => IdentityTransform(FieldReference(name))) - }.getOrElse(Seq.empty[Transform]) - val bucketing = bucketColumnNames.map { cols => - Seq(BucketTransform(LiteralValue(numBuckets.get, IntegerType), cols.map(FieldReference(_)))) - }.getOrElse(Seq.empty[Transform]) - val partitionTransforms = partitioning ++ bucketing - + private def saveAsTable(catalog: TableCatalog, ident: Identifier): Unit = { val tableOpt = try Option(catalog.loadTable(ident)) catch { case _: NoSuchTableException => None } def getLocationIfExists: Option[(String, String)] = { val opts = CaseInsensitiveMap(extraOptions.toMap) - opts.get("path").map("location" -> _) + opts.get("path").map(TableCatalog.PROP_LOCATION -> _) } val command = (mode, tableOpt) match { - case (_, Some(table: V1Table)) => + case (_, Some(_: V1Table)) => return saveAsTable(TableIdentifier(ident.name(), ident.namespace().headOption)) case (SaveMode.Append, Some(table)) => - AppendData.byName(DataSourceV2Relation.create(table), df.logicalPlan) + checkPartitioningMatchesV2Table(table) + val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + AppendData.byName(v2Relation, df.logicalPlan, extraOptions.toMap) case (SaveMode.Overwrite, _) => ReplaceTableAsSelect( catalog, ident, - partitionTransforms, + partitioningAsV2, df.queryExecution.analyzed, - Map("provider" -> source) ++ getLocationIfExists, + Map(TableCatalog.PROP_PROVIDER -> source) ++ getLocationIfExists, 
extraOptions.toMap, orCreate = true) // Create the table if it doesn't exist @@ -552,9 +593,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { CreateTableAsSelect( catalog, ident, - partitionTransforms, + partitioningAsV2, df.queryExecution.analyzed, - Map("provider" -> source) ++ getLocationIfExists, + Map(TableCatalog.PROP_PROVIDER -> source) ++ getLocationIfExists, extraOptions.toMap, ignoreIfExists = other == SaveMode.Ignore) } @@ -571,7 +612,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val tableIdentWithDB = tableIdent.copy(database = Some(db)) val tableName = tableIdentWithDB.unquotedString - (tableExists, modeForDSV1) match { + (tableExists, mode) match { case (true, SaveMode.Ignore) => // Do nothing @@ -627,7 +668,30 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { bucketSpec = getBucketSpec) runCommand(df.sparkSession, "saveAsTable")( - CreateTable(tableDesc, modeForDSV1, Some(df.logicalPlan))) + CreateTable(tableDesc, mode, Some(df.logicalPlan))) + } + + /** Converts the provided partitioning and bucketing information to DataSourceV2 Transforms. */ + private def partitioningAsV2: Seq[Transform] = { + val partitioning = partitioningColumns.map { colNames => + colNames.map(name => IdentityTransform(FieldReference(name))) + }.getOrElse(Seq.empty[Transform]) + val bucketing = + getBucketSpec.map(spec => CatalogV2Implicits.BucketSpecHelper(spec).asTransform).toSeq + partitioning ++ bucketing + } + + /** + * For V2 DataSources, performs if the provided partitioning matches that of the table. + * Partitioning information is not required when appending data to V2 tables. 
+ */ + private def checkPartitioningMatchesV2Table(existingTable: Table): Unit = { + val v2Partitions = partitioningAsV2 + if (v2Partitions.isEmpty) return + require(v2Partitions.sameElements(existingTable.partitioning()), + "The provided partitioning does not match of the table.\n" + + s" - provided: ${v2Partitions.mkString(", ")}\n" + + s" - table: ${existingTable.partitioning().mkString(", ")}") } /** @@ -693,6 +757,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { *
  • `encoding` (by default it is not set): specifies encoding (charset) of saved json * files. If it is not set, the UTF-8 charset will be used.
  • *
  • `lineSep` (default `\n`): defines the line separator that should be used for writing.
  • + *
  • `ignoreNullFields` (default `true`): Whether to ignore null fields + * when generating JSON objects.
  • * * * @since 1.4.0 @@ -830,13 +896,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { private def runCommand(session: SparkSession, name: String)(command: LogicalPlan): Unit = { val qe = session.sessionState.executePlan(command) // call `QueryExecution.toRDD` to trigger the execution of commands. - SQLExecution.withNewExecutionId(session, qe, Some(name))(qe.toRdd) + SQLExecution.withNewExecutionId(qe, Some(name))(qe.toRdd) } - private def modeForDSV1 = mode.getOrElse(SaveMode.ErrorIfExists) - - private def modeForDSV2 = mode.getOrElse(SaveMode.Append) - private def lookupV2Provider(): Option[TableProvider] = { DataSource.lookupDataSourceV2(source, df.sparkSession.sessionState.conf) match { // TODO(SPARK-28396): File source v2 write path is currently broken. @@ -851,7 +913,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { private var source: String = df.sparkSession.sessionState.conf.defaultDataSourceName - private var mode: Option[SaveMode] = None + private var mode: SaveMode = SaveMode.ErrorIfExists private val extraOptions = new scala.collection.mutable.HashMap[String, String] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala new file mode 100644 index 0000000000000..cf6bde5a2bcb9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Bucket, Days, Hours, Literal, Months, Years} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, CreateTableAsSelect, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect} +import org.apache.spark.sql.connector.catalog.TableCatalog +import org.apache.spark.sql.connector.expressions.{LogicalExpressions, NamedReference, Transform} +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.types.IntegerType + +/** + * Interface used to write a [[org.apache.spark.sql.Dataset]] to external storage using the v2 API. 
+ * + * @since 3.0.0 + */ +@Experimental +final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) + extends CreateTableWriter[T] { + + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Util._ + import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier + + private val df: DataFrame = ds.toDF() + + private val sparkSession = ds.sparkSession + + private val catalogManager = sparkSession.sessionState.analyzer.catalogManager + + private val tableName = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(table) + + private val (catalog, identifier) = { + val CatalogAndIdentifier(catalog, identifier) = tableName + (catalog.asTableCatalog, identifier) + } + + private val logicalPlan = df.queryExecution.logical + + private var provider: Option[String] = None + + private val options = new mutable.HashMap[String, String]() + + private val properties = new mutable.HashMap[String, String]() + + private var partitioning: Option[Seq[Transform]] = None + + override def using(provider: String): CreateTableWriter[T] = { + this.provider = Some(provider) + this + } + + override def option(key: String, value: String): DataFrameWriterV2[T] = { + this.options.put(key, value) + this + } + + override def options(options: scala.collection.Map[String, String]): DataFrameWriterV2[T] = { + options.foreach { + case (key, value) => + this.options.put(key, value) + } + this + } + + override def options(options: java.util.Map[String, String]): DataFrameWriterV2[T] = { + this.options(options.asScala) + this + } + + override def tableProperty(property: String, value: String): CreateTableWriter[T] = { + this.properties.put(property, value) + this + } + + @scala.annotation.varargs + override def partitionedBy(column: Column, columns: Column*): CreateTableWriter[T] = { + def ref(name: String): NamedReference = LogicalExpressions.parseReference(name) + + val asTransforms = (column +: 
columns).map(_.expr).map { + case Years(attr: Attribute) => + LogicalExpressions.years(ref(attr.name)) + case Months(attr: Attribute) => + LogicalExpressions.months(ref(attr.name)) + case Days(attr: Attribute) => + LogicalExpressions.days(ref(attr.name)) + case Hours(attr: Attribute) => + LogicalExpressions.hours(ref(attr.name)) + case Bucket(Literal(numBuckets: Int, IntegerType), attr: Attribute) => + LogicalExpressions.bucket(numBuckets, Array(ref(attr.name))) + case attr: Attribute => + LogicalExpressions.identity(ref(attr.name)) + case expr => + throw new AnalysisException(s"Invalid partition transformation: ${expr.sql}") + } + + this.partitioning = Some(asTransforms) + this + } + + override def create(): Unit = { + // create and replace could alternatively create ParsedPlan statements, like + // `CreateTableFromDataFrameStatement(UnresolvedRelation(tableName), ...)`, to keep the catalog + // resolution logic in the analyzer. + runCommand("create") { + CreateTableAsSelect( + catalog, + identifier, + partitioning.getOrElse(Seq.empty), + logicalPlan, + properties = provider.map(p => properties + (TableCatalog.PROP_PROVIDER -> p)) + .getOrElse(properties).toMap, + writeOptions = options.toMap, + ignoreIfExists = false) + } + } + + override def replace(): Unit = { + internalReplace(orCreate = false) + } + + override def createOrReplace(): Unit = { + internalReplace(orCreate = true) + } + + + /** + * Append the contents of the data frame to the output table. + * + * If the output table does not exist, this operation will fail with + * [[org.apache.spark.sql.catalyst.analysis.NoSuchTableException]]. The data frame will be + * validated to ensure it is compatible with the existing table. 
+ * + * @throws org.apache.spark.sql.catalyst.analysis.NoSuchTableException If the table does not exist + */ + @throws(classOf[NoSuchTableException]) + def append(): Unit = { + val append = loadTable(catalog, identifier) match { + case Some(t) => + AppendData.byName( + DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), + logicalPlan, options.toMap) + case _ => + throw new NoSuchTableException(identifier) + } + + runCommand("append")(append) + } + + /** + * Overwrite rows matching the given filter condition with the contents of the data frame in + * the output table. + * + * If the output table does not exist, this operation will fail with + * [[org.apache.spark.sql.catalyst.analysis.NoSuchTableException]]. + * The data frame will be validated to ensure it is compatible with the existing table. + * + * @throws org.apache.spark.sql.catalyst.analysis.NoSuchTableException If the table does not exist + */ + @throws(classOf[NoSuchTableException]) + def overwrite(condition: Column): Unit = { + val overwrite = loadTable(catalog, identifier) match { + case Some(t) => + OverwriteByExpression.byName( + DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), + logicalPlan, condition.expr, options.toMap) + case _ => + throw new NoSuchTableException(identifier) + } + + runCommand("overwrite")(overwrite) + } + + /** + * Overwrite all partition for which the data frame contains at least one row with the contents + * of the data frame in the output table. + * + * This operation is equivalent to Hive's `INSERT OVERWRITE ... PARTITION`, which replaces + * partitions dynamically depending on the contents of the data frame. + * + * If the output table does not exist, this operation will fail with + * [[org.apache.spark.sql.catalyst.analysis.NoSuchTableException]]. The data frame will be + * validated to ensure it is compatible with the existing table. 
+ * + * @throws org.apache.spark.sql.catalyst.analysis.NoSuchTableException If the table does not exist + */ + @throws(classOf[NoSuchTableException]) + def overwritePartitions(): Unit = { + val dynamicOverwrite = loadTable(catalog, identifier) match { + case Some(t) => + OverwritePartitionsDynamic.byName( + DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), + logicalPlan, options.toMap) + case _ => + throw new NoSuchTableException(identifier) + } + + runCommand("overwritePartitions")(dynamicOverwrite) + } + + /** + * Wrap an action to track the QueryExecution and time cost, then report to the user-registered + * callback functions. + */ + private def runCommand(name: String)(command: LogicalPlan): Unit = { + val qe = sparkSession.sessionState.executePlan(command) + // call `QueryExecution.toRDD` to trigger the execution of commands. + SQLExecution.withNewExecutionId(qe, Some(name))(qe.toRdd) + } + + private def internalReplace(orCreate: Boolean): Unit = { + runCommand("replace") { + ReplaceTableAsSelect( + catalog, + identifier, + partitioning.getOrElse(Seq.empty), + logicalPlan, + properties = provider.map(p => properties + ("provider" -> p)).getOrElse(properties).toMap, + writeOptions = options.toMap, + orCreate = orCreate) + } + } +} + +/** + * Configuration methods common to create/replace operations and insert/overwrite operations. + * @tparam R builder type to return + */ +trait WriteConfigMethods[R] { + /** + * Add a write option. + * + * @since 3.0.0 + */ + def option(key: String, value: String): R + + /** + * Add a boolean output option. + * + * @since 3.0.0 + */ + def option(key: String, value: Boolean): R = option(key, value.toString) + + /** + * Add a long output option. + * + * @since 3.0.0 + */ + def option(key: String, value: Long): R = option(key, value.toString) + + /** + * Add a double output option. 
+ * + * @since 3.0.0 + */ + def option(key: String, value: Double): R = option(key, value.toString) + + /** + * Add write options from a Scala Map. + * + * @since 3.0.0 + */ + def options(options: scala.collection.Map[String, String]): R + + /** + * Add write options from a Java Map. + * + * @since 3.0.0 + */ + def options(options: java.util.Map[String, String]): R +} + +/** + * Trait to restrict calls to create and replace operations. + */ +trait CreateTableWriter[T] extends WriteConfigMethods[CreateTableWriter[T]] { + /** + * Create a new table from the contents of the data frame. + * + * The new table's schema, partition layout, properties, and other configuration will be + * based on the configuration set on this writer. + * + * If the output table exists, this operation will fail with + * [[org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException]]. + * + * @throws org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException + * If the table already exists + */ + @throws(classOf[TableAlreadyExistsException]) + def create(): Unit + + /** + * Replace an existing table with the contents of the data frame. + * + * The existing table's schema, partition layout, properties, and other configuration will be + * replaced with the contents of the data frame and the configuration set on this writer. + * + * If the output table does not exist, this operation will fail with + * [[org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException]]. + * + * @throws org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException + * If the table already exists + */ + @throws(classOf[CannotReplaceMissingTableException]) + def replace(): Unit + + /** + * Create a new table or replace an existing table with the contents of the data frame. + * + * The output table's schema, partition layout, properties, and other configuration will be based + * on the contents of the data frame and the configuration set on this writer. 
If the table + * exists, its configuration and data will be replaced. + */ + def createOrReplace(): Unit + + /** + * Partition the output table created by `create`, `createOrReplace`, or `replace` using + * the given columns or transforms. + * + * When specified, the table data will be stored by these values for efficient reads. + * + * For example, when a table is partitioned by day, it may be stored in a directory layout like: + *
+ * <ul>
+ * <li>`table/day=2019-06-01/`</li>
+ * <li>`table/day=2019-06-02/`</li>
+ * </ul>
    + * + * Partitioning is one of the most widely used techniques to optimize physical data layout. + * It provides a coarse-grained index for skipping unnecessary data reads when queries have + * predicates on the partitioned columns. In order for partitioning to work well, the number + * of distinct values in each column should typically be less than tens of thousands. + * + * @since 3.0.0 + */ + def partitionedBy(column: Column, columns: Column*): CreateTableWriter[T] + + /** + * Specifies a provider for the underlying output data source. Spark's default catalog supports + * "parquet", "json", etc. + * + * @since 3.0.0 + */ + def using(provider: String): CreateTableWriter[T] + + /** + * Add a table property. + */ + def tableProperty(property: String, value: String): CreateTableWriter[T] +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 88fe7a3f380ab..42f35354e864f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -46,11 +46,12 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, PartitioningCollection} import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.arrow.{ArrowBatchStreamWriter, ArrowConverters} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, FileTable} import org.apache.spark.sql.execution.python.EvaluatePython import org.apache.spark.sql.execution.stat.StatFunctions import 
org.apache.spark.sql.internal.SQLConf @@ -59,7 +60,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} import org.apache.spark.util.Utils private[sql] object Dataset { @@ -81,18 +82,19 @@ private[sql] object Dataset { dataset } - def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = { - val qe = sparkSession.sessionState.executePlan(logicalPlan) - qe.assertAnalyzed() - new Dataset[Row](sparkSession, qe, RowEncoder(qe.analyzed.schema)) + def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = + sparkSession.withActive { + val qe = sparkSession.sessionState.executePlan(logicalPlan) + qe.assertAnalyzed() + new Dataset[Row](qe, RowEncoder(qe.analyzed.schema)) } /** A variant of ofRows that allows passing in a tracker so we can track query parsing time. 
*/ def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan, tracker: QueryPlanningTracker) - : DataFrame = { + : DataFrame = sparkSession.withActive { val qe = new QueryExecution(sparkSession, logicalPlan, tracker) qe.assertAnalyzed() - new Dataset[Row](sparkSession, qe, RowEncoder(qe.analyzed.schema)) + new Dataset[Row](qe, RowEncoder(qe.analyzed.schema)) } } @@ -184,13 +186,12 @@ private[sql] object Dataset { */ @Stable class Dataset[T] private[sql]( - @transient private val _sparkSession: SparkSession, @DeveloperApi @Unstable @transient val queryExecution: QueryExecution, @DeveloperApi @Unstable @transient val encoder: Encoder[T]) extends Serializable { @transient lazy val sparkSession: SparkSession = { - if (_sparkSession == null) { + if (queryExecution == null || queryExecution.sparkSession == null) { throw new SparkException( "Dataset transformations and actions can only be invoked by the driver, not inside of" + " other Dataset transformations; for example, dataset1.map(x => dataset2.values.count()" + @@ -198,7 +199,7 @@ class Dataset[T] private[sql]( "performed inside of the dataset1.map transformation. For more information," + " see SPARK-28702.") } - _sparkSession + queryExecution.sparkSession } // A globally unique id of this Dataset. @@ -210,7 +211,7 @@ class Dataset[T] private[sql]( // you wrap it with `withNewExecutionId` if this actions doesn't call other action. 
def this(sparkSession: SparkSession, logicalPlan: LogicalPlan, encoder: Encoder[T]) = { - this(sparkSession, sparkSession.sessionState.executePlan(logicalPlan), encoder) + this(sparkSession.sessionState.executePlan(logicalPlan), encoder) } def this(sqlContext: SQLContext, logicalPlan: LogicalPlan, encoder: Encoder[T]) = { @@ -228,7 +229,7 @@ class Dataset[T] private[sql]( case _ => queryExecution.analyzed } - if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN)) { + if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { plan.setTagValue(Dataset.DATASET_ID_TAG, id) } plan @@ -254,10 +255,16 @@ class Dataset[T] private[sql]( @transient lazy val sqlContext: SQLContext = sparkSession.sqlContext private[sql] def resolve(colName: String): NamedExpression = { - queryExecution.analyzed.resolveQuoted(colName, sparkSession.sessionState.analyzer.resolver) + val resolver = sparkSession.sessionState.analyzer.resolver + queryExecution.analyzed.resolveQuoted(colName, resolver) .getOrElse { - throw new AnalysisException( - s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})""") + val fields = schema.fieldNames + val extraMsg = if (fields.exists(resolver(_, colName))) { + s"; did you mean to quote the `$colName` column?" + } else "" + val fieldsStr = fields.mkString(", ") + val errorMsg = s"""Cannot resolve column name "$colName" among (${fieldsStr})${extraMsg}""" + throw new AnalysisException(errorMsg) } } @@ -438,17 +445,19 @@ class Dataset[T] private[sql]( */ // This is declared with parentheses to prevent the Scala compiler from treating // `ds.toDF("1")` as invoking this toDF and then apply on the returned DataFrame. - def toDF(): DataFrame = new Dataset[Row](sparkSession, queryExecution, RowEncoder(schema)) + def toDF(): DataFrame = new Dataset[Row](queryExecution, RowEncoder(schema)) /** * Returns a new Dataset where each record has been mapped on to the specified type. 
The * method used to map columns depend on the type of `U`: - * - When `U` is a class, fields for the class will be mapped to columns of the same name - * (case sensitivity is determined by `spark.sql.caseSensitive`). - * - When `U` is a tuple, the columns will be mapped by ordinal (i.e. the first column will - * be assigned to `_1`). - * - When `U` is a primitive type (i.e. String, Int, etc), then the first column of the - * `DataFrame` will be used. + *
+ * <ul>
+ * <li>When `U` is a class, fields for the class will be mapped to columns of the same name
+ * (case sensitivity is determined by `spark.sql.caseSensitive`).</li>
+ * <li>When `U` is a tuple, the columns will be mapped by ordinal (i.e. the first column will
+ * be assigned to `_1`).</li>
+ * <li>When `U` is a primitive type (i.e. String, Int, etc), then the first column of the
+ * `DataFrame` will be used.</li>
+ * </ul>
    * * If the schema of the Dataset does not match the desired `U` type, you can use `select` * along with `alias` or `as` to rearrange or rename as required. @@ -494,7 +503,9 @@ class Dataset[T] private[sql]( * @group basic * @since 1.6.0 */ - def schema: StructType = queryExecution.analyzed.schema + def schema: StructType = sparkSession.withActive { + queryExecution.analyzed.schema + } /** * Prints the schema to the console in a nice tree format. @@ -515,36 +526,53 @@ class Dataset[T] private[sql]( // scalastyle:on println /** - * Prints the plans (logical and physical) to the console for debugging purposes. + * Prints the plans (logical and physical) with a format specified by a given explain mode. * + * @param mode specifies the expected output format of plans. + *
+ * <ul>
+ * <li>`simple` Print only a physical plan.</li>
+ * <li>`extended`: Print both logical and physical plans.</li>
+ * <li>`codegen`: Print a physical plan and generated codes if they are
+ * available.</li>
+ * <li>`cost`: Print a logical plan and statistics if they are available.</li>
+ * <li>`formatted`: Split explain output into two sections: a physical plan outline
+ * and node details.</li>
+ * </ul>
    * @group basic - * @since 1.6.0 + * @since 3.0.0 */ - def explain(extended: Boolean): Unit = { + def explain(mode: String): Unit = sparkSession.withActive { // Because temporary views are resolved during analysis when we create a Dataset, and // `ExplainCommand` analyzes input query plan and resolves temporary views again. Using // `ExplainCommand` here will probably output different query plans, compared to the results // of evaluation of the Dataset. So just output QueryExecution's query plans here. - val qe = ExplainCommandUtil.explainedQueryExecution(sparkSession, logicalPlan, queryExecution) - val outputString = - if (extended) { - qe.toString - } else { - qe.simpleString - } // scalastyle:off println - println(outputString) + println(queryExecution.explainString(ExplainMode.fromString(mode))) // scalastyle:on println } + /** + * Prints the plans (logical and physical) to the console for debugging purposes. + * + * @param extended default `false`. If `false`, prints only the physical plan. + * + * @group basic + * @since 1.6.0 + */ + def explain(extended: Boolean): Unit = if (extended) { + explain(ExtendedMode.name) + } else { + explain(SimpleMode.name) + } + /** * Prints the physical plan to the console for debugging purposes. * * @group basic * @since 1.6.0 */ - def explain(): Unit = explain(extended = false) + def explain(): Unit = explain(SimpleMode.name) /** * Returns all column names and their data types as an array. @@ -579,8 +607,8 @@ class Dataset[T] private[sql]( * @group basic * @since 2.4.0 */ - def isEmpty: Boolean = withAction("isEmpty", limit(1).groupBy().count().queryExecution) { plan => - plan.executeCollect().head.getLong(0) == 0 + def isEmpty: Boolean = withAction("isEmpty", select().queryExecution) { plan => + plan.executeTake(1).isEmpty } /** @@ -694,11 +722,12 @@ class Dataset[T] private[sql]( * before which we assume no more late data is going to arrive. 
* * Spark will use this watermark for several purposes: - * - To know when a given time window aggregation can be finalized and thus can be emitted when - * using output modes that do not allow updates. - * - To minimize the amount of state that we need to keep for on-going aggregations, - * `mapGroupsWithState` and `dropDuplicates` operators. - * + *
+ * <ul>
+ * <li>To know when a given time window aggregation can be finalized and thus can be emitted
+ * when using output modes that do not allow updates.</li>
+ * <li>To minimize the amount of state that we need to keep for on-going aggregations,
+ * `mapGroupsWithState` and `dropDuplicates` operators.</li>
+ * </ul>
    * The current watermark is computed by looking at the `MAX(eventTime)` seen across * all of the partitions in the query minus a user specified `delayThreshold`. Due to the cost * of coordinating this value across partitions, the actual watermark used is only guaranteed @@ -718,14 +747,14 @@ class Dataset[T] private[sql]( def withWatermark(eventTime: String, delayThreshold: String): Dataset[T] = withTypedPlan { val parsedDelay = try { - CalendarInterval.fromCaseInsensitiveString(delayThreshold) + IntervalUtils.stringToInterval(UTF8String.fromString(delayThreshold)) } catch { case e: IllegalArgumentException => throw new AnalysisException( s"Unable to parse time delay '$delayThreshold'", cause = Some(e)) } - require(parsedDelay.milliseconds >= 0 && parsedDelay.months >= 0, + require(!IntervalUtils.isNegative(parsedDelay), s"delay threshold ($delayThreshold) should not be negative.") EliminateEventTimeWatermark( EventTimeWatermark(UnresolvedAttribute(eventTime), parsedDelay, logicalPlan)) @@ -1330,7 +1359,7 @@ class Dataset[T] private[sql]( private def addDataFrameIdToCol(expr: NamedExpression): NamedExpression = { val newExpr = expr transform { case a: AttributeReference - if sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN) => + if sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) => val metadata = new MetadataBuilder() .withMetadata(a.metadata) .putLong(Dataset.DATASET_ID_KEY, id) @@ -1475,7 +1504,7 @@ class Dataset[T] private[sql]( val namedColumns = columns.map(_.withInputType(exprEnc, logicalPlan.output).named) val execution = new QueryExecution(sparkSession, Project(namedColumns, logicalPlan)) - new Dataset(sparkSession, execution, ExpressionEncoder.tuple(encoders)) + new Dataset(execution, ExpressionEncoder.tuple(encoders)) } /** @@ -1841,6 +1870,57 @@ class Dataset[T] private[sql]( @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs : _*) + /** + * 
Define (named) metrics to observe on the Dataset. This method returns an 'observed' Dataset + * that returns the same result as the input, with the following guarantees: + *
+ * <ul>
+ * <li>It will compute the defined aggregates (metrics) on all the data that is flowing through
+ * the Dataset at that point.</li>
+ * <li>It will report the value of the defined aggregate columns as soon as we reach a completion
+ * point. A completion point is either the end of a query (batch mode) or the end of a streaming
+ * epoch. The value of the aggregates only reflects the data processed since the previous
+ * completion point.</li>
+ * </ul>
    + * Please note that continuous execution is currently not supported. + * + * The metrics columns must either contain a literal (e.g. lit(42)), or should contain one or + * more aggregate functions (e.g. sum(a) or sum(a + b) + avg(c) - lit(1)). Expressions that + * contain references to the input Dataset's columns must always be wrapped in an aggregate + * function. + * + * A user can observe these metrics by either adding + * [[org.apache.spark.sql.streaming.StreamingQueryListener]] or a + * [[org.apache.spark.sql.util.QueryExecutionListener]] to the spark session. + * + * {{{ + * // Monitor the metrics using a listener. + * spark.streams.addListener(new StreamingQueryListener() { + * override def onQueryProgress(event: QueryProgressEvent): Unit = { + * event.progress.observedMetrics.asScala.get("my_event").foreach { row => + * // Trigger if the number of errors exceeds 5 percent + * val num_rows = row.getAs[Long]("rc") + * val num_error_rows = row.getAs[Long]("erc") + * val ratio = num_error_rows.toDouble / num_rows + * if (ratio > 0.05) { + * // Trigger alert + * } + * } + * } + * def onQueryStarted(event: QueryStartedEvent): Unit = {} + * def onQueryTerminated(event: QueryTerminatedEvent): Unit = {} + * }) + * // Observe row count (rc) and error row count (erc) in the streaming Dataset + * val observed_ds = ds.observe("my_event", count(lit(1)).as("rc"), count($"error").as("erc")) + * observed_ds.writeStream.format("...").start() + * }}} + * + * @group typedrel + * @since 3.0.0 + */ + def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = withTypedPlan { + CollectMetrics(name, (expr +: exprs).map(_.named), logicalPlan) + } + /** * Returns a new Dataset by taking the first `n` rows. The difference between this function * and `head` is that `head` is an action and returns an array (by triggering query execution) @@ -2439,13 +2519,14 @@ class Dataset[T] private[sql]( /** * Computes specified statistics for numeric and string columns. 
Available statistics are: - * - * - count - * - mean - * - stddev - * - min - * - max - * - arbitrary approximate percentiles specified as a percentage (eg, 75%) + *
+ * <ul>
+ * <li>count</li>
+ * <li>mean</li>
+ * <li>stddev</li>
+ * <li>min</li>
+ * <li>max</li>
+ * <li>arbitrary approximate percentiles specified as a percentage (e.g. 75%)</li>
+ * </ul>
    * * If no statistics are given, this function computes count, mean, stddev, min, * approximate quartiles (percentiles at 25%, 50%, and 75%), and max. @@ -2715,6 +2796,18 @@ class Dataset[T] private[sql]( */ def take(n: Int): Array[T] = head(n) + /** + * Returns the last `n` rows in the Dataset. + * + * Running tail requires moving data into the application's driver process, and doing so with + * a very large `n` can crash the driver process with OutOfMemoryError. + * + * @group action + * @since 3.0.0 + */ + def tail(n: Int): Array[T] = withAction( + "tail", withTypedPlan(Tail(Literal(n), logicalPlan)).queryExecution)(collectFromPlan) + /** * Returns the first `n` rows in the Dataset as a list. * @@ -3120,6 +3213,34 @@ class Dataset[T] private[sql]( new DataFrameWriter[T](this) } + /** + * Create a write configuration builder for v2 sources. + * + * This builder is used to configure and execute write operations. For example, to append to an + * existing table, run: + * + * {{{ + * df.writeTo("catalog.db.table").append() + * }}} + * + * This can also be used to create or replace existing tables: + * + * {{{ + * df.writeTo("catalog.db.table").partitionedBy($"col").createOrReplace() + * }}} + * + * @group basic + * @since 3.0.0 + */ + def writeTo(table: String): DataFrameWriterV2[T] = { + // TODO: streaming could be adapted to use this interface + if (isStreaming) { + logicalPlan.failAnalysis( + "'writeTo' can not be called on streaming Dataset/DataFrame") + } + new DataFrameWriterV2[T](table, this) + } + /** * Interface for saving the content of the streaming Dataset out into external storage. 
* @@ -3183,7 +3304,7 @@ class Dataset[T] private[sql]( fr.inputFiles case r: HiveTableRelation => r.tableMeta.storage.locationUri.map(_.toString).toArray - case DataSourceV2Relation(table: FileTable, _, _) => + case DataSourceV2ScanRelation(table: FileTable, _, _) => table.fileIndex.inputFiles }.flatten files.toSet.toArray @@ -3212,6 +3333,16 @@ class Dataset[T] private[sql]( } } + private[sql] def tailToPython(n: Int): Array[Any] = { + EvaluatePython.registerPicklers() + withAction("tailToPython", queryExecution) { plan => + val toJava: (Any) => Any = EvaluatePython.toJava(_, schema) + val iter: Iterator[Array[Byte]] = new SerDeUtil.AutoBatchedPickler( + plan.executeTail(n).iterator.map(toJava)) + PythonRDD.serveIterator(iter, "serve-DataFrame") + } + } + private[sql] def getRowsToPython( _numRows: Int, truncate: Int): Array[Any] = { @@ -3328,9 +3459,9 @@ class Dataset[T] private[sql]( } } - private[sql] def toPythonIterator(): Array[Any] = { + private[sql] def toPythonIterator(prefetchPartitions: Boolean = false): Array[Any] = { withNewExecutionId { - PythonRDD.toLocalIteratorAndServe(javaToPython.rdd) + PythonRDD.toLocalIteratorAndServe(javaToPython.rdd, prefetchPartitions) } } @@ -3343,7 +3474,7 @@ class Dataset[T] private[sql]( * an execution. */ private def withNewExecutionId[U](body: => U): U = { - SQLExecution.withNewExecutionId(sparkSession, queryExecution)(body) + SQLExecution.withNewExecutionId(queryExecution)(body) } /** @@ -3352,10 +3483,8 @@ class Dataset[T] private[sql]( * reset. */ private def withNewRDDExecutionId[U](body: => U): U = { - SQLExecution.withNewExecutionId(sparkSession, rddQueryExecution) { - rddQueryExecution.executedPlan.foreach { plan => - plan.resetMetrics() - } + SQLExecution.withNewExecutionId(rddQueryExecution) { + rddQueryExecution.executedPlan.resetMetrics() body } } @@ -3365,10 +3494,8 @@ class Dataset[T] private[sql]( * user-registered callback functions. 
*/ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { - SQLExecution.withNewExecutionId(sparkSession, qe, Some(name)) { - qe.executedPlan.foreach { plan => - plan.resetMetrics() - } + SQLExecution.withNewExecutionId(qe, Some(name)) { + qe.executedPlan.resetMetrics() action(qe.executedPlan) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 89cc9735e4f6a..76ee297dfca79 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -449,10 +449,7 @@ class KeyValueGroupedDataset[K, V] private[sql]( val aggregate = Aggregate(groupingAttributes, keyColumn +: namedColumns, logicalPlan) val execution = new QueryExecution(sparkSession, aggregate) - new Dataset( - sparkSession, - execution, - ExpressionEncoder.tuple(kExprEnc +: encoders)) + new Dataset(execution, ExpressionEncoder.tuple(kExprEnc +: encoders)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index e85636d82a62c..b1ba7d4538732 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -26,6 +26,7 @@ import org.apache.spark.annotation.Stable import org.apache.spark.api.python.PythonEvalType import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.analysis.{Star, UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction} +import org.apache.spark.sql.catalyst.encoders.encoderFor import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -47,8 +48,8 @@ import 
org.apache.spark.sql.types.{NumericType, StructType} */ @Stable class RelationalGroupedDataset protected[sql]( - df: DataFrame, - groupingExprs: Seq[Expression], + private[sql] val df: DataFrame, + private[sql] val groupingExprs: Seq[Expression], groupType: RelationalGroupedDataset.GroupType) { private[this] def toDF(aggExprs: Seq[Expression]): DataFrame = { @@ -129,6 +130,37 @@ class RelationalGroupedDataset protected[sql]( (inputExpr: Expression) => exprToFunc(inputExpr) } + /** + * Returns a `KeyValueGroupedDataset` where the data is grouped by the grouping expressions + * of current `RelationalGroupedDataset`. + * + * @since 3.0.0 + */ + def as[K: Encoder, T: Encoder]: KeyValueGroupedDataset[K, T] = { + val keyEncoder = encoderFor[K] + val valueEncoder = encoderFor[T] + + // Resolves grouping expressions. + val dummyPlan = Project(groupingExprs.map(alias), LocalRelation(df.logicalPlan.output)) + val analyzedPlan = df.sparkSession.sessionState.analyzer.execute(dummyPlan) + .asInstanceOf[Project] + df.sparkSession.sessionState.analyzer.checkAnalysis(analyzedPlan) + val aliasedGroupings = analyzedPlan.projectList + + // Adds the grouping expressions that are not in base DataFrame into outputs. + val addedCols = aliasedGroupings.filter(g => !df.logicalPlan.outputSet.contains(g.toAttribute)) + val qe = Dataset.ofRows( + df.sparkSession, + Project(df.logicalPlan.output ++ addedCols, df.logicalPlan)).queryExecution + + new KeyValueGroupedDataset( + keyEncoder, + valueEncoder, + qe, + df.logicalPlan.output, + aliasedGroupings.map(_.toAttribute)) + } + /** * (Scala-specific) Compute aggregates by specifying the column names and * aggregate methods. The resulting `DataFrame` will also contain the grouping columns. @@ -523,6 +555,48 @@ class RelationalGroupedDataset protected[sql]( Dataset.ofRows(df.sparkSession, plan) } + /** + * Applies a vectorized python user-defined function to each cogrouped data. 
+ * The user-defined function defines a transformation: + * `pandas.DataFrame`, `pandas.DataFrame` -> `pandas.DataFrame`. + * For each group in the cogrouped data, all elements in the group are passed as a + * `pandas.DataFrame` and the results for all cogroups are combined into a new [[DataFrame]]. + * + * This function uses Apache Arrow as serialization format between Java executors and Python + * workers. + */ + private[sql] def flatMapCoGroupsInPandas( + r: RelationalGroupedDataset, + expr: PythonUDF): DataFrame = { + require(expr.evalType == PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF, + "Must pass a cogrouped map udf") + require(expr.dataType.isInstanceOf[StructType], + s"The returnType of the udf must be a ${StructType.simpleString}") + + val leftGroupingNamedExpressions = groupingExprs.map { + case ne: NamedExpression => ne + case other => Alias(other, other.toString)() + } + + val rightGroupingNamedExpressions = r.groupingExprs.map { + case ne: NamedExpression => ne + case other => Alias(other, other.toString)() + } + + val leftAttributes = leftGroupingNamedExpressions.map(_.toAttribute) + val rightAttributes = rightGroupingNamedExpressions.map(_.toAttribute) + + val leftChild = df.logicalPlan + val rightChild = r.df.logicalPlan + + val left = Project(leftGroupingNamedExpressions ++ leftChild.output, leftChild) + val right = Project(rightGroupingNamedExpressions ++ rightChild.output, rightChild) + + val output = expr.dataType.asInstanceOf[StructType].toAttributes + val plan = FlatMapCoGroupsInPandas(leftAttributes, rightAttributes, expr, output, left, right) + Dataset.ofRows(df.sparkSession, plan) + } + override def toString: String = { val builder = new StringBuilder builder.append("RelationalGroupedDataset: [grouping expressions: [") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index 0f5aab7f47d0d..e1b44b5918143 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql import org.apache.spark.annotation.Stable +import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{ConfigEntry, OptionalConfigEntry} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.{DeprecatedConfig, RemovedConfig} /** * Runtime configuration interface for Spark. To access this, use `SparkSession.conf`. @@ -29,7 +31,7 @@ import org.apache.spark.sql.internal.SQLConf * @since 2.0.0 */ @Stable -class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { +class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) extends Logging { /** * Sets the given Spark runtime configuration property. @@ -38,6 +40,8 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { */ def set(key: String, value: String): Unit = { requireNonStaticConf(key) + requireDefaultValueOfRemovedConf(key, value) + logDeprecationWarning(key) sqlConf.setConfString(key, value) } @@ -47,7 +51,6 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { * @since 2.0.0 */ def set(key: String, value: Boolean): Unit = { - requireNonStaticConf(key) set(key, value.toString) } @@ -57,7 +60,6 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { * @since 2.0.0 */ def set(key: String, value: Long): Unit = { - requireNonStaticConf(key) set(key, value.toString) } @@ -128,6 +130,7 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { */ def unset(key: String): Unit = { requireNonStaticConf(key) + logDeprecationWarning(key) sqlConf.unsetConf(key) } @@ -158,4 +161,26 @@ class RuntimeConfig private[sql](sqlConf: SQLConf = new SQLConf) { throw new AnalysisException(s"Cannot modify the value of a Spark config: $key") } } + + private def requireDefaultValueOfRemovedConf(key: String, value: String): Unit = 
{ + SQLConf.removedSQLConfigs.get(key).foreach { + case RemovedConfig(configName, version, defaultValue, comment) => + if (value != defaultValue) { + throw new AnalysisException( + s"The SQL config '$configName' was removed in the version $version. $comment") + } + } + } + + /** + * Logs a warning message if the given config key is deprecated. + */ + private def logDeprecationWarning(key: String): Unit = { + SQLConf.deprecatedSQLConfigs.get(key).foreach { + case DeprecatedConfig(configName, version, comment) => + logWarning( + s"The SQL config '$configName' has been deprecated in Spark v$version " + + s"and may be removed in the future. $comment") + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 45d0bd4122535..2054874e5e07b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -479,97 +479,6 @@ class SQLContext private[sql](val sparkSession: SparkSession) def readStream: DataStreamReader = sparkSession.readStream - /** - * Creates an external table from the given path and returns the corresponding DataFrame. - * It will use the default data source configured by spark.sql.sources.default. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable(tableName: String, path: String): DataFrame = { - sparkSession.catalog.createTable(tableName, path) - } - - /** - * Creates an external table from the given path based on a data source - * and returns the corresponding DataFrame. 
- * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - path: String, - source: String): DataFrame = { - sparkSession.catalog.createTable(tableName, path, source) - } - - /** - * Creates an external table from the given path based on a data source and a set of options. - * Then, returns the corresponding DataFrame. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - options: java.util.Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, options) - } - - /** - * (Scala-specific) - * Creates an external table from the given path based on a data source and a set of options. - * Then, returns the corresponding DataFrame. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - options: Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, options) - } - - /** - * Create an external table from the given path based on a data source, a schema and - * a set of options. Then, returns the corresponding DataFrame. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - schema: StructType, - options: java.util.Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, schema, options) - } - - /** - * (Scala-specific) - * Create an external table from the given path based on a data source, a schema and - * a set of options. Then, returns the corresponding DataFrame. 
- * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - schema: StructType, - options: Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, schema, options) - } - /** * Registers the given `DataFrame` as a temporary table in the catalog. Temporary tables exist * only during the lifetime of this instance of SQLContext. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index bd2bc1c0ad5d7..1fb97fb4b4cf1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -37,8 +37,10 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders._ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Range} +import org.apache.spark.sql.connector.ExternalCommandRunner import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.command.ExternalCommandExecutor +import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation} import org.apache.spark.sql.internal._ import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.sources.BaseRelation @@ -274,9 +276,7 @@ class SparkSession private( * @since 2.0.0 */ @transient - lazy val emptyDataFrame: DataFrame = { - createDataFrame(sparkContext.emptyRDD[Row].setName("empty"), StructType(Nil)) - } + lazy val emptyDataFrame: DataFrame = Dataset.ofRows(self, LocalRelation()) /** * Creates a new [[Dataset]] of type T containing zero elements. 
@@ -293,8 +293,7 @@ class SparkSession private( * * @since 2.0.0 */ - def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { - SparkSession.setActiveSession(this) + def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = withActive { val encoder = Encoders.product[A] Dataset.ofRows(self, ExternalRDD(rdd, self)(encoder)) } @@ -304,8 +303,7 @@ class SparkSession private( * * @since 2.0.0 */ - def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { - SparkSession.setActiveSession(this) + def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = withActive { val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] val attributeSeq = schema.toAttributes Dataset.ofRows(self, LocalRelation.fromProduct(attributeSeq, data)) @@ -343,7 +341,7 @@ class SparkSession private( * @since 2.0.0 */ @DeveloperApi - def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { + def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = withActive { // TODO: use MutableProjection when rowRDD is another DataFrame and the applied // schema differs from the existing schema on any field data type. 
val encoder = RowEncoder(schema) @@ -373,7 +371,7 @@ class SparkSession private( * @since 2.0.0 */ @DeveloperApi - def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = { + def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = withActive { Dataset.ofRows(self, LocalRelation.fromExternalRows(schema.toAttributes, rows.asScala)) } @@ -385,7 +383,7 @@ class SparkSession private( * * @since 2.0.0 */ - def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = { + def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = withActive { val attributeSeq: Seq[AttributeReference] = getSchema(beanClass) val className = beanClass.getName val rowRdd = rdd.mapPartitions { iter => @@ -414,7 +412,7 @@ class SparkSession private( * SELECT * queries will return the columns in an undefined order. * @since 1.6.0 */ - def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = { + def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = withActive { val attrSeq = getSchema(beanClass) val rows = SQLContext.beansToRows(data.asScala.iterator, beanClass, attrSeq) Dataset.ofRows(self, LocalRelation(attrSeq, rows.toSeq)) @@ -599,7 +597,7 @@ class SparkSession private( * * @since 2.0.0 */ - def sql(sqlText: String): DataFrame = { + def sql(sqlText: String): DataFrame = withActive { val tracker = new QueryPlanningTracker val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { sessionState.sqlParser.parsePlan(sqlText) @@ -607,6 +605,33 @@ class SparkSession private( Dataset.ofRows(self, plan, tracker) } + /** + * Execute an arbitrary string command inside an external execution engine rather than Spark. + * This could be useful when user wants to execute some commands out of Spark. For + * example, executing custom DDL/DML command for JDBC, creating index for ElasticSearch, + * creating cores for Solr and so on. 
+ * + * The command will be eagerly executed after this method is called and the returned + * DataFrame will contain the output of the command (if any). + * + * @param runner The class name of the runner that implements `ExternalCommandRunner`. + * @param command The target command to be executed + * @param options The options for the runner. + * + * @since 3.0.0 + */ + @Unstable + def executeCommand(runner: String, command: String, options: Map[String, String]): DataFrame = { + DataSource.lookupDataSource(runner, sessionState.conf) match { + case source if classOf[ExternalCommandRunner].isAssignableFrom(source) => + Dataset.ofRows(self, ExternalCommandExecutor( + source.newInstance().asInstanceOf[ExternalCommandRunner], command, options)) + + case _ => + throw new AnalysisException(s"Command execution is not supported in runner $runner") + } + } + /** * Returns a [[DataFrameReader]] that can be used to read non-streaming data in as a * `DataFrame`. @@ -724,6 +749,20 @@ class SparkSession private( } } + /** + * Execute a block of code with this session set as the active session, and restore the + * previous session on completion. + */ + private[sql] def withActive[T](block: => T): T = { + // Use the active session thread local directly to make sure we get the session that is actually + // set and not the default session. This is to prevent promoting the default session to the + // active session once we are done. 
+ val old = SparkSession.activeThreadSession.get() + SparkSession.setActiveSession(this) + try block finally { + SparkSession.setActiveSession(old) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index bb05c76cfee6d..0f08e10c00d22 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -28,10 +28,11 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.api.java._ import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} -import org.apache.spark.sql.execution.aggregate.ScalaUDAF +import org.apache.spark.sql.execution.aggregate.{ScalaAggregator, ScalaUDAF} import org.apache.spark.sql.execution.python.UserDefinedPythonFunction -import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedFunction} +import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.types.DataType import org.apache.spark.util.Utils @@ -72,7 +73,11 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @return the registered UDAF. * * @since 1.5.0 + * @deprecated this method and the use of UserDefinedAggregateFunction are deprecated. + * Aggregator[IN, BUF, OUT] should now be registered as a UDF via the functions.udaf(agg) method. 
*/ + @deprecated("Aggregator[IN, BUF, OUT] should now be registered as a UDF" + + " via the functions.udaf(agg) method.", "3.0.0") def register(name: String, udaf: UserDefinedAggregateFunction): UserDefinedAggregateFunction = { def builder(children: Seq[Expression]) = ScalaUDAF(children, udaf) functionRegistry.createOrReplaceTempFunction(name, builder) @@ -101,9 +106,16 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 2.2.0 */ def register(name: String, udf: UserDefinedFunction): UserDefinedFunction = { - def builder(children: Seq[Expression]) = udf.apply(children.map(Column.apply) : _*).expr - functionRegistry.createOrReplaceTempFunction(name, builder) - udf + udf match { + case udaf: UserDefinedAggregator[_, _, _] => + def builder(children: Seq[Expression]) = udaf.scalaAggregator(children) + functionRegistry.createOrReplaceTempFunction(name, builder) + udf + case _ => + def builder(children: Seq[Expression]) = udf.apply(children.map(Column.apply) : _*).expr + functionRegistry.createOrReplaceTempFunction(name, builder) + udf + } } // scalastyle:off line.size.limit diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index 482e2bfeb7098..bf3055d5e3e09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -23,11 +23,13 @@ import java.nio.channels.Channels import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.PythonRDDServer import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.{DataFrame, Dataset, SQLContext} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions.ExpressionInfo import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import 
org.apache.spark.sql.execution.{ExplainMode, QueryExecution} import org.apache.spark.sql.execution.arrow.ArrowConverters +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType private[sql] object PythonSQLUtils { @@ -38,6 +40,12 @@ private[sql] object PythonSQLUtils { FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray } + def listSQLConfigs(): Array[(String, String, String)] = { + val conf = new SQLConf() + // Py4J doesn't seem to translate Seq well, so we convert to an Array. + conf.getAllDefinedConfs.toArray + } + /** * Python callable function to read a file in Arrow stream format and create a [[RDD]] * using each serialized ArrowRecordBatch as a partition. @@ -56,6 +64,10 @@ private[sql] object PythonSQLUtils { sqlContext: SQLContext): DataFrame = { ArrowConverters.toDataFrame(arrowBatchRDD, schemaString, sqlContext) } + + def explainString(queryExecution: QueryExecution, mode: String): String = { + queryExecution.explainString(ExplainMode.fromString(mode)) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala index 60738e6d4ef9e..318cc629e7a34 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalog import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{Evolving, Experimental, Stable} +import org.apache.spark.annotation.Stable import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset} import org.apache.spark.sql.types.StructType import org.apache.spark.storage.StorageLevel @@ -208,20 +208,6 @@ abstract class Catalog { */ def functionExists(dbName: String, functionName: String): Boolean - /** - * Creates a table from the given path and returns the corresponding DataFrame. 
- * It will use the default data source configured by spark.sql.sources.default. - * - * @param tableName is either a qualified or unqualified name that designates a table. - * If no database identifier is provided, it refers to a table in - * the current database. - * @since 2.0.0 - */ - @deprecated("use createTable instead.", "2.2.0") - def createExternalTable(tableName: String, path: String): DataFrame = { - createTable(tableName, path) - } - /** * Creates a table from the given path and returns the corresponding DataFrame. * It will use the default data source configured by spark.sql.sources.default. @@ -233,20 +219,6 @@ abstract class Catalog { */ def createTable(tableName: String, path: String): DataFrame - /** - * Creates a table from the given path based on a data source and returns the corresponding - * DataFrame. - * - * @param tableName is either a qualified or unqualified name that designates a table. - * If no database identifier is provided, it refers to a table in - * the current database. - * @since 2.0.0 - */ - @deprecated("use createTable instead.", "2.2.0") - def createExternalTable(tableName: String, path: String, source: String): DataFrame = { - createTable(tableName, path, source) - } - /** * Creates a table from the given path based on a data source and returns the corresponding * DataFrame. @@ -258,23 +230,6 @@ abstract class Catalog { */ def createTable(tableName: String, path: String, source: String): DataFrame - /** - * Creates a table from the given path based on a data source and a set of options. - * Then, returns the corresponding DataFrame. - * - * @param tableName is either a qualified or unqualified name that designates a table. - * If no database identifier is provided, it refers to a table in - * the current database. 
- * @since 2.0.0 - */ - @deprecated("use createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - options: java.util.Map[String, String]): DataFrame = { - createTable(tableName, source, options) - } - /** * Creates a table based on the dataset in a data source and a set of options. * Then, returns the corresponding DataFrame. @@ -291,24 +246,6 @@ abstract class Catalog { createTable(tableName, source, options.asScala.toMap) } - /** - * (Scala-specific) - * Creates a table from the given path based on a data source and a set of options. - * Then, returns the corresponding DataFrame. - * - * @param tableName is either a qualified or unqualified name that designates a table. - * If no database identifier is provided, it refers to a table in - * the current database. - * @since 2.0.0 - */ - @deprecated("use createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - options: Map[String, String]): DataFrame = { - createTable(tableName, source, options) - } - /** * (Scala-specific) * Creates a table based on the dataset in a data source and a set of options. @@ -324,24 +261,6 @@ abstract class Catalog { source: String, options: Map[String, String]): DataFrame - /** - * Create a table from the given path based on a data source, a schema and a set of options. - * Then, returns the corresponding DataFrame. - * - * @param tableName is either a qualified or unqualified name that designates a table. - * If no database identifier is provided, it refers to a table in - * the current database. - * @since 2.0.0 - */ - @deprecated("use createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - schema: StructType, - options: java.util.Map[String, String]): DataFrame = { - createTable(tableName, source, schema, options) - } - /** * Create a table based on the dataset in a data source, a schema and a set of options. * Then, returns the corresponding DataFrame. 
@@ -359,25 +278,6 @@ abstract class Catalog { createTable(tableName, source, schema, options.asScala.toMap) } - /** - * (Scala-specific) - * Create a table from the given path based on a data source, a schema and a set of options. - * Then, returns the corresponding DataFrame. - * - * @param tableName is either a qualified or unqualified name that designates a table. - * If no database identifier is provided, it refers to a table in - * the current database. - * @since 2.0.0 - */ - @deprecated("use createTable instead.", "2.2.0") - def createExternalTable( - tableName: String, - source: String, - schema: StructType, - options: Map[String, String]): DataFrame = { - createTable(tableName, source, schema, options) - } - /** * (Scala-specific) * Create a table based on the dataset in a data source, a schema and a set of options. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala new file mode 100644 index 0000000000000..adeb2164eff63 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -0,0 +1,654 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.{AnalysisException, SaveMode} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, RefreshTable} +import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} + +/** + * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements + * to the corresponding v1 or v2 commands if the resolved catalog is the session catalog. + * + * We can remove this rule once we implement all the catalog functionality in `V2SessionCatalog`. 
+ */ +class ResolveSessionCatalog( + val catalogManager: CatalogManager, + conf: SQLConf, + isView: Seq[String] => Boolean) + extends Rule[LogicalPlan] with LookupCatalog { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Util._ + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case AlterTableAddColumnsStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), cols) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + cols.foreach { c => + assertTopLevelColumn(c.name, "AlterTableAddColumnsCommand") + if (!c.nullable) { + throw new AnalysisException( + "ADD COLUMN with v1 tables cannot specify NOT NULL.") + } + } + AlterTableAddColumnsCommand(tbl.asTableIdentifier, cols.map(convertToStructField)) + }.getOrElse { + val changes = cols.map { col => + TableChange.addColumn( + col.name.toArray, + col.dataType, + col.nullable, + col.comment.orNull, + col.position.orNull) + } + createAlterTable(nameParts, catalog, tbl, changes) + } + + case AlterTableReplaceColumnsStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), cols) => + val changes: Seq[TableChange] = loadTable(catalog, tbl.asIdentifier) match { + case Some(_: V1Table) => + throw new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.") + case Some(table) => + // REPLACE COLUMNS deletes all the existing columns and adds new columns specified. + val deleteChanges = table.schema.fieldNames.map { name => + TableChange.deleteColumn(Array(name)) + } + val addChanges = cols.map { col => + TableChange.addColumn( + col.name.toArray, + col.dataType, + col.nullable, + col.comment.orNull, + col.position.orNull) + } + deleteChanges ++ addChanges + case None => Seq() // Unresolved table will be handled in CheckAnalysis. 
+ } + createAlterTable(nameParts, catalog, tbl, changes) + + case a @ AlterTableAlterColumnStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), _, _, _, _, _) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + if (a.column.length > 1) { + throw new AnalysisException( + "ALTER COLUMN with qualified column is only supported with v2 tables.") + } + if (a.nullable.isDefined) { + throw new AnalysisException( + "ALTER COLUMN with v1 tables cannot specify NOT NULL.") + } + if (a.position.isDefined) { + throw new AnalysisException("" + + "ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.") + } + val builder = new MetadataBuilder + // Add comment to metadata + a.comment.map(c => builder.putString("comment", c)) + val colName = a.column(0) + val dataType = a.dataType.getOrElse { + v1Table.schema.findNestedField(Seq(colName), resolver = conf.resolver) + .map(_._2.dataType) + .getOrElse { + throw new AnalysisException( + s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " + + s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") + } + } + // Add Hive type string to metadata. 
+ val cleanedDataType = HiveStringType.replaceCharType(dataType) + if (dataType != cleanedDataType) { + builder.putString(HIVE_TYPE_STRING, dataType.catalogString) + } + val newColumn = StructField( + colName, + cleanedDataType, + nullable = true, + builder.build()) + AlterTableChangeColumnCommand(tbl.asTableIdentifier, colName, newColumn) + }.getOrElse { + val colName = a.column.toArray + val typeChange = a.dataType.map { newDataType => + TableChange.updateColumnType(colName, newDataType) + } + val nullabilityChange = a.nullable.map { nullable => + TableChange.updateColumnNullability(colName, nullable) + } + val commentChange = a.comment.map { newComment => + TableChange.updateColumnComment(colName, newComment) + } + val positionChange = a.position.map { newPosition => + TableChange.updateColumnPosition(colName, newPosition) + } + createAlterTable( + nameParts, + catalog, + tbl, + typeChange.toSeq ++ nullabilityChange ++ commentChange ++ positionChange) + } + + case AlterTableRenameColumnStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), col, newName) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + throw new AnalysisException("RENAME COLUMN is only supported with v2 tables.") + }.getOrElse { + val changes = Seq(TableChange.renameColumn(col.toArray, newName)) + createAlterTable(nameParts, catalog, tbl, changes) + } + + case AlterTableDropColumnsStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), cols) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + throw new AnalysisException("DROP COLUMN is only supported with v2 tables.") + }.getOrElse { + val changes = cols.map(col => TableChange.deleteColumn(col.toArray)) + createAlterTable(nameParts, catalog, tbl, changes) + } + + case AlterTableSetPropertiesStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), props) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + 
AlterTableSetPropertiesCommand(tbl.asTableIdentifier, props, isView = false) + }.getOrElse { + val changes = props.map { case (key, value) => + TableChange.setProperty(key, value) + }.toSeq + createAlterTable(nameParts, catalog, tbl, changes) + } + + case AlterTableUnsetPropertiesStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), keys, ifExists) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + AlterTableUnsetPropertiesCommand( + tbl.asTableIdentifier, keys, ifExists, isView = false) + }.getOrElse { + val changes = keys.map(key => TableChange.removeProperty(key)) + createAlterTable(nameParts, catalog, tbl, changes) + } + + case AlterTableSetLocationStatement( + nameParts @ SessionCatalogAndTable(catalog, tbl), partitionSpec, newLoc) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + AlterTableSetLocationCommand(tbl.asTableIdentifier, partitionSpec, newLoc) + }.getOrElse { + if (partitionSpec.nonEmpty) { + throw new AnalysisException( + "ALTER TABLE SET LOCATION does not support partition for v2 tables.") + } + val changes = Seq(TableChange.setProperty(TableCatalog.PROP_LOCATION, newLoc)) + createAlterTable(nameParts, catalog, tbl, changes) + } + + // ALTER VIEW should always use v1 command if the resolved catalog is session catalog. 
+ case AlterViewSetPropertiesStatement(SessionCatalogAndTable(_, tbl), props) => + AlterTableSetPropertiesCommand(tbl.asTableIdentifier, props, isView = true) + + case AlterViewUnsetPropertiesStatement(SessionCatalogAndTable(_, tbl), keys, ifExists) => + AlterTableUnsetPropertiesCommand(tbl.asTableIdentifier, keys, ifExists, isView = true) + + case d @ DescribeNamespace(SessionCatalogAndNamespace(_, ns), _) => + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") + } + DescribeDatabaseCommand(ns.head, d.extended) + + case AlterNamespaceSetProperties(SessionCatalogAndNamespace(_, ns), properties) => + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") + } + AlterDatabasePropertiesCommand(ns.head, properties) + + case AlterNamespaceSetLocation(SessionCatalogAndNamespace(_, ns), location) => + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") + } + AlterDatabaseSetLocationCommand(ns.head, location) + + case RenameTableStatement(SessionCatalogAndTable(_, oldName), newNameParts, isView) => + AlterTableRenameCommand(oldName.asTableIdentifier, newNameParts.asTableIdentifier, isView) + + case DescribeRelation(ResolvedTable(_, ident, _: V1Table), partitionSpec, isExtended) => + DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) + + // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. 
+ case DescribeRelation(ResolvedView(ident), partitionSpec, isExtended) => + DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) + + case DescribeColumnStatement( + SessionCatalogAndTable(catalog, tbl), colNameParts, isExtended) => + loadTable(catalog, tbl.asIdentifier).collect { + case v1Table: V1Table => + DescribeColumnCommand(tbl.asTableIdentifier, colNameParts, isExtended) + }.getOrElse { + if (isView(tbl)) { + DescribeColumnCommand(tbl.asTableIdentifier, colNameParts, isExtended) + } else { + throw new AnalysisException("Describing columns is not supported for v2 tables.") + } + } + + // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the + // session catalog and the table provider is not v2. + case c @ CreateTableStatement( + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + if (!isV2Provider(c.provider)) { + val tableDesc = buildCatalogTable(tbl.asTableIdentifier, c.tableSchema, + c.partitioning, c.bucketSpec, c.properties, c.provider, c.options, c.location, + c.comment, c.ifNotExists) + val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists + CreateTable(tableDesc, mode, None) + } else { + CreateV2Table( + catalog.asTableCatalog, + tbl.asIdentifier, + c.tableSchema, + // convert the bucket spec and add it as a transform + c.partitioning ++ c.bucketSpec.map(_.asTransform), + convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + ignoreIfExists = c.ifNotExists) + } + + case c @ CreateTableAsSelectStatement( + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + if (!isV2Provider(c.provider)) { + val tableDesc = buildCatalogTable(tbl.asTableIdentifier, new StructType, + c.partitioning, c.bucketSpec, c.properties, c.provider, c.options, c.location, + c.comment, c.ifNotExists) + val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists + CreateTable(tableDesc, mode, Some(c.asSelect)) + } else { + 
CreateTableAsSelect( + catalog.asTableCatalog, + tbl.asIdentifier, + // convert the bucket spec and add it as a transform + c.partitioning ++ c.bucketSpec.map(_.asTransform), + c.asSelect, + convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + writeOptions = c.options, + ignoreIfExists = c.ifNotExists) + } + + case RefreshTableStatement(SessionCatalogAndTable(_, tbl)) => + RefreshTable(tbl.asTableIdentifier) + + // For REPLACE TABLE [AS SELECT], we should fail if the catalog is resolved to the + // session catalog and the table provider is not v2. + case c @ ReplaceTableStatement( + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + if (!isV2Provider(c.provider)) { + throw new AnalysisException("REPLACE TABLE is only supported with v2 tables.") + } else { + ReplaceTable( + catalog.asTableCatalog, + tbl.asIdentifier, + c.tableSchema, + // convert the bucket spec and add it as a transform + c.partitioning ++ c.bucketSpec.map(_.asTransform), + convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + orCreate = c.orCreate) + } + + case c @ ReplaceTableAsSelectStatement( + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + if (!isV2Provider(c.provider)) { + throw new AnalysisException("REPLACE TABLE AS SELECT is only supported with v2 tables.") + } else { + ReplaceTableAsSelect( + catalog.asTableCatalog, + tbl.asIdentifier, + // convert the bucket spec and add it as a transform + c.partitioning ++ c.bucketSpec.map(_.asTransform), + c.asSelect, + convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + writeOptions = c.options, + orCreate = c.orCreate) + } + + case DropTableStatement(SessionCatalogAndTable(catalog, tbl), ifExists, purge) => + DropTableCommand(tbl.asTableIdentifier, ifExists, isView = false, purge = purge) + + case DropViewStatement(SessionCatalogAndTable(catalog, viewName), ifExists) => + 
DropTableCommand(viewName.asTableIdentifier, ifExists, isView = true, purge = false) + + case c @ CreateNamespaceStatement(CatalogAndNamespace(catalog, ns), _, _) + if isSessionCatalog(catalog) => + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") + } + + val comment = c.properties.get(SupportsNamespaces.PROP_COMMENT) + val location = c.properties.get(SupportsNamespaces.PROP_LOCATION) + val newProperties = c.properties -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES + CreateDatabaseCommand(ns.head, c.ifNotExists, location, comment, newProperties) + + case d @ DropNamespace(SessionCatalogAndNamespace(_, ns), _, _) => + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") + } + DropDatabaseCommand(ns.head, d.ifExists, d.cascade) + + case ShowTables(SessionCatalogAndNamespace(_, ns), pattern) => + assert(ns.nonEmpty) + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") + } + ShowTablesCommand(Some(ns.head), pattern) + + case ShowTableStatement(ns, pattern, partitionsSpec) => + val db = ns match { + case Some(ns) if ns.length != 1 => + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") + case _ => ns.map(_.head) + } + ShowTablesCommand(db, Some(pattern), true, partitionsSpec) + + case AnalyzeTableStatement(tbl, partitionSpec, noScan) => + val v1TableName = parseV1Table(tbl, "ANALYZE TABLE") + if (partitionSpec.isEmpty) { + AnalyzeTableCommand(v1TableName.asTableIdentifier, noScan) + } else { + AnalyzePartitionCommand(v1TableName.asTableIdentifier, partitionSpec, noScan) + } + + case AnalyzeColumnStatement(tbl, columnNames, allColumns) => + val v1TableName = parseV1Table(tbl, "ANALYZE TABLE") + AnalyzeColumnCommand(v1TableName.asTableIdentifier, columnNames, allColumns) + + case RepairTableStatement(tbl) => + val v1TableName = parseV1Table(tbl, "MSCK REPAIR TABLE") + 
AlterTableRecoverPartitionsCommand( + v1TableName.asTableIdentifier, + "MSCK REPAIR TABLE") + + case LoadDataStatement(tbl, path, isLocal, isOverwrite, partition) => + val v1TableName = parseV1Table(tbl, "LOAD DATA") + LoadDataCommand( + v1TableName.asTableIdentifier, + path, + isLocal, + isOverwrite, + partition) + + case ShowCreateTableStatement(tbl, asSerde) if !asSerde => + val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE") + ShowCreateTableCommand(v1TableName.asTableIdentifier) + + case ShowCreateTableStatement(tbl, asSerde) if asSerde => + val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE AS SERDE") + ShowCreateTableAsSerdeCommand(v1TableName.asTableIdentifier) + + case CacheTableStatement(tbl, plan, isLazy, options) => + val v1TableName = parseV1Table(tbl, "CACHE TABLE") + CacheTableCommand(v1TableName.asTableIdentifier, plan, isLazy, options) + + case UncacheTableStatement(tbl, ifExists) => + val v1TableName = parseV1Table(tbl, "UNCACHE TABLE") + UncacheTableCommand(v1TableName.asTableIdentifier, ifExists) + + case TruncateTableStatement(tbl, partitionSpec) => + val v1TableName = parseV1Table(tbl, "TRUNCATE TABLE") + TruncateTableCommand( + v1TableName.asTableIdentifier, + partitionSpec) + + case ShowPartitionsStatement(tbl, partitionSpec) => + val v1TableName = parseV1Table(tbl, "SHOW PARTITIONS") + ShowPartitionsCommand( + v1TableName.asTableIdentifier, + partitionSpec) + + case ShowColumnsStatement(tbl, ns) => + val sql = "SHOW COLUMNS" + val v1TableName = parseV1Table(tbl, sql).asTableIdentifier + val resolver = conf.resolver + val db = ns match { + case Some(db) if (v1TableName.database.exists(!resolver(_, db.head))) => + throw new AnalysisException( + s"SHOW COLUMNS with conflicting databases: " + + s"'${db.head}' != '${v1TableName.database.get}'") + case _ => ns.map(_.head) + } + if (ns.isDefined && ns.get.length > 1) { + throw new AnalysisException( + s"Namespace name should have only one part if specified: ${ns.get.quoted}") + } + if 
(tbl.length > 2) { + throw new AnalysisException( + s"Table name should have at most two parts: ${tbl.quoted}") + } + ShowColumnsCommand(db, v1TableName) + + case AlterTableRecoverPartitionsStatement(tbl) => + val v1TableName = parseV1Table(tbl, "ALTER TABLE RECOVER PARTITIONS") + AlterTableRecoverPartitionsCommand( + v1TableName.asTableIdentifier, + "ALTER TABLE RECOVER PARTITIONS") + + case AlterTableAddPartitionStatement(tbl, partitionSpecsAndLocs, ifNotExists) => + val v1TableName = parseV1Table(tbl, "ALTER TABLE ADD PARTITION") + AlterTableAddPartitionCommand( + v1TableName.asTableIdentifier, + partitionSpecsAndLocs, + ifNotExists) + + case AlterTableRenamePartitionStatement(tbl, from, to) => + val v1TableName = parseV1Table(tbl, "ALTER TABLE RENAME PARTITION") + AlterTableRenamePartitionCommand( + v1TableName.asTableIdentifier, + from, + to) + + case AlterTableDropPartitionStatement(tbl, specs, ifExists, purge, retainData) => + val v1TableName = parseV1Table(tbl, "ALTER TABLE DROP PARTITION") + AlterTableDropPartitionCommand( + v1TableName.asTableIdentifier, + specs, + ifExists, + purge, + retainData) + + case AlterTableSerDePropertiesStatement(tbl, serdeClassName, serdeProperties, partitionSpec) => + val v1TableName = parseV1Table(tbl, "ALTER TABLE SerDe Properties") + AlterTableSerDePropertiesCommand( + v1TableName.asTableIdentifier, + serdeClassName, + serdeProperties, + partitionSpec) + + case AlterViewAsStatement(tbl, originalText, query) => + val v1TableName = parseV1Table(tbl, "ALTER VIEW QUERY") + AlterViewAsCommand( + v1TableName.asTableIdentifier, + originalText, + query) + + case CreateViewStatement( + tbl, userSpecifiedColumns, comment, properties, + originalText, child, allowExisting, replace, viewType) => + + val v1TableName = parseV1Table(tbl, "CREATE VIEW") + CreateViewCommand( + v1TableName.asTableIdentifier, + userSpecifiedColumns, + comment, + properties, + originalText, + child, + allowExisting, + replace, + viewType) + + case 
ShowTableProperties(r: ResolvedTable, propertyKey) if isSessionCatalog(r.catalog) => + ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey) + + case DescribeFunctionStatement(CatalogAndIdentifier(catalog, ident), extended) => + val functionIdent = + parseSessionCatalogFunctionIdentifier("DESCRIBE FUNCTION", catalog, ident) + DescribeFunctionCommand(functionIdent, extended) + + case ShowFunctionsStatement(userScope, systemScope, pattern, fun) => + val (database, function) = fun match { + case Some(CatalogAndIdentifier(catalog, ident)) => + val FunctionIdentifier(fn, db) = + parseSessionCatalogFunctionIdentifier("SHOW FUNCTIONS", catalog, ident) + (db, Some(fn)) + case None => (None, pattern) + } + ShowFunctionsCommand(database, function, userScope, systemScope) + + case DropFunctionStatement(CatalogAndIdentifier(catalog, ident), ifExists, isTemp) => + val FunctionIdentifier(function, database) = + parseSessionCatalogFunctionIdentifier("DROP FUNCTION", catalog, ident) + DropFunctionCommand(database, function, ifExists, isTemp) + + case CreateFunctionStatement(CatalogAndIdentifier(catalog, ident), + className, resources, isTemp, ignoreIfExists, replace) => + val FunctionIdentifier(function, database) = + parseSessionCatalogFunctionIdentifier("CREATE FUNCTION", catalog, ident) + CreateFunctionCommand(database, function, className, resources, isTemp, ignoreIfExists, + replace) + } + + private def parseSessionCatalogFunctionIdentifier( + sql: String, + catalog: CatalogPlugin, + functionIdent: Identifier): FunctionIdentifier = { + if (isSessionCatalog(catalog)) { + functionIdent.asMultipartIdentifier match { + case Seq(db, fn) => FunctionIdentifier(fn, Some(db)) + case Seq(fn) => FunctionIdentifier(fn, None) + case _ => + throw new AnalysisException(s"Unsupported function name '${functionIdent.quoted}'") + } + } else { + throw new AnalysisException(s"$sql is only supported in v1 catalog") + } + } + + private def parseV1Table(tableName: Seq[String], sql: 
String): Seq[String] = { + val CatalogAndIdentifier(catalog, ident) = tableName + if (!isSessionCatalog(catalog)) { + throw new AnalysisException(s"$sql is only supported with v1 tables.") + } + ident.asMultipartIdentifier + } + + private def buildCatalogTable( + table: TableIdentifier, + schema: StructType, + partitioning: Seq[Transform], + bucketSpec: Option[BucketSpec], + properties: Map[String, String], + provider: String, + options: Map[String, String], + location: Option[String], + comment: Option[String], + ifNotExists: Boolean): CatalogTable = { + val storage = CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + properties = options) + + val tableType = if (location.isDefined) { + CatalogTableType.EXTERNAL + } else { + CatalogTableType.MANAGED + } + + CatalogTable( + identifier = table, + tableType = tableType, + storage = storage, + schema = schema, + provider = Some(provider), + partitionColumnNames = partitioning.asPartitionColumns, + bucketSpec = bucketSpec, + properties = properties, + comment = comment) + } + + object SessionCatalogAndTable { + def unapply(nameParts: Seq[String]): Option[(CatalogPlugin, Seq[String])] = nameParts match { + case SessionCatalogAndIdentifier(catalog, ident) => + Some(catalog -> ident.asMultipartIdentifier) + case _ => None + } + } + + object SessionCatalogAndNamespace { + def unapply(resolved: ResolvedNamespace): Option[(CatalogPlugin, Seq[String])] = + if (isSessionCatalog(resolved.catalog)) { + Some(resolved.catalog -> resolved.namespace) + } else { + None + } + } + + private def assertTopLevelColumn(colName: Seq[String], command: String): Unit = { + if (colName.length > 1) { + throw new AnalysisException(s"$command does not support nested column: ${colName.quoted}") + } + } + + private def convertToStructField(col: QualifiedColType): StructField = { + val builder = new MetadataBuilder + col.comment.foreach(builder.putString("comment", _)) + + val cleanedDataType = 
HiveStringType.replaceCharType(col.dataType) + if (col.dataType != cleanedDataType) { + builder.putString(HIVE_TYPE_STRING, col.dataType.catalogString) + } + + StructField( + col.name.head, + cleanedDataType, + nullable = true, + builder.build()) + } + + private def isV2Provider(provider: String): Boolean = { + DataSource.lookupDataSourceV2(provider, conf) match { + // TODO(SPARK-28396): Currently file source v2 can't work with tables. + case Some(_: FileDataSourceV2) => false + case Some(_) => true + case _ => false + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PartitionPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PartitionPruning.scala index d64e11136e673..28f8f49d2ce44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PartitionPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PartitionPruning.scala @@ -86,7 +86,7 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { filteringPlan: LogicalPlan, joinKeys: Seq[Expression], hasBenefit: Boolean): LogicalPlan = { - val reuseEnabled = SQLConf.get.dynamicPartitionPruningReuseBroadcast + val reuseEnabled = SQLConf.get.exchangeReuseEnabled val index = joinKeys.indexOf(filteringKey) if (hasBenefit || reuseEnabled) { // insert a DynamicPruning wrapper to identify the subquery during query planning @@ -96,7 +96,7 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { filteringPlan, joinKeys, index, - !hasBenefit), + !hasBenefit || SQLConf.get.dynamicPartitionPruningReuseBroadcastOnly), pruningPlan) } else { // abort dynamic partition pruning @@ -159,7 +159,7 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { case Not(expr) => isLikelySelective(expr) case And(l, r) => isLikelySelective(l) || isLikelySelective(r) case Or(l, r) => isLikelySelective(l) && isLikelySelective(r) - case Like(_, _) => true + case Like(_, _, _) => true case _: 
BinaryComparison => true case _: In | _: InSet => true case _: StringPredicate => true @@ -252,7 +252,7 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan match { // Do not rewrite subqueries. - case _: Subquery => plan + case s: Subquery if s.correlated => plan case _ if !SQLConf.get.dynamicPartitionPruningEnabled => plan case _ => prune(plan) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PlanDynamicPruningFilters.scala index 031c3b1aa0d50..be00f728aa3ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/dynamicpruning/PlanDynamicPruningFilters.scala @@ -36,9 +36,6 @@ import org.apache.spark.sql.internal.SQLConf case class PlanDynamicPruningFilters(sparkSession: SparkSession) extends Rule[SparkPlan] with PredicateHelper { - private def reuseBroadcast: Boolean = - SQLConf.get.dynamicPartitionPruningReuseBroadcast && SQLConf.get.exchangeReuseEnabled - /** * Identify the shape in which keys of a given plan are broadcasted. */ @@ -55,22 +52,24 @@ case class PlanDynamicPruningFilters(sparkSession: SparkSession) plan transformAllExpressions { case DynamicPruningSubquery( value, buildPlan, buildKeys, broadcastKeyIndex, onlyInBroadcast, exprId) => - val qe = new QueryExecution(sparkSession, buildPlan) + val sparkPlan = QueryExecution.createSparkPlan( + sparkSession, sparkSession.sessionState.planner, buildPlan) // Using `sparkPlan` is a little hacky as it is based on the assumption that this rule is // the first to be applied (apart from `InsertAdaptiveSparkPlan`). 
- val canReuseExchange = reuseBroadcast && buildKeys.nonEmpty && + val canReuseExchange = SQLConf.get.exchangeReuseEnabled && buildKeys.nonEmpty && plan.find { case BroadcastHashJoinExec(_, _, _, BuildLeft, _, left, _) => - left.sameResult(qe.sparkPlan) + left.sameResult(sparkPlan) case BroadcastHashJoinExec(_, _, _, BuildRight, _, _, right) => - right.sameResult(qe.sparkPlan) + right.sameResult(sparkPlan) case _ => false }.isDefined if (canReuseExchange) { val mode = broadcastMode(buildKeys, buildPlan) + val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, sparkPlan) // plan a broadcast exchange of the build side of the join - val exchange = BroadcastExchangeExec(mode, qe.executedPlan) + val exchange = BroadcastExchangeExec(mode, executedPlan) val name = s"dynamicpruning#${exprId.id}" // place the broadcast adaptor for reusing the broadcast results on the probe side val broadcastValues = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AggregatingAccumulator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AggregatingAccumulator.scala new file mode 100644 index 0000000000000..9807b5dbe9348 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AggregatingAccumulator.scala @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import scala.collection.mutable + +import org.apache.spark.TaskContext +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSeq, BindReferences, Expression, InterpretedMutableProjection, InterpretedUnsafeProjection, JoinedRow, MutableProjection, NamedExpression, Projection, SpecificInternalRow} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, DeclarativeAggregate, ImperativeAggregate, NoOp, TypedImperativeAggregate} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.util.AccumulatorV2 + +/** + * Accumulator that computes a global aggregate. + */ +class AggregatingAccumulator private( + bufferSchema: Seq[DataType], + initialValues: Seq[Expression], + updateExpressions: Seq[Expression], + @transient private val mergeExpressions: Seq[Expression], + @transient private val resultExpressions: Seq[Expression], + imperatives: Array[ImperativeAggregate], + typedImperatives: Array[TypedImperativeAggregate[_]], + @transient private val conf: SQLConf) + extends AccumulatorV2[InternalRow, InternalRow] { + assert(bufferSchema.size == initialValues.size) + assert(bufferSchema.size == updateExpressions.size) + assert(mergeExpressions == null || bufferSchema.size == mergeExpressions.size) + + @transient + private var joinedRow: JoinedRow = _ + + private var buffer: SpecificInternalRow = _ + + private def createBuffer(): SpecificInternalRow = { + val buffer = new SpecificInternalRow(bufferSchema) + + // Initialize the buffer. Note that we do not use a code generated projection here because + // generating and compiling a projection is probably more expensive than using an interpreted + // projection. 
+ InterpretedMutableProjection.createProjection(initialValues) + .target(buffer) + .apply(InternalRow.empty) + imperatives.foreach(_.initialize(buffer)) + typedImperatives.foreach(_.initialize(buffer)) + buffer + } + + private def getOrCreateBuffer(): SpecificInternalRow = { + if (buffer == null) { + buffer = createBuffer() + + // Create the joined row and set the buffer as its 'left' row. + joinedRow = new JoinedRow() + joinedRow.withLeft(buffer) + } + buffer + } + + private def initializeProjection[T <: Projection](projection: T): T = { + projection.initialize(TaskContext.getPartitionId()) + projection + } + + @transient + private[this] lazy val updateProjection = initializeProjection { + MutableProjection.create(updateExpressions) + } + + @transient + private[this] lazy val mergeProjection = initializeProjection { + InterpretedMutableProjection.createProjection(mergeExpressions) + } + + @transient + private[this] lazy val resultProjection = initializeProjection { + InterpretedUnsafeProjection.createProjection(resultExpressions) + } + + /** + * Driver side operations like `merge` and `value` are executed in the DAGScheduler thread. This + * thread does not have a SQL configuration so we attach our own here. Note that we can't (and + * shouldn't) call `merge` or `value` on an accumulator originating from an executor so we just + * return a default value here. 
+ */ + private[this] def withSQLConf[T](default: => T)(body: => T): T = { + if (conf != null) { + SQLConf.withExistingConf(conf)(body) + } else { + default + } + } + + override def reset(): Unit = { + buffer = null + joinedRow = null + } + + override def isZero: Boolean = buffer == null + + override def copyAndReset(): AggregatingAccumulator = { + new AggregatingAccumulator( + bufferSchema, + initialValues, + updateExpressions, + mergeExpressions, + resultExpressions, + imperatives, + typedImperatives, + conf) + } + + override def copy(): AggregatingAccumulator = { + val copy = copyAndReset() + copy.merge(this) + copy + } + + override def add(v: InternalRow): Unit = { + val buffer = getOrCreateBuffer() + updateProjection.target(buffer)(joinedRow.withRight(v)) + var i = 0 + while (i < imperatives.length) { + imperatives(i).update(buffer, v) + i += 1 + } + i = 0 + while (i < typedImperatives.length) { + typedImperatives(i).update(buffer, v) + i += 1 + } + } + + override def merge(other: AccumulatorV2[InternalRow, InternalRow]): Unit = withSQLConf(()) { + if (!other.isZero) { + other match { + case agg: AggregatingAccumulator => + val buffer = getOrCreateBuffer() + val otherBuffer = agg.buffer + mergeProjection.target(buffer)(joinedRow.withRight(otherBuffer)) + var i = 0 + while (i < imperatives.length) { + imperatives(i).merge(buffer, otherBuffer) + i += 1 + } + i = 0 + while (i < typedImperatives.length) { + typedImperatives(i).mergeBuffersObjects(buffer, otherBuffer) + i += 1 + } + case _ => + throw new UnsupportedOperationException( + s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") + } + } + } + + override def value: InternalRow = withSQLConf(InternalRow.empty) { + // Either use the existing buffer or create a temporary one. + val input = if (!isZero) { + buffer + } else { + // Create a temporary buffer because we want to avoid changing the state of the accumulator + // here, which would happen if we called getOrCreateBuffer(). 
This is relatively expensive to + // do but it should be no problem since this method is supposed to be called rarely (once per + // query execution). + createBuffer() + } + resultProjection(input) + } + + /** + * Get the output schema of the aggregating accumulator. + */ + lazy val schema: StructType = { + StructType(resultExpressions.zipWithIndex.map { + case (e: NamedExpression, _) => StructField(e.name, e.dataType, e.nullable, e.metadata) + case (e, i) => StructField(s"c_$i", e.dataType, e.nullable) + }) + } + + /** + * Set the state of the accumulator to the state of another accumulator. This is used in cases + * where we only want to publish the state of the accumulator when the task completes, see + * [[CollectMetricsExec]] for an example. + */ + private[execution] def setState(other: AggregatingAccumulator): Unit = { + assert(buffer == null || (buffer eq other.buffer)) + buffer = other.buffer + joinedRow = other.joinedRow + } +} + +object AggregatingAccumulator { + /** + * Create an aggregating accumulator for the given functions and input schema. + */ + def apply(functions: Seq[Expression], inputAttributes: Seq[Attribute]): AggregatingAccumulator = { + // There are a couple of things happening here: + // - Collect the schema's of the aggregate and input aggregate buffers. These are needed to bind + // the expressions which will be done when we create the accumulator. + // - Collect the initialValues, update and merge expressions for declarative aggregate + // functions. + // - Bind and Collect the imperative aggregate functions. Note that we insert NoOps into the + // (declarative) initialValues, update and merge expression buffers to keep these aligned with + // the aggregate buffer. + // - Build the result expressions. 
+ val aggBufferAttributes = mutable.Buffer.empty[AttributeReference] + val inputAggBufferAttributes = mutable.Buffer.empty[AttributeReference] + val initialValues = mutable.Buffer.empty[Expression] + val updateExpressions = mutable.Buffer.empty[Expression] + val mergeExpressions = mutable.Buffer.empty[Expression] + val imperatives = mutable.Buffer.empty[ImperativeAggregate] + val typedImperatives = mutable.Buffer.empty[TypedImperativeAggregate[_]] + val inputAttributeSeq: AttributeSeq = inputAttributes + val resultExpressions = functions.map(_.transform { + case AggregateExpression(agg: DeclarativeAggregate, _, _, _, _) => + aggBufferAttributes ++= agg.aggBufferAttributes + inputAggBufferAttributes ++= agg.inputAggBufferAttributes + initialValues ++= agg.initialValues + updateExpressions ++= agg.updateExpressions + mergeExpressions ++= agg.mergeExpressions + agg.evaluateExpression + case AggregateExpression(agg: ImperativeAggregate, _, _, _, _) => + val imperative = BindReferences.bindReference(agg + .withNewMutableAggBufferOffset(aggBufferAttributes.size) + .withNewInputAggBufferOffset(inputAggBufferAttributes.size), + inputAttributeSeq) + imperative match { + case typedImperative: TypedImperativeAggregate[_] => + typedImperatives += typedImperative + case _ => + imperatives += imperative + } + aggBufferAttributes ++= imperative.aggBufferAttributes + inputAggBufferAttributes ++= agg.inputAggBufferAttributes + val noOps = Seq.fill(imperative.aggBufferAttributes.size)(NoOp) + initialValues ++= noOps + updateExpressions ++= noOps + mergeExpressions ++= noOps + imperative + }) + + val updateAttrSeq: AttributeSeq = aggBufferAttributes ++ inputAttributes + val mergeAttrSeq: AttributeSeq = aggBufferAttributes ++ inputAggBufferAttributes + val aggBufferAttributesSeq: AttributeSeq = aggBufferAttributes + + // Create the accumulator. 
+ new AggregatingAccumulator( + aggBufferAttributes.map(_.dataType), + initialValues, + updateExpressions.map(BindReferences.bindReference(_, updateAttrSeq)), + mergeExpressions.map(BindReferences.bindReference(_, mergeAttrSeq)), + resultExpressions.map(BindReferences.bindReference(_, aggBufferAttributesSeq)), + imperatives.toArray, + typedImperatives.toArray, + SQLConf.get) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputPartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputPartitioning.scala new file mode 100644 index 0000000000000..2c7faea019322 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputPartitioning.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, NamedExpression} +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning} + +/** + * A trait that handles aliases in the `outputExpressions` to produce `outputPartitioning` + * that satisfies output distribution requirements. 
+ */ +trait AliasAwareOutputPartitioning extends UnaryExecNode { + protected def outputExpressions: Seq[NamedExpression] + + final override def outputPartitioning: Partitioning = { + if (hasAlias) { + child.outputPartitioning match { + case h: HashPartitioning => h.copy(expressions = replaceAliases(h.expressions)) + case other => other + } + } else { + child.outputPartitioning + } + } + + private def hasAlias: Boolean = outputExpressions.collectFirst { case _: Alias => }.isDefined + + private def replaceAliases(exprs: Seq[Expression]): Seq[Expression] = { + exprs.map { + case a: AttributeReference => replaceAlias(a).getOrElse(a) + case other => other + } + } + + private def replaceAlias(attr: AttributeReference): Option[Attribute] = { + outputExpressions.collectFirst { + case a @ Alias(child: AttributeReference, _) if child.semanticEquals(attr) => + a.toAttribute + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 10dc74dd8a8ff..413bd7b29cf45 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK @@ -80,12 +81,20 @@ class CacheManager extends Logging { } else { val sparkSession = query.sparkSession val qe = sparkSession.sessionState.executePlan(planToCache) - val inMemoryRelation = InMemoryRelation( - sparkSession.sessionState.conf.useCompression, - 
sparkSession.sessionState.conf.columnBatchSize, storageLevel, - qe.executedPlan, - tableName, - optimizedPlan = qe.optimizedPlan) + val originalValue = sparkSession.sessionState.conf.getConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED) + val inMemoryRelation = try { + // Avoiding changing the output partitioning, here disable AQE. + sparkSession.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, false) + InMemoryRelation( + sparkSession.sessionState.conf.useCompression, + sparkSession.sessionState.conf.columnBatchSize, storageLevel, + qe.executedPlan, + tableName, + optimizedPlan = qe.optimizedPlan) + } finally { + sparkSession.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, originalValue) + } + this.synchronized { if (lookupCachedData(planToCache).nonEmpty) { logWarning("Data has already been cached.") @@ -261,7 +270,7 @@ class CacheManager extends Logging { case _ => false } - case DataSourceV2Relation(fileTable: FileTable, _, _) => + case DataSourceV2Relation(fileTable: FileTable, _, _, _, _) => refreshFileIndexIfNecessary(fileTable.fileIndex, fs, qualifiedPath) case _ => false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala new file mode 100644 index 0000000000000..e482bc9941ea9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import scala.collection.mutable + +import org.apache.spark.TaskContext +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.expressions.{Attribute, NamedExpression, SortOrder} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.types.StructType + +/** + * Collect arbitrary (named) metrics from a [[SparkPlan]]. + */ +case class CollectMetricsExec( + name: String, + metricExpressions: Seq[NamedExpression], + child: SparkPlan) + extends UnaryExecNode { + + private lazy val accumulator: AggregatingAccumulator = { + val acc = AggregatingAccumulator(metricExpressions, child.output) + acc.register(sparkContext, Option("Collected metrics")) + acc + } + + val metricsSchema: StructType = { + StructType.fromAttributes(metricExpressions.map(_.toAttribute)) + } + + // This is not used very frequently (once a query); it is not useful to use code generation here. 
+ private lazy val toRowConverter: InternalRow => Row = { + CatalystTypeConverters.createToScalaConverter(metricsSchema) + .asInstanceOf[InternalRow => Row] + } + + def collectedMetrics: Row = toRowConverter(accumulator.value) + + override def output: Seq[Attribute] = child.output + + override def outputPartitioning: Partitioning = child.outputPartitioning + + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override protected def doExecute(): RDD[InternalRow] = { + val collector = accumulator + collector.reset() + child.execute().mapPartitions { rows => + // Only publish the value of the accumulator when the task has completed. This is done by + // updating a task local accumulator ('updater') which will be merged with the actual + // accumulator as soon as the task completes. This avoids the following problems during the + // heartbeat: + // - Correctness issues due to partially completed/visible updates. + // - Performance issues due to excessive serialization. + val updater = collector.copyAndReset() + TaskContext.get().addTaskCompletionListener[Unit] { _ => + collector.setState(updater) + } + + rows.map { r => + updater.add(r) + r + } + } + } +} + +object CollectMetricsExec { + /** + * Recursively collect all collected metrics from a query tree. 
+ */ + def collect(plan: SparkPlan): Map[String, Row] = { + val metrics = plan.collectInPlanAndSubqueries { + case collector: CollectMetricsExec => collector.name -> collector.collectedMetrics + } + metrics.toMap + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index 9d1636ccf2718..e01cd8598db0c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -323,7 +323,8 @@ private object RowToColumnConverter { val c = row.getInterval(column) cv.appendStruct(false) cv.getChild(0).appendInt(c.months) - cv.getChild(1).appendLong(c.microseconds) + cv.getChild(1).appendInt(c.days) + cv.getChild(2).appendLong(c.microseconds) } } @@ -454,6 +455,7 @@ case class RowToColumnarExec(child: SparkPlan) extends UnaryExecNode { override def next(): ColumnarBatch = { cb.setNumRows(0) + vectors.foreach(_.reset()) var rowCount = 0 while (rowCount < numRows && rowIterator.hasNext) { val row = rowIterator.next() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 4a87049ac292b..0d759085a7e2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -65,10 +65,26 @@ trait DataSourceScanExec extends LeafExecNode { s"$nodeNamePrefix$nodeName${truncatedString(output, "[", ",", "]", maxFields)}$metadataStr") } + override def verboseStringWithOperatorId(): String = { + val metadataStr = metadata.toSeq.sorted.filterNot { + case (_, value) if (value.isEmpty || value.equals("[]")) => true + case (key, _) if (key.equals("DataFilters") || key.equals("Format")) => true + case (_, _) => false + }.map { + case (key, value) => s"$key: 
${redact(value)}" + } + + s""" + |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)} + |Output: ${producedAttributes.mkString("[", ", ", "]")} + |${metadataStr.mkString("\n")} + """.stripMargin + } + /** * Shorthand for calling redactString() without specifying redacting rules */ - private def redact(text: String): String = { + protected def redact(text: String): String = { Utils.redact(sqlContext.sessionState.conf.stringRedactionPattern, text) } @@ -171,7 +187,7 @@ case class FileSourceScanExec( partitionSchema = relation.partitionSchema, relation.sparkSession.sessionState.conf) - val driverMetrics: HashMap[String, Long] = HashMap.empty + private lazy val driverMetrics: HashMap[String, Long] = HashMap.empty /** * Send the driver-side metrics. Before calling this function, selectedPartitions has @@ -214,7 +230,7 @@ case class FileSourceScanExec( // call the file index for the files matching all filters except dynamic partition filters val predicate = dynamicPartitionFilters.reduce(And) val partitionColumns = relation.partitionSchema - val boundPredicate = newPredicate(predicate.transform { + val boundPredicate = Predicate.create(predicate.transform { case a: AttributeReference => val index = partitionColumns.indexWhere(a.name == _.name) BoundReference(index, partitionColumns(index).dataType, nullable = true) @@ -309,8 +325,7 @@ case class FileSourceScanExec( } @transient - private val pushedDownFilters = dataFilters.flatMap(DataSourceStrategy.translateFilter) - logInfo(s"Pushed Filters: ${pushedDownFilters.mkString(",")}") + private lazy val pushedDownFilters = dataFilters.flatMap(DataSourceStrategy.translateFilter) override lazy val metadata: Map[String, String] = { def seqToString(seq: Seq[Any]) = seq.mkString("[", ", ", "]") @@ -342,6 +357,31 @@ case class FileSourceScanExec( withSelectedBucketsCount } + override def verboseStringWithOperatorId(): String = { + val metadataStr = metadata.toSeq.sorted.filterNot { + case (_, value) if 
(value.isEmpty || value.equals("[]")) => true + case (key, _) if (key.equals("DataFilters") || key.equals("Format")) => true + case (_, _) => false + }.map { + case (key, _) if (key.equals("Location")) => + val location = relation.location + val numPaths = location.rootPaths.length + val abbreviatedLoaction = if (numPaths <= 1) { + location.rootPaths.mkString("[", ", ", "]") + } else { + "[" + location.rootPaths.head + s", ... ${numPaths - 1} entries]" + } + s"$key: ${location.getClass.getSimpleName} ${redact(abbreviatedLoaction)}" + case (key, value) => s"$key: ${redact(value)}" + } + + s""" + |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)} + |Output: ${producedAttributes.mkString("[", ", ", "]")} + |${metadataStr.mkString("\n")} + """.stripMargin + } + lazy val inputRDD: RDD[InternalRow] = { val readFile: (PartitionedFile) => Iterator[InternalRow] = relation.fileFormat.buildReaderWithPartitionValues( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainMode.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainMode.scala new file mode 100644 index 0000000000000..0ceafe99cdfcf --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainMode.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import java.util.Locale + +sealed trait ExplainMode { + /** + * String name of the explain mode. + */ + def name: String +} + +/** + * Simple mode means that when printing explain for a DataFrame, only a physical plan is + * expected to be printed to the console. + */ +case object SimpleMode extends ExplainMode { val name = "simple" } + +/** + * Extended mode means that when printing explain for a DataFrame, both logical and physical + * plans are expected to be printed to the console. + */ +case object ExtendedMode extends ExplainMode { val name = "extended" } + +/** + * Codegen mode means that when printing explain for a DataFrame, if generated codes are + * available, a physical plan and the generated codes are expected to be printed to the console. + */ +case object CodegenMode extends ExplainMode { val name = "codegen" } + +/** + * Cost mode means that when printing explain for a DataFrame, if plan node statistics are + * available, a logical plan and the statistics are expected to be printed to the console. + */ +case object CostMode extends ExplainMode { val name = "cost" } + +/** + * Formatted mode means that when printing explain for a DataFrame, explain output is + * expected to be split into two sections: a physical plan outline and node details. + */ +case object FormattedMode extends ExplainMode { val name = "formatted" } + +object ExplainMode { + /** + * Returns the explain mode from the given string. + */ + def fromString(mode: String): ExplainMode = mode.toLowerCase(Locale.ROOT) match { + case SimpleMode.name => SimpleMode + case ExtendedMode.name => ExtendedMode + case CodegenMode.name => CodegenMode + case CostMode.name => CostMode + case FormattedMode.name => FormattedMode + case _ => throw new IllegalArgumentException(s"Unknown explain mode: $mode. 
Accepted " + + "explain modes are 'simple', 'extended', 'codegen', 'cost', 'formatted'.") + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala index 18a7f9822dcbc..d4fe272f8c95f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala @@ -41,7 +41,7 @@ object ExplainUtils { * * @param plan Input query plan to process * @param append function used to append the explain output - * @param startOperationID The start value of operation id. The subsequent operations will + * @param startOperatorID The start value of operation id. The subsequent operations will * be assigned higher value. * * @return The last generated operation id for this input plan. This is to ensure we @@ -125,7 +125,7 @@ object ExplainUtils { * appear in the explain output. * 2. operator identifier starts at startOperatorID + 1 * @param plan Input query plan to process - * @param startOperationID The start value of operation id. The subsequent operations will + * @param startOperatorID The start value of operation id. The subsequent operations will * be assigned higher value. * @param operatorIDs A output parameter that contains a map of operator id and query plan. This * is used by caller to print the detail portion of the plan. 
@@ -193,14 +193,14 @@ object ExplainUtils { subqueries: ArrayBuffer[(SparkPlan, Expression, BaseSubqueryExec)]): Unit = { plan.foreach { case p: SparkPlan => - p.expressions.flatMap(_.collect { + p.expressions.foreach (_.collect { case e: PlanExpression[_] => e.plan match { case s: BaseSubqueryExec => subqueries += ((p, e, s)) getSubqueries(s, subqueries) + case _ => } - case other => }) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index eec8d70b5adf0..5a2f16d8e1526 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -19,12 +19,14 @@ package org.apache.spark.sql.execution import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} +import java.time.{Instant, LocalDate} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval /** * Runs a query returning the result in Hive compatible form. @@ -53,75 +55,40 @@ object HiveResult { // We need the types so we can output struct field names val types = executedPlan.output.map(_.dataType) // Reformat to match hive tab delimited output. 
- result.map(_.zip(types).map(toHiveString)).map(_.mkString("\t")) + result.map(_.zip(types).map(e => toHiveString(e))) + .map(_.mkString("\t")) } - private def formatDecimal(d: java.math.BigDecimal): String = { - if (d.compareTo(java.math.BigDecimal.ZERO) == 0) { - java.math.BigDecimal.ZERO.toPlainString - } else { - d.stripTrailingZeros().toPlainString // Hive strips trailing zeros - } - } - - private val primitiveTypes = Seq( - StringType, - IntegerType, - LongType, - DoubleType, - FloatType, - BooleanType, - ByteType, - ShortType, - DateType, - TimestampType, - BinaryType) - - private lazy val dateFormatter = DateFormatter() - private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter( - DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)) - - /** Hive outputs fields of structs slightly differently than top level attributes. */ - private def toHiveStructString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "null" - case (s: String, StringType) => "\"" + s + "\"" - case (decimal, DecimalType()) => decimal.toString - case (interval, CalendarIntervalType) => interval.toString - case (other, tpe) if primitiveTypes contains tpe => other.toString - } + private lazy val zoneId = DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone) + private lazy val dateFormatter = DateFormatter(zoneId) + private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) /** Formats a datum (based on the given data type) and 
returns the string representation. */ - def toHiveString(a: (Any, DataType)): String = a match { - case (struct: Row, StructType(fields)) => - struct.toSeq.zip(fields).map { - case (v, t) => s""""${t.name}":${toHiveStructString((v, t.dataType))}""" - }.mkString("{", ",", "}") - case (seq: Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_, _], MapType(kType, vType, _)) => - map.map { - case (key, value) => - toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) - }.toSeq.sorted.mkString("{", ",", "}") - case (null, _) => "NULL" + def toHiveString(a: (Any, DataType), nested: Boolean = false): String = a match { + case (null, _) => if (nested) "null" else "NULL" + case (b, BooleanType) => b.toString case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) + case (ld: LocalDate, DateType) => + dateFormatter.format(DateTimeUtils.localDateToDays(ld)) case (t: Timestamp, TimestampType) => - DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t)) + timestampFormatter.format(DateTimeUtils.fromJavaTimestamp(t)) + case (i: Instant, TimestampType) => + timestampFormatter.format(DateTimeUtils.instantToMicros(i)) case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) - case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal) - case (interval, CalendarIntervalType) => interval.toString - case (other, _ : UserDefinedType[_]) => other.toString - case (other, tpe) if primitiveTypes.contains(tpe) => other.toString + case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString + case (n, _: NumericType) => n.toString + case (s: String, StringType) => if (nested) "\"" + s + "\"" else s + case (interval: CalendarInterval, CalendarIntervalType) => interval.toString + case (seq: Seq[_], ArrayType(typ, _)) => + seq.map(v => (v, typ)).map(e => toHiveString(e, true)).mkString("[", ",", 
"]") + case (m: Map[_, _], MapType(kType, vType, _)) => + m.map { case (key, value) => + toHiveString((key, kType), true) + ":" + toHiveString((value, vType), true) + }.toSeq.sorted.mkString("{", ",", "}") + case (struct: Row, StructType(fields)) => + struct.toSeq.zip(fields).map { case (v, t) => + s""""${t.name}":${toHiveString((v, t.dataType), true)}""" + }.mkString("{", ",", "}") + case (other, _: UserDefinedType[_]) => other.toString } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index 9e32ecfdd80a7..b452213cd6cc7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -45,10 +45,14 @@ case class LocalTableScanExec( } } - private lazy val numParallelism: Int = math.min(math.max(unsafeRows.length, 1), - sqlContext.sparkContext.defaultParallelism) - - private lazy val rdd = sqlContext.sparkContext.parallelize(unsafeRows, numParallelism) + @transient private lazy val rdd: RDD[InternalRow] = { + if (rows.isEmpty) { + sqlContext.sparkContext.emptyRDD + } else { + val numSlices = math.min(unsafeRows.length, sqlContext.sparkContext.defaultParallelism) + sqlContext.sparkContext.parallelize(unsafeRows, numSlices) + } + } protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") @@ -77,6 +81,12 @@ case class LocalTableScanExec( taken } + override def executeTail(limit: Int): Array[InternalRow] = { + val taken: Seq[InternalRow] = unsafeRows.takeRight(limit) + longMetric("numOutputRows").add(taken.size) + taken.toArray + } + // Input is already UnsafeRows. 
override protected val createUnsafeProjection: Boolean = false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/PartitionedFileUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/PartitionedFileUtil.scala index 3196624f7c7c3..7dece29eb0212 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/PartitionedFileUtil.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/PartitionedFileUtil.scala @@ -64,13 +64,14 @@ object PartitionedFileUtil { offset: Long, length: Long): Array[String] = { val candidates = blockLocations.map { - // The fragment starts from a position within this block + // The fragment starts from a position within this block. It handles the case where the + // fragment is fully contained in the block. case b if b.getOffset <= offset && offset < b.getOffset + b.getLength => b.getHosts -> (b.getOffset + b.getLength - offset).min(length) // The fragment ends at a position within this block - case b if offset <= b.getOffset && offset + length < b.getLength => - b.getHosts -> (offset + length - b.getOffset).min(length) + case b if b.getOffset < offset + length && offset + length < b.getOffset + b.getLength => + b.getHosts -> (offset + length - b.getOffset) // The fragment fully contains this block case b if offset <= b.getOffset && b.getOffset + b.getLength <= offset + length => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index e5e86db29fe61..9109c05e75853 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -18,22 +18,26 @@ package org.apache.spark.sql.execution import java.io.{BufferedWriter, OutputStreamWriter} +import java.util.UUID import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{AnalysisException, 
SparkSession} +import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker +import org.apache.spark.sql.catalyst.expressions.codegen.ByteCodeStats import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.dynamicpruning.PlanDynamicPruningFilters -import org.apache.spark.sql.execution.adaptive.InsertAdaptiveSparkPlan +import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, InsertAdaptiveSparkPlan} import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} +import org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.util.Utils /** @@ -59,13 +63,12 @@ class QueryExecution( } } - lazy val analyzed: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.ANALYSIS) { - SparkSession.setActiveSession(sparkSession) + lazy val analyzed: LogicalPlan = executePhase(QueryPlanningTracker.ANALYSIS) { // We can't clone `logical` here, which will reset the `_analyzed` flag. 
sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) } - lazy val withCachedData: LogicalPlan = { + lazy val withCachedData: LogicalPlan = sparkSession.withActive { assertAnalyzed() assertSupported() // clone the plan to avoid sharing the plan instance between different stages like analyzing, @@ -73,26 +76,23 @@ class QueryExecution( sparkSession.sharedState.cacheManager.useCachedData(analyzed.clone()) } - lazy val optimizedPlan: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.OPTIMIZATION) { + lazy val optimizedPlan: LogicalPlan = executePhase(QueryPlanningTracker.OPTIMIZATION) { // clone the plan to avoid sharing the plan instance between different stages like analyzing, // optimizing and planning. sparkSession.sessionState.optimizer.executeAndTrack(withCachedData.clone(), tracker) } - lazy val sparkPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) { - SparkSession.setActiveSession(sparkSession) - // TODO: We use next(), i.e. take the first plan returned by the planner, here for now, - // but we will implement to choose the best plan. + lazy val sparkPlan: SparkPlan = executePhase(QueryPlanningTracker.PLANNING) { // Clone the logical plan here, in case the planner rules change the states of the logical plan. - planner.plan(ReturnAnswer(optimizedPlan.clone())).next() + QueryExecution.createSparkPlan(sparkSession, planner, optimizedPlan.clone()) } // executedPlan should not be used to initialize any SparkPlan. It should be // only used for execution. - lazy val executedPlan: SparkPlan = tracker.measurePhase(QueryPlanningTracker.PLANNING) { + lazy val executedPlan: SparkPlan = executePhase(QueryPlanningTracker.PLANNING) { // clone the plan to avoid sharing the plan instance between different stages like analyzing, // optimizing and planning. 
- prepareForExecution(sparkPlan.clone()) + QueryExecution.prepareForExecution(preparations, sparkPlan.clone()) } /** @@ -105,29 +105,19 @@ class QueryExecution( * Given QueryExecution is not a public class, end users are discouraged to use this: please * use `Dataset.rdd` instead where conversion will be applied. */ - lazy val toRdd: RDD[InternalRow] = executedPlan.execute() + lazy val toRdd: RDD[InternalRow] = new SQLExecutionRDD( + executedPlan.execute(), sparkSession.sessionState.conf) - /** - * Prepares a planned [[SparkPlan]] for execution by inserting shuffle operations and internal - * row format conversions as needed. - */ - protected def prepareForExecution(plan: SparkPlan): SparkPlan = { - preparations.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } + /** Get the metrics observed during the execution of the query plan. */ + def observedMetrics: Map[String, Row] = CollectMetricsExec.collect(executedPlan) + + protected def preparations: Seq[Rule[SparkPlan]] = { + QueryExecution.preparations(sparkSession) } - /** A sequence of rules that will be applied in order to the physical plan before execution. */ - protected def preparations: Seq[Rule[SparkPlan]] = Seq( - // `AdaptiveSparkPlanExec` is a leaf node. If inserted, all the following rules will be no-op - // as the original plan is hidden behind `AdaptiveSparkPlanExec`. 
- InsertAdaptiveSparkPlan(sparkSession, this), - PlanDynamicPruningFilters(sparkSession), - PlanSubqueries(sparkSession), - EnsureRequirements(sparkSession.sessionState.conf), - ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.conf, - sparkSession.sessionState.columnarRules), - CollapseCodegenStages(sparkSession.sessionState.conf), - ReuseExchange(sparkSession.sessionState.conf), - ReuseSubquery(sparkSession.sessionState.conf)) + private def executePhase[T](phase: String)(block: => T): T = sparkSession.withActive { + tracker.measurePhase(phase)(block) + } def simpleString: String = simpleString(false) @@ -135,7 +125,12 @@ class QueryExecution( val concat = new PlanStringConcat() concat.append("== Physical Plan ==\n") if (formatted) { - ExplainUtils.processPlan(executedPlan, concat.append) + try { + ExplainUtils.processPlan(executedPlan, concat.append) + } catch { + case e: AnalysisException => concat.append(e.toString) + case e: IllegalArgumentException => concat.append(e.toString) + } } else { QueryPlan.append(executedPlan, concat.append, verbose = false, addSuffix = false) } @@ -143,24 +138,54 @@ class QueryExecution( concat.toString } + def explainString(mode: ExplainMode): String = { + val queryExecution = if (logical.isStreaming) { + // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the + // output mode does not matter since there is no `Sink`. 
+ new IncrementalExecution( + sparkSession, logical, OutputMode.Append(), "", + UUID.randomUUID, UUID.randomUUID, 0, OffsetSeqMetadata(0, 0)) + } else { + this + } + + mode match { + case SimpleMode => + queryExecution.simpleString + case ExtendedMode => + queryExecution.toString + case CodegenMode => + try { + org.apache.spark.sql.execution.debug.codegenString(queryExecution.executedPlan) + } catch { + case e: AnalysisException => e.toString + } + case CostMode => + queryExecution.stringWithStats + case FormattedMode => + queryExecution.simpleString(formatted = true) + } + } + private def writePlans(append: String => Unit, maxFields: Int): Unit = { val (verbose, addSuffix) = (true, false) append("== Parsed Logical Plan ==\n") QueryPlan.append(logical, append, verbose, addSuffix, maxFields) append("\n== Analyzed Logical Plan ==\n") - val analyzedOutput = try { - truncatedString( - analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ", maxFields) + try { + append( + truncatedString( + analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}"), ", ", maxFields) + ) + append("\n") + QueryPlan.append(analyzed, append, verbose, addSuffix, maxFields) + append("\n== Optimized Logical Plan ==\n") + QueryPlan.append(optimizedPlan, append, verbose, addSuffix, maxFields) + append("\n== Physical Plan ==\n") + QueryPlan.append(executedPlan, append, verbose, addSuffix, maxFields) } catch { - case e: AnalysisException => e.toString + case e: AnalysisException => append(e.toString) } - append(analyzedOutput) - append("\n") - QueryPlan.append(analyzed, append, verbose, addSuffix, maxFields) - append("\n== Optimized Logical Plan ==\n") - QueryPlan.append(optimizedPlan, append, verbose, addSuffix, maxFields) - append("\n== Physical Plan ==\n") - QueryPlan.append(executedPlan, append, verbose, addSuffix, maxFields) } override def toString: String = withRedaction { @@ -174,8 +199,11 @@ class QueryExecution( val maxFields = SQLConf.get.maxToStringFields // 
trigger to compute stats for logical plans - optimizedPlan.stats - + try { + optimizedPlan.stats + } catch { + case e: AnalysisException => concat.append(e.toString + "\n") + } // only show optimized logical plan and physical plan concat.append("== Optimized Logical Plan ==\n") QueryPlan.append(optimizedPlan, concat.append, verbose = true, addSuffix = true, maxFields) @@ -212,7 +240,7 @@ class QueryExecution( * * @return Sequence of WholeStageCodegen subtrees and corresponding codegen */ - def codegenToSeq(): Seq[(String, String)] = { + def codegenToSeq(): Seq[(String, String, ByteCodeStats)] = { org.apache.spark.sql.execution.debug.codegenStringSeq(executedPlan) } @@ -238,3 +266,79 @@ class QueryExecution( } } } + +object QueryExecution { + /** + * Construct a sequence of rules that are used to prepare a planned [[SparkPlan]] for execution. + * These rules will make sure subqueries are planned, make use the data partitioning and ordering + * are correct, insert whole stage code gen, and try to reduce the work done by reusing exchanges + * and subqueries. + */ + private[execution] def preparations(sparkSession: SparkSession): Seq[Rule[SparkPlan]] = { + + val sparkSessionWithAdaptiveExecutionOff = + if (sparkSession.sessionState.conf.adaptiveExecutionEnabled) { + val session = sparkSession.cloneSession() + session.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, false) + session + } else { + sparkSession + } + + Seq( + // `AdaptiveSparkPlanExec` is a leaf node. If inserted, all the following rules will be no-op + // as the original plan is hidden behind `AdaptiveSparkPlanExec`. + InsertAdaptiveSparkPlan(AdaptiveExecutionContext(sparkSession)), + // If the following rules apply, it means the main query is not AQE-ed, so we make sure the + // subqueries are not AQE-ed either. 
+ PlanDynamicPruningFilters(sparkSessionWithAdaptiveExecutionOff), + PlanSubqueries(sparkSessionWithAdaptiveExecutionOff), + EnsureRequirements(sparkSession.sessionState.conf), + ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.conf, + sparkSession.sessionState.columnarRules), + CollapseCodegenStages(sparkSession.sessionState.conf), + ReuseExchange(sparkSession.sessionState.conf), + ReuseSubquery(sparkSession.sessionState.conf) + ) + } + + /** + * Prepares a planned [[SparkPlan]] for execution by inserting shuffle operations and internal + * row format conversions as needed. + */ + private[execution] def prepareForExecution( + preparations: Seq[Rule[SparkPlan]], + plan: SparkPlan): SparkPlan = { + preparations.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } + } + + /** + * Transform a [[LogicalPlan]] into a [[SparkPlan]]. + * + * Note that the returned physical plan still needs to be prepared for execution. + */ + def createSparkPlan( + sparkSession: SparkSession, + planner: SparkPlanner, + plan: LogicalPlan): SparkPlan = { + // TODO: We use next(), i.e. take the first plan returned by the planner, here for now, + // but we will implement to choose the best plan. + planner.plan(ReturnAnswer(plan)).next() + } + + /** + * Prepare the [[SparkPlan]] for execution. + */ + def prepareExecutedPlan(spark: SparkSession, plan: SparkPlan): SparkPlan = { + prepareForExecution(preparations(spark), plan) + } + + /** + * Transform the subquery's [[LogicalPlan]] into a [[SparkPlan]] and prepare the resulting + * [[SparkPlan]] for execution. 
+ */ + def prepareExecutedPlan(spark: SparkSession, plan: LogicalPlan): SparkPlan = { + val sparkPlan = createSparkPlan(spark, spark.sessionState.planner, plan.clone()) + prepareExecutedPlan(spark, sparkPlan) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala index 6046805ae95d4..59c503e372535 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.execution import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicLong +import scala.concurrent.{ExecutionContext, Future} + import org.apache.spark.SparkContext import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.sql.SparkSession @@ -60,9 +62,9 @@ object SQLExecution { * we can connect them with an execution. */ def withNewExecutionId[T]( - sparkSession: SparkSession, queryExecution: QueryExecution, - name: Option[String] = None)(body: => T): T = { + name: Option[String] = None)(body: => T): T = queryExecution.sparkSession.withActive { + val sparkSession = queryExecution.sparkSession val sc = sparkSession.sparkContext val oldExecutionId = sc.getLocalProperty(EXECUTION_ID_KEY) val executionId = SQLExecution.nextExecutionId @@ -164,4 +166,30 @@ object SQLExecution { } } } + + /** + * Wrap passed function to ensure necessary thread-local variables like + * SparkContext local properties are forwarded to execution thread + */ + def withThreadLocalCaptured[T]( + sparkSession: SparkSession, exec: ExecutionContext)(body: => T): Future[T] = { + val activeSession = sparkSession + val sc = sparkSession.sparkContext + val localProps = Utils.cloneProperties(sc.getLocalProperties) + Future { + val originalSession = SparkSession.getActiveSession + val originalLocalProps = sc.getLocalProperties + 
SparkSession.setActiveSession(activeSession) + sc.setLocalProperties(localProps) + val res = body + // reset active session and local props. + sc.setLocalProperties(originalLocalProps) + if (originalSession.nonEmpty) { + SparkSession.setActiveSession(originalSession.get) + } else { + SparkSession.clearActiveSession() + } + res + }(exec) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecutionRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecutionRDD.scala new file mode 100644 index 0000000000000..45b9cadc4aeda --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecutionRDD.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.spark.{Partition, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.internal.SQLConf + +/** + * It is just a wrapper over `sqlRDD`, which sets and makes effective all the configs from the + * captured `SQLConf`. + * Please notice that this means we may miss configurations set after the creation of this RDD and + * before its execution. 
+ * + * @param sqlRDD the `RDD` generated by the SQL plan + * @param conf the `SQLConf` to apply to the execution of the SQL plan + */ +class SQLExecutionRDD( + var sqlRDD: RDD[InternalRow], @transient conf: SQLConf) extends RDD[InternalRow](sqlRDD) { + private val sqlConfigs = conf.getAllConfs + private lazy val sqlConfExecutorSide = { + val newConf = new SQLConf() + sqlConfigs.foreach { case (k, v) => newConf.setConfString(k, v) } + newConf + } + + override val partitioner = firstParent[InternalRow].partitioner + + override def getPartitions: Array[Partition] = firstParent[InternalRow].partitions + + override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { + // If we are in the context of a tracked SQL operation, `SQLExecution.EXECUTION_ID_KEY` is set + // and we have nothing to do here. Otherwise, we use the `SQLConf` captured at the creation of + // this RDD. + if (context.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) == null) { + SQLConf.withExistingConf(sqlConfExecutorSide) { + firstParent[InternalRow].iterator(split, context) + } + } else { + firstParent[InternalRow].iterator(split, context) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala index 079ff25fcb67e..4c19f95796d04 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala @@ -21,8 +21,10 @@ import java.util.Arrays import org.apache.spark._ import org.apache.spark.rdd.RDD +import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.metric.{SQLMetric, SQLShuffleReadMetricsReporter} +import org.apache.spark.sql.internal.SQLConf /** * The [[Partition]] used by [[ShuffledRowRDD]]. 
A post-shuffle partition @@ -117,6 +119,11 @@ class ShuffledRowRDD( specifiedPartitionStartIndices: Option[Array[Int]] = None) extends RDD[InternalRow](dependency.rdd.context, Nil) { + if (SQLConf.get.fetchShuffleBlocksInBatchEnabled) { + dependency.rdd.context.setLocalProperty( + SortShuffleManager.FETCH_SHUFFLE_BLOCKS_IN_BATCH_ENABLED_KEY, "true") + } + private[this] val numPreShufflePartitions = dependency.partitioner.numPartitions private[this] val partitionStartIndices: Array[Int] = specifiedPartitionStartIndices match { @@ -172,7 +179,7 @@ class ShuffledRowRDD( reader.read().asInstanceOf[Iterator[Product2[Int, InternalRow]]].map(_._2) } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() dependency = null } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala index 0a955d6a75235..6b6ca531c6d3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS import org.apache.spark.sql.execution.metric.SQLMetrics /** @@ -62,8 +62,16 @@ case class SortExec( "peakMemory" -> SQLMetrics.createSizeMetric(sparkContext, "peak memory"), "spillSize" -> SQLMetrics.createSizeMetric(sparkContext, "spill size")) + private[sql] var rowSorter: UnsafeExternalRowSorter = _ + + /** + * This method gets invoked only once for each SortExec instance to initialize an + * UnsafeExternalRowSorter, both `plan.execute` and code generation 
are using it. + * In the code generation code path, we need to call this function outside the class so we + * should make it public. + */ def createSorter(): UnsafeExternalRowSorter = { - val ordering = newOrdering(sortOrder, output) + val ordering = RowOrdering.create(sortOrder, output) // The comparator for comparing prefix val boundSortExpression = BindReferences.bindReference(sortOrder.head, output) @@ -87,13 +95,13 @@ case class SortExec( } val pageSize = SparkEnv.get.memoryManager.pageSizeBytes - val sorter = UnsafeExternalRowSorter.create( + rowSorter = UnsafeExternalRowSorter.create( schema, ordering, prefixComparator, prefixComputer, pageSize, canUseRadixSort) if (testSpillFrequency > 0) { - sorter.setTestSpillFrequency(testSpillFrequency) + rowSorter.setTestSpillFrequency(testSpillFrequency) } - sorter + rowSorter } protected override def doExecute(): RDD[InternalRow] = { @@ -181,4 +189,17 @@ case class SortExec( |$sorterVariable.insertRow((UnsafeRow)${row.value}); """.stripMargin } + + /** + * In SortExec, we overwrites cleanupResources to close UnsafeExternalRowSorter. + */ + override protected[sql] def cleanupResources(): Unit = { + if (rowSorter != null) { + // There's possible for rowSorter is null here, for example, in the scenario of empty + // iterator in the current task, the downstream physical node(like SortMergeJoinExec) will + // trigger cleanupResources before rowSorter initialized in createSorter. 
+ rowSorter.cleanupResources() + } + super.cleanupResources() + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 98d6be0374da7..013d94768a2a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -20,20 +20,27 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.ExperimentalMethods import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.optimizer._ +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning} import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions import org.apache.spark.sql.execution.datasources.SchemaPruning +import org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs} class SparkOptimizer( + catalogManager: CatalogManager, catalog: SessionCatalog, experimentalMethods: ExperimentalMethods) - extends Optimizer(catalog) { + extends Optimizer(catalogManager) { + + override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = + // TODO: move SchemaPruning into catalyst + SchemaPruning :: V2ScanRelationPushDown :: PruneFileSourcePartitions :: Nil override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+ Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ - Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+ - Batch("Schema Pruning", Once, SchemaPruning) :+ Batch("PartitionPruning", Once, 
PartitionPruning, OptimizeSubqueries) :+ @@ -62,7 +69,8 @@ class SparkOptimizer( override def nonExcludableRules: Seq[String] = super.nonExcludableRules :+ ExtractPythonUDFFromJoinCondition.ruleName :+ ExtractPythonUDFFromAggregate.ruleName :+ ExtractGroupingPythonUDFFromAggregate.ruleName :+ - ExtractPythonUDFs.ruleName + ExtractPythonUDFs.ruleName :+ + V2ScanRelationPushDown.ruleName /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index b4cdf9e16b7e5..3301e9b5ab180 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -20,11 +20,7 @@ package org.apache.spark.sql.execution import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.util.concurrent.atomic.AtomicInteger -import scala.collection.mutable.ArrayBuffer -import scala.concurrent.ExecutionContext - -import org.codehaus.commons.compiler.CompileException -import org.codehaus.janino.InternalCompilerException +import scala.collection.mutable.{ArrayBuffer, ListBuffer} import org.apache.spark.{broadcast, SparkEnv} import org.apache.spark.internal.Logging @@ -33,13 +29,11 @@ import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{Predicate => GenPredicate, _} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import 
org.apache.spark.sql.execution.metric.SQLMetric -import org.apache.spark.sql.types.DataType import org.apache.spark.sql.vectorized.ColumnarBatch object SparkPlan { @@ -73,16 +67,6 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ val id: Int = SparkPlan.newPlanId() - // sqlContext will be null when SparkPlan nodes are created without the active sessions. - val subexpressionEliminationEnabled: Boolean = if (sqlContext != null) { - sqlContext.conf.subexpressionEliminationEnabled - } else { - false - } - - // whether we should fallback when hitting compilation errors caused by codegen - private val codeGenFallBack = (sqlContext == null) || sqlContext.conf.codegenFallback - /** * Return true if this stage of the plan supports columnar execution. */ @@ -141,6 +125,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ */ def resetMetrics(): Unit = { metrics.valuesIterator.foreach(_.reset()) + children.foreach(_.resetMetrics()) } /** @@ -325,20 +310,38 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ * UnsafeRow is highly compressible (at least 8 bytes for any column), the byte array is also * compressed. */ - private def getByteArrayRdd(n: Int = -1): RDD[(Long, Array[Byte])] = { + private def getByteArrayRdd( + n: Int = -1, takeFromEnd: Boolean = false): RDD[(Long, Array[Byte])] = { execute().mapPartitionsInternal { iter => var count = 0 val buffer = new Array[Byte](4 << 10) // 4K val codec = CompressionCodec.createCodec(SparkEnv.get.conf) val bos = new ByteArrayOutputStream() val out = new DataOutputStream(codec.compressedOutputStream(bos)) - // `iter.hasNext` may produce one row and buffer it, we should only call it when the limit is - // not hit. 
- while ((n < 0 || count < n) && iter.hasNext) { - val row = iter.next().asInstanceOf[UnsafeRow] - out.writeInt(row.getSizeInBytes) - row.writeToStream(out, buffer) - count += 1 + + if (takeFromEnd && n > 0) { + // To collect n from the last, we should anyway read everything with keeping the n. + // Otherwise, we don't know where is the last from the iterator. + var last: Seq[UnsafeRow] = Seq.empty[UnsafeRow] + val slidingIter = iter.map(_.copy()).sliding(n) + while (slidingIter.hasNext) { last = slidingIter.next().asInstanceOf[Seq[UnsafeRow]] } + var i = 0 + count = last.length + while (i < count) { + val row = last(i) + out.writeInt(row.getSizeInBytes) + row.writeToStream(out, buffer) + i += 1 + } + } else { + // `iter.hasNext` may produce one row and buffer it, we should only call it when the + // limit is not hit. + while ((n < 0 || count < n) && iter.hasNext) { + val row = iter.next().asInstanceOf[UnsafeRow] + out.writeInt(row.getSizeInBytes) + row.writeToStream(out, buffer) + count += 1 + } } out.writeInt(-1) out.flush() @@ -413,14 +416,23 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ * * This is modeled after `RDD.take` but never runs any job locally on the driver. */ - def executeTake(n: Int): Array[InternalRow] = { + def executeTake(n: Int): Array[InternalRow] = executeTake(n, takeFromEnd = false) + + /** + * Runs this query returning the last `n` rows as an array. + * + * This is modeled after `RDD.take` but never runs any job locally on the driver. 
+ */ + def executeTail(n: Int): Array[InternalRow] = executeTake(n, takeFromEnd = true) + + private def executeTake(n: Int, takeFromEnd: Boolean): Array[InternalRow] = { if (n == 0) { return new Array[InternalRow](0) } - val childRDD = getByteArrayRdd(n) + val childRDD = getByteArrayRdd(n, takeFromEnd) - val buf = new ArrayBuffer[InternalRow] + val buf = if (takeFromEnd) new ListBuffer[InternalRow] else new ArrayBuffer[InternalRow] val totalParts = childRDD.partitions.length var partsScanned = 0 while (buf.length < n && partsScanned < totalParts) { @@ -442,70 +454,57 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ } } - val p = partsScanned.until(math.min(partsScanned + numPartsToTry, totalParts).toInt) + val parts = partsScanned.until(math.min(partsScanned + numPartsToTry, totalParts).toInt) + val partsToScan = if (takeFromEnd) { + // Reverse partitions to scan. So, if parts was [1, 2, 3] in 200 partitions (0 to 199), + // it becomes [198, 197, 196]. + parts.map(p => (totalParts - 1) - p) + } else { + parts + } val sc = sqlContext.sparkContext val res = sc.runJob(childRDD, (it: Iterator[(Long, Array[Byte])]) => - if (it.hasNext) it.next() else (0L, Array.empty[Byte]), p) + if (it.hasNext) it.next() else (0L, Array.emptyByteArray), partsToScan) var i = 0 - while (buf.length < n && i < res.length) { - val rows = decodeUnsafeRows(res(i)._2) - val rowsToTake = if (n - buf.length >= res(i)._1) { - rows.toArray - } else { - rows.take(n - buf.length).toArray + + if (takeFromEnd) { + while (buf.length < n && i < res.length) { + val rows = decodeUnsafeRows(res(i)._2) + if (n - buf.length >= res(i)._1) { + buf.prepend(rows.toArray[InternalRow]: _*) + } else { + val dropUntil = res(i)._1 - (n - buf.length) + // Same as Iterator.drop but this only takes a long. 
+ var j: Long = 0L + while (j < dropUntil) { rows.next(); j += 1L} + buf.prepend(rows.toArray[InternalRow]: _*) + } + i += 1 + } + } else { + while (buf.length < n && i < res.length) { + val rows = decodeUnsafeRows(res(i)._2) + if (n - buf.length >= res(i)._1) { + buf ++= rows.toArray[InternalRow] + } else { + buf ++= rows.take(n - buf.length).toArray[InternalRow] + } + i += 1 } - buf ++= rowsToTake - i += 1 } - partsScanned += p.size + partsScanned += partsToScan.size } buf.toArray } - protected def newMutableProjection( - expressions: Seq[Expression], - inputSchema: Seq[Attribute], - useSubexprElimination: Boolean = false): MutableProjection = { - log.debug(s"Creating MutableProj: $expressions, inputSchema: $inputSchema") - MutableProjection.create(expressions, inputSchema) - } - - private def genInterpretedPredicate( - expression: Expression, inputSchema: Seq[Attribute]): InterpretedPredicate = { - val str = expression.toString - val logMessage = if (str.length > 256) { - str.substring(0, 256 - 3) + "..." - } else { - str - } - logWarning(s"Codegen disabled for this expression:\n $logMessage") - InterpretedPredicate.create(expression, inputSchema) - } - - protected def newPredicate( - expression: Expression, inputSchema: Seq[Attribute]): GenPredicate = { - try { - GeneratePredicate.generate(expression, inputSchema) - } catch { - case _ @ (_: InternalCompilerException | _: CompileException) if codeGenFallBack => - genInterpretedPredicate(expression, inputSchema) - } - } - - protected def newOrdering( - order: Seq[SortOrder], inputSchema: Seq[Attribute]): Ordering[InternalRow] = { - GenerateOrdering.generate(order, inputSchema) - } - /** - * Creates a row ordering for the given schema, in natural ascending order. + * Cleans up the resources used by the physical operator (if any). 
In general, all the resources + * should be cleaned up when the task finishes but operators like SortMergeJoinExec and LimitExec + * may want eager cleanup to free up tight resources (e.g., memory). */ - protected def newNaturalAscendingOrdering(dataTypes: Seq[DataType]): Ordering[InternalRow] = { - val order: Seq[SortOrder] = dataTypes.zipWithIndex.map { - case (dt, index) => SortOrder(BoundReference(index, dt, nullable = true), Ascending) - } - newOrdering(order, Seq.empty) + protected[sql] def cleanupResources(): Unit = { + children.foreach(_.cleanupResources()) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala index 8c7752c4bb742..5b72ec058e127 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, QueryStageExec} +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, LocalShuffleReaderExec, QueryStageExec} import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.execution.metric.SQLMetricInfo import org.apache.spark.sql.internal.SQLConf @@ -71,6 +71,7 @@ private[execution] object SparkPlanInfo { plan.nodeName, plan.simpleString(SQLConf.get.maxToStringFields), children.map(fromSparkPlan), - metadata, metrics) + metadata, + metrics) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala index dc7fb7741e7a7..895eeedd86b8b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala @@ -17,7 
+17,6 @@ package org.apache.spark.sql.execution -import org.apache.spark.SparkContext import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -27,7 +26,7 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy import org.apache.spark.sql.internal.SQLConf class SparkPlanner( - val sparkContext: SparkContext, + val session: SparkSession, val conf: SQLConf, val experimentalMethods: ExperimentalMethods) extends SparkStrategies { @@ -39,7 +38,7 @@ class SparkPlanner( extraPlanningStrategies ++ ( LogicalQueryStageStrategy :: PythonEvals :: - DataSourceV2Strategy :: + new DataSourceV2Strategy(session) :: FileSourceStrategy :: DataSourceStrategy(conf) :: SpecialLimits :: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 83cdc7a978a9a..aa139cb6b0c3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.types.StructType /** * Concrete parser for Spark SQL statements. */ -class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser { +class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser(conf) { val astBuilder = new SparkSqlAstBuilder(conf) private val substitutor = new VariableSubstitution(conf) @@ -89,151 +89,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { ResetCommand } - /** - * Create an [[AnalyzeTableCommand]] command, or an [[AnalyzePartitionCommand]] - * or an [[AnalyzeColumnCommand]] command. 
- * Example SQL for analyzing a table or a set of partitions : - * {{{ - * ANALYZE TABLE [db_name.]tablename [PARTITION (partcol1[=val1], partcol2[=val2], ...)] - * COMPUTE STATISTICS [NOSCAN]; - * }}} - * - * Example SQL for analyzing columns : - * {{{ - * ANALYZE TABLE [db_name.]tablename COMPUTE STATISTICS FOR COLUMNS column1, column2; - * }}} - * - * Example SQL for analyzing all columns of a table: - * {{{ - * ANALYZE TABLE [db_name.]tablename COMPUTE STATISTICS FOR ALL COLUMNS; - * }}} - */ - override def visitAnalyze(ctx: AnalyzeContext): LogicalPlan = withOrigin(ctx) { - def checkPartitionSpec(): Unit = { - if (ctx.partitionSpec != null) { - logWarning("Partition specification is ignored when collecting column statistics: " + - ctx.partitionSpec.getText) - } - } - if (ctx.identifier != null && - ctx.identifier.getText.toLowerCase(Locale.ROOT) != "noscan") { - throw new ParseException(s"Expected `NOSCAN` instead of `${ctx.identifier.getText}`", ctx) - } - - val table = visitTableIdentifier(ctx.tableIdentifier) - if (ctx.ALL() != null) { - checkPartitionSpec() - AnalyzeColumnCommand(table, None, allColumns = true) - } else if (ctx.identifierSeq() == null) { - if (ctx.partitionSpec != null) { - AnalyzePartitionCommand(table, visitPartitionSpec(ctx.partitionSpec), - noscan = ctx.identifier != null) - } else { - AnalyzeTableCommand(table, noscan = ctx.identifier != null) - } - } else { - checkPartitionSpec() - AnalyzeColumnCommand(table, - Option(visitIdentifierSeq(ctx.identifierSeq())), allColumns = false) - } - } - - /** - * Create a [[SetDatabaseCommand]] logical plan. - */ - override def visitUse(ctx: UseContext): LogicalPlan = withOrigin(ctx) { - SetDatabaseCommand(ctx.db.getText) - } - - /** - * Create a [[ShowTablesCommand]] logical plan. 
- * Example SQL : - * {{{ - * SHOW TABLE EXTENDED [(IN|FROM) database_name] LIKE 'identifier_with_wildcards' - * [PARTITION(partition_spec)]; - * }}} - */ - override def visitShowTable(ctx: ShowTableContext): LogicalPlan = withOrigin(ctx) { - val partitionSpec = Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec) - ShowTablesCommand( - Option(ctx.db).map(_.getText), - Option(ctx.pattern).map(string), - isExtended = true, - partitionSpec = partitionSpec) - } - - /** - * Create a [[ShowDatabasesCommand]] logical plan. - * Example SQL: - * {{{ - * SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards']; - * }}} - */ - override def visitShowDatabases(ctx: ShowDatabasesContext): LogicalPlan = withOrigin(ctx) { - ShowDatabasesCommand(Option(ctx.pattern).map(string)) - } - - /** - * A command for users to list the properties for a table. If propertyKey is specified, the value - * for the propertyKey is returned. If propertyKey is not specified, all the keys and their - * corresponding values are returned. - * The syntax of using this command in SQL is: - * {{{ - * SHOW TBLPROPERTIES table_name[('propertyKey')]; - * }}} - */ - override def visitShowTblProperties( - ctx: ShowTblPropertiesContext): LogicalPlan = withOrigin(ctx) { - ShowTablePropertiesCommand( - visitTableIdentifier(ctx.tableIdentifier), - Option(ctx.key).map(visitTablePropertyKey)) - } - - /** - * A command for users to list the column names for a table. - * This function creates a [[ShowColumnsCommand]] logical plan. - * - * The syntax of using this command in SQL is: - * {{{ - * SHOW COLUMNS (FROM | IN) table_identifier [(FROM | IN) database]; - * }}} - */ - override def visitShowColumns(ctx: ShowColumnsContext): LogicalPlan = withOrigin(ctx) { - ShowColumnsCommand(Option(ctx.db).map(_.getText), visitTableIdentifier(ctx.tableIdentifier)) - } - - /** - * A command for users to list the partition names of a table. If partition spec is specified, - * partitions that match the spec are returned. 
Otherwise an empty result set is returned. - * - * This function creates a [[ShowPartitionsCommand]] logical plan - * - * The syntax of using this command in SQL is: - * {{{ - * SHOW PARTITIONS table_identifier [partition_spec]; - * }}} - */ - override def visitShowPartitions(ctx: ShowPartitionsContext): LogicalPlan = withOrigin(ctx) { - val table = visitTableIdentifier(ctx.tableIdentifier) - val partitionKeys = Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec) - ShowPartitionsCommand(table, partitionKeys) - } - - /** - * Creates a [[ShowCreateTableCommand]] - */ - override def visitShowCreateTable(ctx: ShowCreateTableContext): LogicalPlan = withOrigin(ctx) { - val table = visitTableIdentifier(ctx.tableIdentifier()) - ShowCreateTableCommand(table) - } - - /** - * Create a [[RefreshTable]] logical plan. - */ - override def visitRefreshTable(ctx: RefreshTableContext): LogicalPlan = withOrigin(ctx) { - RefreshTable(visitTableIdentifier(ctx.tableIdentifier)) - } - /** * Create a [[RefreshResource]] logical plan. */ @@ -256,28 +111,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { unquotedPath } - /** - * Create a [[CacheTableCommand]] logical plan. - */ - override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { - val query = Option(ctx.query).map(plan) - val tableIdent = visitTableIdentifier(ctx.tableIdentifier) - if (query.isDefined && tableIdent.database.isDefined) { - val database = tableIdent.database.get - throw new ParseException(s"It is not allowed to add database prefix `$database` to " + - s"the table name in CACHE TABLE AS SELECT", ctx) - } - val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) - CacheTableCommand(tableIdent, query, ctx.LAZY != null, options) - } - - /** - * Create an [[UncacheTableCommand]] logical plan. 
- */ - override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { - UncacheTableCommand(visitTableIdentifier(ctx.tableIdentifier), ctx.EXISTS != null) - } - /** * Create a [[ClearCacheCommand]] logical plan. */ @@ -303,10 +136,13 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { } else { ExplainCommand( logicalPlan = statement, - extended = ctx.EXTENDED != null, - codegen = ctx.CODEGEN != null, - cost = ctx.COST != null, - formatted = ctx.FORMATTED != null) + mode = { + if (ctx.EXTENDED != null) ExtendedMode + else if (ctx.CODEGEN != null) CodegenMode + else if (ctx.COST != null) CostMode + else if (ctx.FORMATTED != null) FormattedMode + else SimpleMode + }) } } @@ -354,22 +190,15 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { if (external) { operationNotAllowed("CREATE EXTERNAL TABLE ... USING", ctx) } - - checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) - checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx) - checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) - checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx) - checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) - checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) - if (ifNotExists) { // Unlike CREATE TEMPORARY VIEW USING, CREATE TEMPORARY TABLE USING does not support // IF NOT EXISTS. Users are not allowed to replace the existing temp table. 
operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) } - val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) - val provider = ctx.tableProvider.qualifiedName.getText + val (_, _, _, options, _, _) = visitCreateTableClauses(ctx.createTableClauses()) + val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( + throw new ParseException("CREATE TEMPORARY TABLE without a provider is not allowed.", ctx)) val schema = Option(ctx.colTypeList()).map(createSchema) logWarning(s"CREATE TEMPORARY TABLE ... USING ... is deprecated, please use " + @@ -390,378 +219,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { userSpecifiedSchema = Option(ctx.colTypeList()).map(createSchema), replace = ctx.REPLACE != null, global = ctx.GLOBAL != null, - provider = ctx.tableProvider.qualifiedName.getText, + provider = ctx.tableProvider.multipartIdentifier.getText, options = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) } - /** - * Create a [[LoadDataCommand]] command. - * - * For example: - * {{{ - * LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename - * [PARTITION (partcol1=val1, partcol2=val2 ...)] - * }}} - */ - override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) { - LoadDataCommand( - table = visitTableIdentifier(ctx.tableIdentifier), - path = string(ctx.path), - isLocal = ctx.LOCAL != null, - isOverwrite = ctx.OVERWRITE != null, - partition = Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec) - ) - } - - /** - * Create a [[TruncateTableCommand]] command. 
- * - * For example: - * {{{ - * TRUNCATE TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] - * }}} - */ - override def visitTruncateTable(ctx: TruncateTableContext): LogicalPlan = withOrigin(ctx) { - TruncateTableCommand( - visitTableIdentifier(ctx.tableIdentifier), - Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) - } - - /** - * Create a [[AlterTableRecoverPartitionsCommand]] command. - * - * For example: - * {{{ - * MSCK REPAIR TABLE tablename - * }}} - */ - override def visitRepairTable(ctx: RepairTableContext): LogicalPlan = withOrigin(ctx) { - AlterTableRecoverPartitionsCommand( - visitTableIdentifier(ctx.tableIdentifier), - "MSCK REPAIR TABLE") - } - - /** - * Create a [[CreateDatabaseCommand]] command. - * - * For example: - * {{{ - * CREATE DATABASE [IF NOT EXISTS] database_name - * create_database_clauses; - * - * create_database_clauses (order insensitive): - * [COMMENT database_comment] - * [LOCATION path] - * [WITH DBPROPERTIES (key1=val1, key2=val2, ...)] - * }}} - */ - override def visitCreateDatabase(ctx: CreateDatabaseContext): LogicalPlan = withOrigin(ctx) { - checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx) - checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) - checkDuplicateClauses(ctx.DBPROPERTIES, "WITH DBPROPERTIES", ctx) - - CreateDatabaseCommand( - ctx.db.getText, - ctx.EXISTS != null, - ctx.locationSpec.asScala.headOption.map(visitLocationSpec), - Option(ctx.comment).map(string), - ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues).getOrElse(Map.empty)) - } - - /** - * Create an [[AlterDatabasePropertiesCommand]] command. 
- * - * For example: - * {{{ - * ALTER (DATABASE|SCHEMA) database SET DBPROPERTIES (property_name=property_value, ...); - * }}} - */ - override def visitSetDatabaseProperties( - ctx: SetDatabasePropertiesContext): LogicalPlan = withOrigin(ctx) { - AlterDatabasePropertiesCommand( - ctx.db.getText, - visitPropertyKeyValues(ctx.tablePropertyList)) - } - - /** - * Create a [[DropDatabaseCommand]] command. - * - * For example: - * {{{ - * DROP (DATABASE|SCHEMA) [IF EXISTS] database [RESTRICT|CASCADE]; - * }}} - */ - override def visitDropDatabase(ctx: DropDatabaseContext): LogicalPlan = withOrigin(ctx) { - DropDatabaseCommand(ctx.db.getText, ctx.EXISTS != null, ctx.CASCADE != null) - } - - /** - * Create a [[DescribeDatabaseCommand]] command. - * - * For example: - * {{{ - * DESCRIBE DATABASE [EXTENDED] database; - * }}} - */ - override def visitDescribeDatabase(ctx: DescribeDatabaseContext): LogicalPlan = withOrigin(ctx) { - DescribeDatabaseCommand(ctx.db.getText, ctx.EXTENDED != null) - } - - /** - * Create a plan for a DESCRIBE FUNCTION command. - */ - override def visitDescribeFunction(ctx: DescribeFunctionContext): LogicalPlan = withOrigin(ctx) { - import ctx._ - val functionName = - if (describeFuncName.STRING() != null) { - FunctionIdentifier(string(describeFuncName.STRING()), database = None) - } else if (describeFuncName.qualifiedName() != null) { - visitFunctionName(describeFuncName.qualifiedName) - } else { - FunctionIdentifier(describeFuncName.getText, database = None) - } - DescribeFunctionCommand(functionName, EXTENDED != null) - } - - /** - * Create a plan for a SHOW FUNCTIONS command. 
- */ - override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) { - import ctx._ - val (user, system) = Option(ctx.identifier).map(_.getText.toLowerCase(Locale.ROOT)) match { - case None | Some("all") => (true, true) - case Some("system") => (false, true) - case Some("user") => (true, false) - case Some(x) => throw new ParseException(s"SHOW $x FUNCTIONS not supported", ctx) - } - - val (db, pat) = if (qualifiedName != null) { - val name = visitFunctionName(qualifiedName) - (name.database, Some(name.funcName)) - } else if (pattern != null) { - (None, Some(string(pattern))) - } else { - (None, None) - } - - ShowFunctionsCommand(db, pat, user, system) - } - - /** - * Create a [[CreateFunctionCommand]] command. - * - * For example: - * {{{ - * CREATE [OR REPLACE] [TEMPORARY] FUNCTION [IF NOT EXISTS] [db_name.]function_name - * AS class_name [USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri']]; - * }}} - */ - override def visitCreateFunction(ctx: CreateFunctionContext): LogicalPlan = withOrigin(ctx) { - val resources = ctx.resource.asScala.map { resource => - val resourceType = resource.identifier.getText.toLowerCase(Locale.ROOT) - resourceType match { - case "jar" | "file" | "archive" => - FunctionResource(FunctionResourceType.fromString(resourceType), string(resource.STRING)) - case other => - operationNotAllowed(s"CREATE FUNCTION with resource type '$resourceType'", ctx) - } - } - - // Extract database, name & alias. - val functionIdentifier = visitFunctionName(ctx.qualifiedName) - CreateFunctionCommand( - functionIdentifier.database, - functionIdentifier.funcName, - string(ctx.className), - resources, - ctx.TEMPORARY != null, - ctx.EXISTS != null, - ctx.REPLACE != null) - } - - /** - * Create a [[DropFunctionCommand]] command. 
- * - * For example: - * {{{ - * DROP [TEMPORARY] FUNCTION [IF EXISTS] function; - * }}} - */ - override def visitDropFunction(ctx: DropFunctionContext): LogicalPlan = withOrigin(ctx) { - val functionIdentifier = visitFunctionName(ctx.qualifiedName) - DropFunctionCommand( - functionIdentifier.database, - functionIdentifier.funcName, - ctx.EXISTS != null, - ctx.TEMPORARY != null) - } - - /** - * Create a [[AlterTableRenameCommand]] command. - * - * For example: - * {{{ - * ALTER TABLE table1 RENAME TO table2; - * ALTER VIEW view1 RENAME TO view2; - * }}} - */ - override def visitRenameTable(ctx: RenameTableContext): LogicalPlan = withOrigin(ctx) { - AlterTableRenameCommand( - visitTableIdentifier(ctx.from), - visitTableIdentifier(ctx.to), - ctx.VIEW != null) - } - - /** - * Create an [[AlterTableSerDePropertiesCommand]] command. - * - * For example: - * {{{ - * ALTER TABLE table [PARTITION spec] SET SERDE serde_name [WITH SERDEPROPERTIES props]; - * ALTER TABLE table [PARTITION spec] SET SERDEPROPERTIES serde_properties; - * }}} - */ - override def visitSetTableSerDe(ctx: SetTableSerDeContext): LogicalPlan = withOrigin(ctx) { - AlterTableSerDePropertiesCommand( - visitTableIdentifier(ctx.tableIdentifier), - Option(ctx.STRING).map(string), - Option(ctx.tablePropertyList).map(visitPropertyKeyValues), - // TODO a partition spec is allowed to have optional values. This is currently violated. - Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) - } - - /** - * Create an [[AlterTableAddPartitionCommand]] command. - * - * For example: - * {{{ - * ALTER TABLE table ADD [IF NOT EXISTS] PARTITION spec [LOCATION 'loc1'] - * ALTER VIEW view ADD [IF NOT EXISTS] PARTITION spec - * }}} - * - * ALTER VIEW ... ADD PARTITION ... 
is not supported because the concept of partitioning - * is associated with physical tables - */ - override def visitAddTablePartition( - ctx: AddTablePartitionContext): LogicalPlan = withOrigin(ctx) { - if (ctx.VIEW != null) { - operationNotAllowed("ALTER VIEW ... ADD PARTITION", ctx) - } - // Create partition spec to location mapping. - val specsAndLocs = if (ctx.partitionSpec.isEmpty) { - ctx.partitionSpecLocation.asScala.map { - splCtx => - val spec = visitNonOptionalPartitionSpec(splCtx.partitionSpec) - val location = Option(splCtx.locationSpec).map(visitLocationSpec) - spec -> location - } - } else { - // Alter View: the location clauses are not allowed. - ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec(_) -> None) - } - AlterTableAddPartitionCommand( - visitTableIdentifier(ctx.tableIdentifier), - specsAndLocs, - ctx.EXISTS != null) - } - - /** - * Create an [[AlterTableRenamePartitionCommand]] command - * - * For example: - * {{{ - * ALTER TABLE table PARTITION spec1 RENAME TO PARTITION spec2; - * }}} - */ - override def visitRenameTablePartition( - ctx: RenameTablePartitionContext): LogicalPlan = withOrigin(ctx) { - AlterTableRenamePartitionCommand( - visitTableIdentifier(ctx.tableIdentifier), - visitNonOptionalPartitionSpec(ctx.from), - visitNonOptionalPartitionSpec(ctx.to)) - } - - /** - * Create an [[AlterTableDropPartitionCommand]] command - * - * For example: - * {{{ - * ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] [PURGE]; - * ALTER VIEW view DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...]; - * }}} - * - * ALTER VIEW ... DROP PARTITION ... is not supported because the concept of partitioning - * is associated with physical tables - */ - override def visitDropTablePartitions( - ctx: DropTablePartitionsContext): LogicalPlan = withOrigin(ctx) { - if (ctx.VIEW != null) { - operationNotAllowed("ALTER VIEW ... 
DROP PARTITION", ctx) - } - AlterTableDropPartitionCommand( - visitTableIdentifier(ctx.tableIdentifier), - ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec), - ifExists = ctx.EXISTS != null, - purge = ctx.PURGE != null, - retainData = false) - } - - /** - * Create an [[AlterTableRecoverPartitionsCommand]] command - * - * For example: - * {{{ - * ALTER TABLE table RECOVER PARTITIONS; - * }}} - */ - override def visitRecoverPartitions( - ctx: RecoverPartitionsContext): LogicalPlan = withOrigin(ctx) { - AlterTableRecoverPartitionsCommand(visitTableIdentifier(ctx.tableIdentifier)) - } - - /** - * Create an [[AlterTableSetLocationCommand]] command for a partition. - * - * For example: - * {{{ - * ALTER TABLE table PARTITION spec SET LOCATION "loc"; - * }}} - */ - override def visitSetPartitionLocation( - ctx: SetPartitionLocationContext): LogicalPlan = withOrigin(ctx) { - AlterTableSetLocationCommand( - visitTableIdentifier(ctx.tableIdentifier), - Some(visitNonOptionalPartitionSpec(ctx.partitionSpec)), - visitLocationSpec(ctx.locationSpec)) - } - - /** - * Create a [[AlterTableChangeColumnCommand]] command. - * - * For example: - * {{{ - * ALTER TABLE table [PARTITION partition_spec] - * CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment] - * [FIRST | AFTER column_name]; - * }}} - */ - override def visitChangeColumn(ctx: ChangeColumnContext): LogicalPlan = withOrigin(ctx) { - if (ctx.partitionSpec != null) { - operationNotAllowed("ALTER TABLE table PARTITION partition_spec CHANGE COLUMN", ctx) - } - - if (ctx.colPosition != null) { - operationNotAllowed( - "ALTER TABLE table [PARTITION partition_spec] CHANGE COLUMN ... FIRST | AFTER otherCol", - ctx) - } - - AlterTableChangeColumnCommand( - tableName = visitTableIdentifier(ctx.tableIdentifier), - columnName = ctx.colName.getText, - newColumn = visitColType(ctx.colType)) - } - /** * Convert a nested constants list into a sequence of string sequences. 
*/ @@ -801,9 +262,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { * ADD (FILE[S] | JAR[S] ) * LIST (FILE[S] [filepath ...] | JAR[S] [jarpath ...]) * }}} + * + * Note that filepath/jarpath can be given as follows; + * - /path/to/fileOrJar + * - "/path/to/fileOrJar" + * - '/path/to/fileOrJar' */ override def visitManageResource(ctx: ManageResourceContext): LogicalPlan = withOrigin(ctx) { - val mayebePaths = remainder(ctx.identifier).trim + val mayebePaths = if (ctx.STRING != null) string(ctx.STRING) else remainder(ctx.identifier).trim ctx.op.getType match { case SqlBaseParser.ADD => ctx.identifier.getText.toLowerCase(Locale.ROOT) match { @@ -870,7 +336,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) - checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx) + checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) @@ -893,7 +359,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { .getOrElse(CatalogStorageFormat.empty) val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat) .getOrElse(CatalogStorageFormat.empty) - val location = ctx.locationSpec.asScala.headOption.map(visitLocationSpec) + val location = visitLocationSpecList(ctx.locationSpec()) // If we are creating an EXTERNAL table, then the LOCATION field is required if (external && location.isEmpty) { operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx) @@ -927,7 +393,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { provider = Some(DDLUtils.HIVE_PROVIDER), partitionColumnNames = partitionCols.map(_.name), properties = properties, - comment = Option(ctx.comment).map(string)) + comment = 
visitCommentSpecList(ctx.commentSpec())) val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists @@ -986,14 +452,50 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { * For example: * {{{ * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name - * LIKE [other_db_name.]existing_table_name [locationSpec] + * LIKE [other_db_name.]existing_table_name + * [USING provider | + * [ + * [ROW FORMAT row_format] + * [STORED AS file_format] [WITH SERDEPROPERTIES (...)] + * ] + * ] + * [locationSpec] + * [TBLPROPERTIES (property_name=property_value, ...)] * }}} */ override def visitCreateTableLike(ctx: CreateTableLikeContext): LogicalPlan = withOrigin(ctx) { val targetTable = visitTableIdentifier(ctx.target) val sourceTable = visitTableIdentifier(ctx.source) - val location = Option(ctx.locationSpec).map(visitLocationSpec) - CreateTableLikeCommand(targetTable, sourceTable, location, ctx.EXISTS != null) + checkDuplicateClauses(ctx.tableProvider, "PROVIDER", ctx) + checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) + checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) + checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) + checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) + val provider = ctx.tableProvider.asScala.headOption.map(_.multipartIdentifier.getText) + val location = visitLocationSpecList(ctx.locationSpec()) + // rowStorage used to determine CatalogStorageFormat.serde and + // CatalogStorageFormat.properties in STORED AS clause. 
+ val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat) + .getOrElse(CatalogStorageFormat.empty) + val fileFormat = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat) match { + case Some(f) => + if (provider.isDefined) { + throw new ParseException("'STORED AS hiveFormats' and 'USING provider' " + + "should not be specified both", ctx) + } + f.copy( + locationUri = location.map(CatalogUtils.stringToURI), + serde = rowStorage.serde.orElse(f.serde), + properties = rowStorage.properties ++ f.properties) + case None => + if (rowStorage.serde.isDefined) { + throw new ParseException("'ROW FORMAT' must be used with 'STORED AS'", ctx) + } + CatalogStorageFormat.empty.copy(locationUri = location.map(CatalogUtils.stringToURI)) + } + val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) + CreateTableLikeCommand( + targetTable, sourceTable, fileFormat, provider, properties, ctx.EXISTS != null) } /** @@ -1160,73 +662,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { } } - /** - * Create or replace a view. This creates a [[CreateViewCommand]] command. - * - * For example: - * {{{ - * CREATE [OR REPLACE] [[GLOBAL] TEMPORARY] VIEW [IF NOT EXISTS] [db_name.]view_name - * [(column_name [COMMENT column_comment], ...) ] - * create_view_clauses - * - * AS SELECT ...; - * - * create_view_clauses (order insensitive): - * [COMMENT view_comment] - * [TBLPROPERTIES (property_name = property_value, ...)] - * }}} - */ - override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) { - if (!ctx.identifierList.isEmpty) { - operationNotAllowed("CREATE VIEW ... 
PARTITIONED ON", ctx) - } - - checkDuplicateClauses(ctx.COMMENT, "COMMENT", ctx) - checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED ON", ctx) - checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) - - val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl => - icl.identifierComment.asScala.map { ic => - ic.identifier.getText -> Option(ic.STRING).map(string) - } - } - - val viewType = if (ctx.TEMPORARY == null) { - PersistedView - } else if (ctx.GLOBAL != null) { - GlobalTempView - } else { - LocalTempView - } - - CreateViewCommand( - name = visitTableIdentifier(ctx.tableIdentifier), - userSpecifiedColumns = userSpecifiedColumns, - comment = ctx.STRING.asScala.headOption.map(string), - properties = ctx.tablePropertyList.asScala.headOption.map(visitPropertyKeyValues) - .getOrElse(Map.empty), - originalText = Option(source(ctx.query)), - child = plan(ctx.query), - allowExisting = ctx.EXISTS != null, - replace = ctx.REPLACE != null, - viewType = viewType) - } - - /** - * Alter the query of a view. This creates a [[AlterViewAsCommand]] command. - * - * For example: - * {{{ - * ALTER VIEW [db_name.]view_name AS SELECT ...; - * }}} - */ - override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) { - AlterViewAsCommand( - name = visitTableIdentifier(ctx.tableIdentifier), - originalText = source(ctx.query), - query = plan(ctx.query)) - } - /** * Create a [[ScriptInputOutputSchema]]. 
*/ @@ -1346,7 +781,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { storage = storage.copy(locationUri = customLocation) } - val provider = ctx.tableProvider.qualifiedName.getText + val provider = ctx.tableProvider.multipartIdentifier.getText (false, storage, Some(provider)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 08b00184ef9d7..bd2684d92a1d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes -import org.apache.spark.sql.execution.adaptive.LogicalQueryStage +import org.apache.spark.sql.execution.aggregate.AggUtils import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec @@ -89,6 +89,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { TakeOrderedAndProjectExec(limit, order, projectList, planLater(child)) :: Nil case Limit(IntegerLiteral(limit), child) => CollectLimitExec(limit, planLater(child)) :: Nil + case Tail(IntegerLiteral(limit), child) => + CollectTailExec(limit, planLater(child)) :: Nil case other => planLater(other) :: Nil } case Limit(IntegerLiteral(limit), Sort(order, true, child)) @@ -291,7 +293,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } def createJoinWithoutHint() = { - createBroadcastHashJoin(canBroadcast(left), canBroadcast(right)) + createBroadcastHashJoin( + canBroadcast(left) && !hint.leftHint.exists(_.strategy.contains(NO_BROADCAST_HASH)), + 
canBroadcast(right) && !hint.rightHint.exists(_.strategy.contains(NO_BROADCAST_HASH))) .orElse { if (!conf.preferSortMergeJoin) { createShuffleHashJoin( @@ -420,7 +424,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } - aggregate.AggUtils.planStreamingAggregation( + AggUtils.planStreamingAggregation( normalizedGroupingExpressions, aggregateExpressions.map(expr => expr.asInstanceOf[AggregateExpression]), rewrittenResultExpressions, @@ -447,21 +451,35 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { * Used to plan the streaming global limit operator for streams in append mode. * We need to check for either a direct Limit or a Limit wrapped in a ReturnAnswer operator, * following the example of the SpecialLimits Strategy above. - * Streams with limit in Append mode use the stateful StreamingGlobalLimitExec. - * Streams with limit in Complete mode use the stateless CollectLimitExec operator. - * Limit is unsupported for streams in Update mode. */ case class StreamingGlobalLimitStrategy(outputMode: OutputMode) extends Strategy { - override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case ReturnAnswer(rootPlan) => rootPlan match { - case Limit(IntegerLiteral(limit), child) - if plan.isStreaming && outputMode == InternalOutputModes.Append => - StreamingGlobalLimitExec(limit, LocalLimitExec(limit, planLater(child))) :: Nil - case _ => Nil + + private def generatesStreamingAppends(plan: LogicalPlan): Boolean = { + + /** Ensures that this plan does not have a streaming aggregate in it. */ + def hasNoStreamingAgg: Boolean = { + plan.collectFirst { case a: Aggregate if a.isStreaming => a }.isEmpty } - case Limit(IntegerLiteral(limit), child) - if plan.isStreaming && outputMode == InternalOutputModes.Append => - StreamingGlobalLimitExec(limit, LocalLimitExec(limit, planLater(child))) :: Nil + + // The following cases of limits on a streaming plan has to be executed with a stateful + // streaming plan. + // 1. 
When the query is in append mode (that is, all logical plan operate on appended data). + // 2. When the plan does not contain any streaming aggregate (that is, plan has only + // operators that operate on appended data). This must be executed with a stateful + // streaming plan even if the query is in complete mode because of a later streaming + // aggregation (e.g., `streamingDf.limit(5).groupBy().count()`). + plan.isStreaming && ( + outputMode == InternalOutputModes.Append || + outputMode == InternalOutputModes.Complete && hasNoStreamingAgg) + } + + override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case ReturnAnswer(Limit(IntegerLiteral(limit), child)) if generatesStreamingAppends(child) => + StreamingGlobalLimitExec(limit, StreamingLocalLimitExec(limit, planLater(child))) :: Nil + + case Limit(IntegerLiteral(limit), child) if generatesStreamingAppends(child) => + StreamingGlobalLimitExec(limit, StreamingLocalLimitExec(limit, planLater(child))) :: Nil + case _ => Nil } } @@ -472,8 +490,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right, _) if left.isStreaming && right.isStreaming => - new StreamingSymmetricHashJoinExec( - leftKeys, rightKeys, joinType, condition, planLater(left), planLater(right)) :: Nil + val stateVersion = conf.getConf(SQLConf.STREAMING_JOIN_STATE_FORMAT_VERSION) + new StreamingSymmetricHashJoinExec(leftKeys, rightKeys, joinType, condition, + stateVersion, planLater(left), planLater(right)) :: Nil case Join(left, right, _, _, _) if left.isStreaming && right.isStreaming => throw new AnalysisException( @@ -514,13 +533,13 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { val aggregateOperator = if (functionsWithDistinct.isEmpty) { - aggregate.AggUtils.planAggregateWithoutDistinct( + AggUtils.planAggregateWithoutDistinct( normalizedGroupingExpressions, aggregateExpressions, resultExpressions, planLater(child)) } 
else { - aggregate.AggUtils.planAggregateWithOneDistinct( + AggUtils.planAggregateWithOneDistinct( normalizedGroupingExpressions, functionsWithDistinct, functionsWithoutDistinct, @@ -565,7 +584,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } - protected lazy val singleRowRdd = sparkContext.parallelize(Seq(InternalRow()), 1) + protected lazy val singleRowRdd = session.sparkContext.parallelize(Seq(InternalRow()), 1) object InMemoryScans extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { @@ -680,6 +699,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { f, p, b, is, ot, planLater(child)) :: Nil case logical.FlatMapGroupsInPandas(grouping, func, output, child) => execution.python.FlatMapGroupsInPandasExec(grouping, func, output, planLater(child)) :: Nil + case logical.FlatMapCoGroupsInPandas(leftGroup, rightGroup, func, output, left, right) => + execution.python.FlatMapCoGroupsInPandasExec( + leftGroup, rightGroup, func, output, planLater(left), planLater(right)) :: Nil case logical.MapInPandas(func, output, child) => execution.python.MapInPandasExec(func, output, planLater(child)) :: Nil case logical.MapElements(f, _, _, objAttr, child) => @@ -740,6 +762,12 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case ExternalRDD(outputObjAttr, rdd) => ExternalRDDScanExec(outputObjAttr, rdd) :: Nil case r: LogicalRDD => RDDScanExec(r.output, r.rdd, "ExistingRDD", r.outputPartitioning, r.outputOrdering) :: Nil + case _: UpdateTable => + throw new UnsupportedOperationException(s"UPDATE TABLE is not supported temporarily.") + case _: MergeIntoTable => + throw new UnsupportedOperationException(s"MERGE INTO TABLE is not supported temporarily.") + case logical.CollectMetrics(name, metrics, child) => + execution.CollectMetricsExec(name, metrics, planLater(child)) :: Nil case _ => Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index ce9a6ea319d5f..10fe0f252322f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -54,6 +54,7 @@ trait CodegenSupport extends SparkPlan { case _: RDDScanExec => "rdd" case _: DataSourceScanExec => "scan" case _: InMemoryTableScanExec => "memoryScan" + case _: WholeStageCodegenExec => "wholestagecodegen" case _ => nodeName.toLowerCase(Locale.ROOT) } @@ -613,6 +614,8 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) "pipelineTime" -> SQLMetrics.createTimingMetric(sparkContext, WholeStageCodegenExec.PIPELINE_DURATION_METRIC)) + override def nodeName: String = s"WholeStageCodegen (${codegenStageId})" + def generatedClassName(): String = if (conf.wholeStageUseIdInClassName) { s"GeneratedIteratorForCodegenStage$codegenStageId" } else { @@ -688,7 +691,7 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) override def doExecute(): RDD[InternalRow] = { val (ctx, cleanedSource) = doCodeGen() // try to compile and fallback if it failed - val (_, maxCodeSize) = try { + val (_, compiledCodeStats) = try { CodeGenerator.compile(cleanedSource) } catch { case NonFatal(_) if !Utils.isTesting && sqlContext.conf.codegenFallback => @@ -698,9 +701,9 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) } // Check if compiled code has a too large function - if (maxCodeSize > sqlContext.conf.hugeMethodLimit) { + if (compiledCodeStats.maxMethodCodeSize > sqlContext.conf.hugeMethodLimit) { logInfo(s"Found too long generated codes and JIT optimization might not work: " + - s"the bytecode size ($maxCodeSize) is above the limit " + + s"the bytecode size (${compiledCodeStats.maxMethodCodeSize}) is above the limit " + s"${sqlContext.conf.hugeMethodLimit}, and the whole-stage codegen was 
disabled " + s"for this plan (id=$codegenStageId). To avoid this, you can raise the limit " + s"`${SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.key}`:\n$treeString") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 5d92ddad887bf..3f20b59361988 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -31,15 +31,14 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ -import org.apache.spark.sql.execution.adaptive.rule.ReduceNumShufflePartitions import org.apache.spark.sql.execution.exchange._ -import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.ThreadUtils @@ -61,11 +60,9 @@ import org.apache.spark.util.ThreadUtils */ case class AdaptiveSparkPlanExec( initialPlan: SparkPlan, - @transient session: SparkSession, + @transient context: AdaptiveExecutionContext, @transient preprocessingRules: Seq[Rule[SparkPlan]], - @transient subqueryCache: TrieMap[SparkPlan, BaseSubqueryExec], - @transient 
stageCache: TrieMap[SparkPlan, QueryStageExec], - @transient queryExecution: QueryExecution) + @transient isSubquery: Boolean) extends LeafExecNode { @transient private val lock = new Object() @@ -73,7 +70,9 @@ case class AdaptiveSparkPlanExec( // The logical plan optimizer for re-optimizing the current logical plan. @transient private val optimizer = new RuleExecutor[LogicalPlan] { // TODO add more optimization rules - override protected def batches: Seq[Batch] = Seq() + override protected def batches: Seq[Batch] = Seq( + Batch("Demote BroadcastHashJoin", Once, DemoteBroadcastHashJoin(conf)) + ) } @transient private val ensureRequirements = EnsureRequirements(conf) @@ -88,10 +87,16 @@ case class AdaptiveSparkPlanExec( // A list of physical optimizer rules to be applied to a new stage before its execution. These // optimizations should be stage-independent. @transient private val queryStageOptimizerRules: Seq[Rule[SparkPlan]] = Seq( - ReuseAdaptiveSubquery(conf, subqueryCache), + ReuseAdaptiveSubquery(conf, context.subqueryCache), + // Here the 'OptimizeSkewedJoin' rule should be executed + // before 'ReduceNumShufflePartitions', as the skewed partition handled + // in 'OptimizeSkewedJoin' rule, should be omitted in 'ReduceNumShufflePartitions'. + OptimizeSkewedJoin(conf), ReduceNumShufflePartitions(conf), - ApplyColumnarRulesAndInsertTransitions(session.sessionState.conf, - session.sessionState.columnarRules), + // The rule of 'OptimizeLocalShuffleReader' need to make use of the 'partitionStartIndices' + // in 'ReduceNumShufflePartitions' rule. So it must be after 'ReduceNumShufflePartitions' rule. 
+ OptimizeLocalShuffleReader(conf), + ApplyColumnarRulesAndInsertTransitions(conf, context.session.sessionState.columnarRules), CollapseCodegenStages(conf) ) @@ -117,25 +122,38 @@ case class AdaptiveSparkPlanExec( def executedPlan: SparkPlan = currentPhysicalPlan - override def conf: SQLConf = session.sessionState.conf + override def conf: SQLConf = context.session.sessionState.conf override def output: Seq[Attribute] = initialPlan.output override def doCanonicalize(): SparkPlan = initialPlan.canonicalized - override def doExecute(): RDD[InternalRow] = lock.synchronized { - if (isFinalPlan) { - currentPhysicalPlan.execute() - } else { - // Make sure we only update Spark UI if this plan's `QueryExecution` object matches the one - // retrieved by the `sparkContext`'s current execution ID. Note that sub-queries do not have - // their own execution IDs and therefore rely on the main query to update UI. - val executionId = Option( - session.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)).flatMap { idStr => - val id = idStr.toLong - val qe = SQLExecution.getQueryExecution(id) - if (qe.eq(queryExecution)) Some(id) else None - } + override def resetMetrics(): Unit = { + metrics.valuesIterator.foreach(_.reset()) + executedPlan.resetMetrics() + } + + private def collectSQLMetrics(plan: SparkPlan): Seq[SQLMetric] = { + val metrics = new mutable.ArrayBuffer[SQLMetric]() + plan.foreach { + case p: ShuffleQueryStageExec if (p.resultOption.isEmpty) => + collectSQLMetrics(p.plan).foreach(metrics += _) + case p: BroadcastQueryStageExec if (p.resultOption.isEmpty) => + collectSQLMetrics(p.plan).foreach(metrics += _) + case p: SparkPlan => + p.metrics.foreach { case metric => + metrics += metric._2 + } + } + metrics + } + + private def getFinalPhysicalPlan(): SparkPlan = lock.synchronized { + if (!isFinalPlan) { + // Subqueries do not have their own execution IDs and therefore rely on the main query to + // update UI. 
+ val executionId = Option(context.session.sparkContext.getLocalProperty( + SQLExecution.EXECUTION_ID_KEY)).map(_.toLong) var currentLogicalPlan = currentPhysicalPlan.logicalLink.get var result = createQueryStages(currentPhysicalPlan) val events = new LinkedBlockingQueue[StageMaterializationEvent]() @@ -149,13 +167,17 @@ case class AdaptiveSparkPlanExec( // Start materialization of all new stages. result.newStages.foreach { stage => - stage.materialize().onComplete { res => - if (res.isSuccess) { - events.offer(StageSuccess(stage, res.get)) - } else { - events.offer(StageFailure(stage, res.failed.get)) - } - }(AdaptiveSparkPlanExec.executionContext) + try { + stage.materialize().onComplete { res => + if (res.isSuccess) { + events.offer(StageSuccess(stage, res.get)) + } else { + events.offer(StageFailure(stage, res.failed.get)) + } + }(AdaptiveSparkPlanExec.executionContext) + } catch { + case e: Throwable => events.offer(StageFailure(stage, e)) + } } } @@ -170,7 +192,8 @@ case class AdaptiveSparkPlanExec( stage.resultOption = Some(res) case StageFailure(stage, ex) => errors.append( - new SparkException(s"Failed to materialize query stage: ${stage.treeString}", ex)) + new SparkException(s"Failed to materialize query stage: ${stage.treeString}." + + s" and the cause is ${ex.getMessage}", ex)) } // In case of errors, we cancel all running stages and throw exception. @@ -207,12 +230,26 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. 
currentPhysicalPlan = applyPhysicalRules(result.newPlan, queryStageOptimizerRules) isFinalPlan = true - - val ret = currentPhysicalPlan.execute() - logDebug(s"Final plan: $currentPhysicalPlan") executionId.foreach(onUpdatePlan) - ret + logDebug(s"Final plan: $currentPhysicalPlan") } + currentPhysicalPlan + } + + override def executeCollect(): Array[InternalRow] = { + getFinalPhysicalPlan().executeCollect() + } + + override def executeTake(n: Int): Array[InternalRow] = { + getFinalPhysicalPlan().executeTake(n) + } + + override def executeTail(n: Int): Array[InternalRow] = { + getFinalPhysicalPlan().executeTail(n) + } + + override def doExecute(): RDD[InternalRow] = { + getFinalPhysicalPlan().execute() } override def verboseString(maxFields: Int): String = simpleString(maxFields) @@ -271,13 +308,14 @@ case class AdaptiveSparkPlanExec( private def createQueryStages(plan: SparkPlan): CreateStageResult = plan match { case e: Exchange => // First have a quick check in the `stageCache` without having to traverse down the node. - stageCache.get(e.canonicalized) match { + context.stageCache.get(e.canonicalized) match { case Some(existingStage) if conf.exchangeReuseEnabled => - val reusedStage = reuseQueryStage(existingStage, e) - // When reusing a stage, we treat it a new stage regardless of whether the existing stage - // has been materialized or not. Thus we won't skip re-optimization for a reused stage. - CreateStageResult(newPlan = reusedStage, - allChildStagesMaterialized = false, newStages = Seq(reusedStage)) + val stage = reuseQueryStage(existingStage, e) + // This is a leaf stage and is not materialized yet even if the reused exchange may has + // been completed. It will trigger re-optimization later and stage materialization will + // finish in instant if the underlying exchange is already completed. 
+ CreateStageResult( + newPlan = stage, allChildStagesMaterialized = false, newStages = Seq(stage)) case _ => val result = createQueryStages(e.child) @@ -289,7 +327,7 @@ case class AdaptiveSparkPlanExec( // Check the `stageCache` again for reuse. If a match is found, ditch the new stage // and reuse the existing stage found in the `stageCache`, otherwise update the // `stageCache` with the new stage. - val queryStage = stageCache.getOrElseUpdate(e.canonicalized, newStage) + val queryStage = context.stageCache.getOrElseUpdate(e.canonicalized, newStage) if (queryStage.ne(newStage)) { newStage = reuseQueryStage(queryStage, e) } @@ -333,10 +371,10 @@ case class AdaptiveSparkPlanExec( queryStage } - private def reuseQueryStage(s: QueryStageExec, e: Exchange): QueryStageExec = { - val queryStage = ReusedQueryStageExec(currentStageId, s, e.output) + private def reuseQueryStage(existing: QueryStageExec, exchange: Exchange): QueryStageExec = { + val queryStage = existing.newReuseInstance(currentStageId, exchange.output) currentStageId += 1 - setLogicalLinkForNewQueryStage(queryStage, e) + setLogicalLinkForNewQueryStage(queryStage, exchange) queryStage } @@ -429,8 +467,8 @@ case class AdaptiveSparkPlanExec( private def reOptimize(logicalPlan: LogicalPlan): (SparkPlan, LogicalPlan) = { logicalPlan.invalidateStatsCache() val optimized = optimizer.execute(logicalPlan) - SparkSession.setActiveSession(session) - val sparkPlan = session.sessionState.planner.plan(ReturnAnswer(optimized)).next() + SparkSession.setActiveSession(context.session) + val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() val newPlan = applyPhysicalRules(sparkPlan, preprocessingRules ++ queryStagePreparationRules) (newPlan, optimized) } @@ -458,10 +496,27 @@ case class AdaptiveSparkPlanExec( * Notify the listeners of the physical plan change. 
*/ private def onUpdatePlan(executionId: Long): Unit = { - session.sparkContext.listenerBus.post(SparkListenerSQLAdaptiveExecutionUpdate( - executionId, - SQLExecution.getQueryExecution(executionId).toString, - SparkPlanInfo.fromSparkPlan(this))) + if (isSubquery) { + // When executing subqueries, we can't update the query plan in the UI as the + // UI doesn't support partial update yet. However, the subquery may have been + // optimized into a different plan and we must let the UI know the SQL metrics + // of the new plan nodes, so that it can track the valid accumulator updates later + // and display SQL metrics correctly. + onUpdateSQLMetrics(collectSQLMetrics(currentPhysicalPlan), executionId) + } else { + context.session.sparkContext.listenerBus.post(SparkListenerSQLAdaptiveExecutionUpdate( + executionId, + SQLExecution.getQueryExecution(executionId).toString, + SparkPlanInfo.fromSparkPlan(this))) + } + } + + private def onUpdateSQLMetrics(sqlMetrics: Seq[SQLMetric], executionId: Long): Unit = { + val sqlPlanMetrics = sqlMetrics.map { case sqlMetric => + SQLPlanMetric(sqlMetric.name.get, sqlMetric.id, sqlMetric.metricType) + } + context.session.sparkContext.listenerBus.post(SparkListenerSQLAdaptiveSQLMetricUpdates( + executionId.toLong, sqlPlanMetrics)) } /** @@ -485,7 +540,8 @@ case class AdaptiveSparkPlanExec( } } finally { val ex = new SparkException( - "Adaptive execution failed due to stage materialization failures.", errors.head) + "Adaptive execution failed due to stage materialization failures." + + s" and the cause is ${errors.head.getMessage}", errors.head) errors.tail.foreach(ex.addSuppressed) cancelErrors.foreach(ex.addSuppressed) throw ex @@ -517,6 +573,24 @@ object AdaptiveSparkPlanExec { } } +/** + * The execution context shared between the main query and all sub-queries. + */ +case class AdaptiveExecutionContext(session: SparkSession) { + + /** + * The subquery-reuse map shared across the entire query. 
+ */ + val subqueryCache: TrieMap[SparkPlan, BaseSubqueryExec] = + new TrieMap[SparkPlan, BaseSubqueryExec]() + + /** + * The exchange-reuse map shared across the entire query, including sub-queries. + */ + val stageCache: TrieMap[SparkPlan, QueryStageExec] = + new TrieMap[SparkPlan, QueryStageExec]() +} + /** * The event type for stage materialization. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala new file mode 100644 index 0000000000000..61ae6cb14ccd3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.sql.execution.SparkPlan + +/** + * This class provides utility methods related to tree traversal of an [[AdaptiveSparkPlanExec]] + * plan. 
Unlike their counterparts in [[org.apache.spark.sql.catalyst.trees.TreeNode]] or + * [[org.apache.spark.sql.catalyst.plans.QueryPlan]], these methods traverse down leaf nodes of + * adaptive plans, i.e., [[AdaptiveSparkPlanExec]] and [[QueryStageExec]]. + */ +trait AdaptiveSparkPlanHelper { + + /** + * Find the first [[SparkPlan]] that satisfies the condition specified by `f`. + * The condition is recursively applied to this node and all of its children (pre-order). + */ + def find(p: SparkPlan)(f: SparkPlan => Boolean): Option[SparkPlan] = if (f(p)) { + Some(p) + } else { + allChildren(p).foldLeft(Option.empty[SparkPlan]) { (l, r) => l.orElse(find(r)(f)) } + } + + /** + * Runs the given function on this node and then recursively on children. + * @param f the function to be applied to each node in the tree. + */ + def foreach(p: SparkPlan)(f: SparkPlan => Unit): Unit = { + f(p) + allChildren(p).foreach(foreach(_)(f)) + } + + /** + * Runs the given function recursively on children then on this node. + * @param f the function to be applied to each node in the tree. + */ + def foreachUp(p: SparkPlan)(f: SparkPlan => Unit): Unit = { + allChildren(p).foreach(foreachUp(_)(f)) + f(p) + } + + /** + * Returns a Seq containing the result of applying the given function to each + * node in this tree in a preorder traversal. + * @param f the function to be applied. + */ + def mapPlans[A](p: SparkPlan)(f: SparkPlan => A): Seq[A] = { + val ret = new collection.mutable.ArrayBuffer[A]() + foreach(p)(ret += f(_)) + ret + } + + /** + * Returns a Seq by applying a function to all nodes in this tree and using the elements of the + * resulting collections. + */ + def flatMap[A](p: SparkPlan)(f: SparkPlan => TraversableOnce[A]): Seq[A] = { + val ret = new collection.mutable.ArrayBuffer[A]() + foreach(p)(ret ++= f(_)) + ret + } + + /** + * Returns a Seq containing the result of applying a partial function to all elements in this + * tree on which the function is defined. 
+ */ + def collect[B](p: SparkPlan)(pf: PartialFunction[SparkPlan, B]): Seq[B] = { + val ret = new collection.mutable.ArrayBuffer[B]() + val lifted = pf.lift + foreach(p)(node => lifted(node).foreach(ret.+=)) + ret + } + + /** + * Returns a Seq containing the leaves in this tree. + */ + def collectLeaves(p: SparkPlan): Seq[SparkPlan] = { + collect(p) { case plan if allChildren(plan).isEmpty => plan } + } + + /** + * Finds and returns the first [[SparkPlan]] of the tree for which the given partial function + * is defined (pre-order), and applies the partial function to it. + */ + def collectFirst[B](p: SparkPlan)(pf: PartialFunction[SparkPlan, B]): Option[B] = { + val lifted = pf.lift + lifted(p).orElse { + allChildren(p).foldLeft(Option.empty[B]) { (l, r) => l.orElse(collectFirst(r)(pf)) } + } + } + + /** + * Returns a sequence containing the result of applying a partial function to all elements in this + * plan, also considering all the plans in its (nested) subqueries + */ + def collectInPlanAndSubqueries[B](p: SparkPlan)(f: PartialFunction[SparkPlan, B]): Seq[B] = { + (p +: subqueriesAll(p)).flatMap(collect(_)(f)) + } + + /** + * Returns a sequence containing the subqueries in this plan, also including the (nested) + * subquries in its children + */ + def subqueriesAll(p: SparkPlan): Seq[SparkPlan] = { + val subqueries = flatMap(p)(_.subqueries) + subqueries ++ subqueries.flatMap(subqueriesAll) + } + + private def allChildren(p: SparkPlan): Seq[SparkPlan] = p match { + case a: AdaptiveSparkPlanExec => Seq(a.executedPlan) + case s: QueryStageExec => Seq(s.plan) + case _ => p.children + } + + /** + * Strip the executePlan of AdaptiveSparkPlanExec leaf node. 
+ */ + def stripAQEPlan(p: SparkPlan): SparkPlan = p match { + case a: AdaptiveSparkPlanExec => a.executedPlan + case other => other + } + } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffledRowRDD.scala new file mode 100644 index 0000000000000..5aba57443d632 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffledRowRDD.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.{Dependency, MapOutputTrackerMaster, Partition, ShuffleDependency, SparkEnv, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLShuffleReadMetricsReporter} + +sealed trait ShufflePartitionSpec + +// A partition that reads data of one reducer. +case class SinglePartitionSpec(reducerIndex: Int) extends ShufflePartitionSpec + +// A partition that reads data of multiple reducers, from `startReducerIndex` (inclusive) to +// `endReducerIndex` (exclusive). 
+case class CoalescedPartitionSpec( + startReducerIndex: Int, endReducerIndex: Int) extends ShufflePartitionSpec + +// A partition that reads partial data of one reducer, from `startMapIndex` (inclusive) to +// `endMapIndex` (exclusive). +case class PartialPartitionSpec( + reducerIndex: Int, startMapIndex: Int, endMapIndex: Int) extends ShufflePartitionSpec + +private final case class CustomShufflePartition( + index: Int, spec: ShufflePartitionSpec) extends Partition + +// TODO: merge this with `ShuffledRowRDD`, and replace `LocalShuffledRowRDD` with this RDD. +class CustomShuffledRowRDD( + var dependency: ShuffleDependency[Int, InternalRow, InternalRow], + metrics: Map[String, SQLMetric], + partitionSpecs: Array[ShufflePartitionSpec]) + extends RDD[InternalRow](dependency.rdd.context, Nil) { + + override def getDependencies: Seq[Dependency[_]] = List(dependency) + + override def clearDependencies() { + super.clearDependencies() + dependency = null + } + + override def getPartitions: Array[Partition] = { + Array.tabulate[Partition](partitionSpecs.length) { i => + CustomShufflePartition(i, partitionSpecs(i)) + } + } + + override def getPreferredLocations(partition: Partition): Seq[String] = { + val tracker = SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster] + partition.asInstanceOf[CustomShufflePartition].spec match { + case SinglePartitionSpec(reducerIndex) => + tracker.getPreferredLocationsForShuffle(dependency, reducerIndex) + + case CoalescedPartitionSpec(startReducerIndex, endReducerIndex) => + startReducerIndex.until(endReducerIndex).flatMap { reducerIndex => + tracker.getPreferredLocationsForShuffle(dependency, reducerIndex) + } + + case PartialPartitionSpec(_, startMapIndex, endMapIndex) => + tracker.getMapLocation(dependency, startMapIndex, endMapIndex) + } + } + + override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { + val tempMetrics = context.taskMetrics().createTempShuffleReadMetrics() + // 
`SQLShuffleReadMetricsReporter` will update its own metrics for SQL exchange operator, + // as well as the `tempMetrics` for basic shuffle metrics. + val sqlMetricsReporter = new SQLShuffleReadMetricsReporter(tempMetrics, metrics) + val reader = split.asInstanceOf[CustomShufflePartition].spec match { + case SinglePartitionSpec(reducerIndex) => + SparkEnv.get.shuffleManager.getReader( + dependency.shuffleHandle, + reducerIndex, + reducerIndex + 1, + context, + sqlMetricsReporter) + + case CoalescedPartitionSpec(startReducerIndex, endReducerIndex) => + SparkEnv.get.shuffleManager.getReader( + dependency.shuffleHandle, + startReducerIndex, + endReducerIndex, + context, + sqlMetricsReporter) + + case PartialPartitionSpec(reducerIndex, startMapIndex, endMapIndex) => + SparkEnv.get.shuffleManager.getReaderForRange( + dependency.shuffleHandle, + startMapIndex, + endMapIndex, + reducerIndex, + reducerIndex + 1, + context, + sqlMetricsReporter) + } + reader.read().asInstanceOf[Iterator[Product2[Int, InternalRow]]].map(_._2) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala new file mode 100644 index 0000000000000..e5642991c59a3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.MapOutputStatistics +import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, Join, LogicalPlan, NO_BROADCAST_HASH} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf + +/** + * This optimization rule detects a join child that has a high ratio of empty partitions and + * adds a no-broadcast-hash-join hint to avoid it being broadcast. + */ +case class DemoteBroadcastHashJoin(conf: SQLConf) extends Rule[LogicalPlan] { + + private def shouldDemote(plan: LogicalPlan): Boolean = plan match { + case LogicalQueryStage(_, stage: ShuffleQueryStageExec) if stage.resultOption.isDefined + && stage.resultOption.get != null => + val mapOutputStatistics = stage.resultOption.get.asInstanceOf[MapOutputStatistics] + val partitionCnt = mapOutputStatistics.bytesByPartitionId.length + val nonZeroCnt = mapOutputStatistics.bytesByPartitionId.count(_ > 0) + partitionCnt > 0 && nonZeroCnt > 0 && + (nonZeroCnt * 1.0 / partitionCnt) < conf.nonEmptyPartitionRatioForBroadcastJoin + case _ => false + } + + def apply(plan: LogicalPlan): LogicalPlan = plan.transformDown { + case j @ Join(left, right, _, _, hint) => + var newHint = hint + if (!hint.leftHint.exists(_.strategy.isDefined) && shouldDemote(left)) { + newHint = newHint.copy(leftHint = + Some(hint.leftHint.getOrElse(HintInfo()).copy(strategy = Some(NO_BROADCAST_HASH)))) + } + if (!hint.rightHint.exists(_.strategy.isDefined) && shouldDemote(right)) { + newHint = newHint.copy(rightHint = + 
Some(hint.rightHint.getOrElse(HintInfo()).copy(strategy = Some(NO_BROADCAST_HASH)))) + } + if (newHint.ne(hint)) { + j.copy(hint = newHint) + } else { + j + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index 23eadfd6f3e5e..621c063e5a7d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -17,16 +17,16 @@ package org.apache.spark.sql.execution.adaptive -import scala.collection.concurrent.TrieMap import scala.collection.mutable -import org.apache.spark.sql.{execution, SparkSession} import org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.catalyst.expressions.DynamicPruningSubquery +import org.apache.spark.sql.catalyst.expressions.{DynamicPruningSubquery, ListQuery, SubqueryExpression} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.physical.UnspecifiedDistribution import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.ExecutedCommandExec +import org.apache.spark.sql.execution.exchange.Exchange import org.apache.spark.sql.internal.SQLConf /** @@ -36,45 +36,64 @@ import org.apache.spark.sql.internal.SQLConf * Note that this rule is stateful and thus should not be reused across query executions. */ case class InsertAdaptiveSparkPlan( - session: SparkSession, - queryExecution: QueryExecution) extends Rule[SparkPlan] { + adaptiveExecutionContext: AdaptiveExecutionContext) extends Rule[SparkPlan] { - private val conf = session.sessionState.conf + private val conf = adaptiveExecutionContext.session.sessionState.conf - // Subquery-reuse is shared across the entire query. 
- private val subqueryCache = new TrieMap[SparkPlan, BaseSubqueryExec]() + override def apply(plan: SparkPlan): SparkPlan = applyInternal(plan, false) - // Exchange-reuse is shared across the entire query, including sub-queries. - private val stageCache = new TrieMap[SparkPlan, QueryStageExec]() - - override def apply(plan: SparkPlan): SparkPlan = applyInternal(plan, queryExecution) - - private def applyInternal(plan: SparkPlan, qe: QueryExecution): SparkPlan = plan match { + private def applyInternal(plan: SparkPlan, isSubquery: Boolean): SparkPlan = plan match { + case _ if !conf.adaptiveExecutionEnabled => plan case _: ExecutedCommandExec => plan - case _ if conf.adaptiveExecutionEnabled && supportAdaptive(plan) => - try { - // Plan sub-queries recursively and pass in the shared stage cache for exchange reuse. Fall - // back to non-adaptive mode if adaptive execution is supported in any of the sub-queries. - val subqueryMap = buildSubqueryMap(plan) - val planSubqueriesRule = PlanAdaptiveSubqueries(subqueryMap) - val preprocessingRules = Seq( - planSubqueriesRule) - // Run pre-processing rules. - val newPlan = AdaptiveSparkPlanExec.applyPhysicalRules(plan, preprocessingRules) - logDebug(s"Adaptive execution enabled for plan: $plan") - AdaptiveSparkPlanExec(newPlan, session, preprocessingRules, subqueryCache, stageCache, qe) - } catch { - case SubqueryAdaptiveNotSupportedException(subquery) => - logWarning(s"${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key} is enabled " + - s"but is not supported for sub-query: $subquery.") - plan - } - case _ => - if (conf.adaptiveExecutionEnabled) { + case _ if shouldApplyAQE(plan, isSubquery) => + if (supportAdaptive(plan)) { + try { + // Plan sub-queries recursively and pass in the shared stage cache for exchange reuse. + // Fall back to non-AQE mode if AQE is not supported in any of the sub-queries. 
+ val subqueryMap = buildSubqueryMap(plan) + val planSubqueriesRule = PlanAdaptiveSubqueries(subqueryMap) + val preprocessingRules = Seq( + planSubqueriesRule) + // Run pre-processing rules. + val newPlan = AdaptiveSparkPlanExec.applyPhysicalRules(plan, preprocessingRules) + logDebug(s"Adaptive execution enabled for plan: $plan") + AdaptiveSparkPlanExec(newPlan, adaptiveExecutionContext, preprocessingRules, isSubquery) + } catch { + case SubqueryAdaptiveNotSupportedException(subquery) => + logWarning(s"${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key} is enabled " + + s"but is not supported for sub-query: $subquery.") + plan + } + } else { logWarning(s"${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key} is enabled " + s"but is not supported for query: $plan.") + plan } - plan + + case _ => plan + } + + // AQE is only useful when the query has exchanges or sub-queries. This method returns true if + // one of the following conditions is satisfied: + // - The config ADAPTIVE_EXECUTION_FORCE_APPLY is true. + // - The input query is from a sub-query. When this happens, it means we've already decided to + // apply AQE for the main query and we must continue to do it. + // - The query contains exchanges. + // - The query may need to add exchanges. It's an overkill to run `EnsureRequirements` here, so + // we just check `SparkPlan.requiredChildDistribution` and see if it's possible that the + // the query needs to add exchanges later. + // - The query contains sub-query. 
+ private def shouldApplyAQE(plan: SparkPlan, isSubquery: Boolean): Boolean = { + conf.getConf(SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY) || isSubquery || { + plan.find { + case _: Exchange => true + case p if !p.requiredChildDistribution.forall(_ == UnspecifiedDistribution) => true + case p => p.expressions.exists(_.find { + case _: SubqueryExpression => true + case _ => false + }.isDefined) + }.isDefined + } } private def supportAdaptive(plan: SparkPlan): Boolean = { @@ -93,27 +112,33 @@ case class InsertAdaptiveSparkPlan( * For each sub-query, generate the adaptive execution plan for each sub-query by applying this * rule, or reuse the execution plan from another sub-query of the same semantics if possible. */ - private def buildSubqueryMap(plan: SparkPlan): mutable.HashMap[Long, ExecSubqueryExpression] = { - val subqueryMap = mutable.HashMap.empty[Long, ExecSubqueryExpression] + private def buildSubqueryMap(plan: SparkPlan): Map[Long, SubqueryExec] = { + val subqueryMap = mutable.HashMap.empty[Long, SubqueryExec] plan.foreach(_.expressions.foreach(_.foreach { case expressions.ScalarSubquery(p, _, exprId) if !subqueryMap.contains(exprId.id) => val executedPlan = compileSubquery(p) verifyAdaptivePlan(executedPlan, p) - val scalarSubquery = execution.ScalarSubquery( - SubqueryExec(s"subquery${exprId.id}", executedPlan), exprId) - subqueryMap.put(exprId.id, scalarSubquery) + val subquery = SubqueryExec(s"subquery${exprId.id}", executedPlan) + subqueryMap.put(exprId.id, subquery) + case expressions.InSubquery(_, ListQuery(query, _, exprId, _)) + if !subqueryMap.contains(exprId.id) => + val executedPlan = compileSubquery(query) + verifyAdaptivePlan(executedPlan, query) + val subquery = SubqueryExec(s"subquery#${exprId.id}", executedPlan) + subqueryMap.put(exprId.id, subquery) case _ => })) - subqueryMap + subqueryMap.toMap } def compileSubquery(plan: LogicalPlan): SparkPlan = { - val queryExec = new QueryExecution(session, plan) // Apply the same instance of this rule 
to sub-queries so that sub-queries all share the // same `stageCache` for Exchange reuse. - this.applyInternal(queryExec.sparkPlan, queryExec) + this.applyInternal( + QueryExecution.createSparkPlan(adaptiveExecutionContext.session, + adaptiveExecutionContext.session.sessionState.planner, plan.clone()), true) } private def verifyAdaptivePlan(plan: SparkPlan, logicalPlan: LogicalPlan): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LocalShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LocalShuffledRowRDD.scala new file mode 100644 index 0000000000000..19b78f5e36c9b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LocalShuffledRowRDD.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLShuffleReadMetricsReporter} + +/** + * The [[Partition]] used by [[LocalShuffledRowRDD]]. + * @param mapIndex the index of mapper. 
+ * @param startPartition the start partition ID in mapIndex mapper. + * @param endPartition the end partition ID in mapIndex mapper. + */ +private final class LocalShuffledRowRDDPartition( + override val index: Int, + val mapIndex: Int, + val startPartition: Int, + val endPartition: Int) extends Partition { +} + +/** + * This is a specialized version of [[org.apache.spark.sql.execution.ShuffledRowRDD]]. This is used + * in Spark SQL adaptive execution when a shuffle join is converted to broadcast join at runtime + * because the map output of one input table is small enough for broadcast. This RDD represents the + * data of another input table of the join that reads from shuffle. Each partition of the RDD reads + * the whole data from just one mapper output locally. So actually there is no data transferred + * from the network. + * + * This RDD takes a [[ShuffleDependency]] (`dependency`). + * + * The `dependency` has the parent RDD of this RDD, which represents the dataset before shuffle + * (i.e. map output). Elements of this RDD are (partitionId, Row) pairs. + * Partition ids should be in the range [0, numPartitions - 1]. + * `dependency.partitioner.numPartitions` is the number of pre-shuffle partitions. (i.e. the number + * of partitions of the map output). The post-shuffle partition number is the same to the parent + * RDD's partition number. + * + * `partitionStartIndicesPerMapper` specifies how to split the shuffle blocks of each mapper into + * one or more partitions. For a mapper `i`, the `j`th partition includes shuffle blocks from + * `partitionStartIndicesPerMapper[i][j]` to `partitionStartIndicesPerMapper[i][j+1]` (exclusive). 
+ */ +class LocalShuffledRowRDD( + var dependency: ShuffleDependency[Int, InternalRow, InternalRow], + metrics: Map[String, SQLMetric], + partitionStartIndicesPerMapper: Array[Array[Int]]) + extends RDD[InternalRow](dependency.rdd.context, Nil) { + + private[this] val numReducers = dependency.partitioner.numPartitions + private[this] val numMappers = dependency.rdd.partitions.length + + override def getDependencies: Seq[Dependency[_]] = List(dependency) + + override def getPartitions: Array[Partition] = { + val partitions = ArrayBuffer[LocalShuffledRowRDDPartition]() + for (mapIndex <- 0 until numMappers) { + (partitionStartIndicesPerMapper(mapIndex) :+ numReducers).sliding(2, 1).foreach { + case Array(start, end) => + partitions += new LocalShuffledRowRDDPartition(partitions.length, mapIndex, start, end) + } + } + partitions.toArray + } + + override def getPreferredLocations(partition: Partition): Seq[String] = { + val tracker = SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster] + tracker.getMapLocation(dependency, partition.index, partition.index + 1) + } + + override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { + val localRowPartition = split.asInstanceOf[LocalShuffledRowRDDPartition] + val mapIndex = localRowPartition.mapIndex + val tempMetrics = context.taskMetrics().createTempShuffleReadMetrics() + // `SQLShuffleReadMetricsReporter` will update its own metrics for SQL exchange operator, + // as well as the `tempMetrics` for basic shuffle metrics. 
+ val sqlMetricsReporter = new SQLShuffleReadMetricsReporter(tempMetrics, metrics) + + val reader = SparkEnv.get.shuffleManager.getReaderForRange( + dependency.shuffleHandle, + mapIndex, + mapIndex + 1, + localRowPartition.startPartition, + localRowPartition.endPartition, + context, + sqlMetricsReporter) + reader.read().asInstanceOf[Iterator[Product2[Int, InternalRow]]].map(_._2) + } + + override def clearDependencies() { + super.clearDependencies() + dependency = null + } +} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala index a0d07a68ab0f4..d60c3ca72f6f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStageStrategy.scala @@ -36,9 +36,7 @@ import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNes object LogicalQueryStageStrategy extends Strategy with PredicateHelper { private def isBroadcastStage(plan: LogicalPlan): Boolean = plan match { - case LogicalQueryStage(_, physicalPlan) - if BroadcastQueryStageExec.isBroadcastQueryStageExec(physicalPlan) => - true + case LogicalQueryStage(_, _: BroadcastQueryStageExec) => true case _ => false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala new file mode 100644 index 0000000000000..e95441e28aafe --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReusedExchangeExec, ShuffleExchangeExec} +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.internal.SQLConf + +/** + * A rule to optimize the shuffle reader to local reader iff no additional shuffles + * will be introduced: + * 1. if the input plan is a shuffle, add local reader directly as we can never introduce + * extra shuffles in this case. + * 2. otherwise, add local reader to the probe side of broadcast hash join and + * then run `EnsureRequirements` to check whether additional shuffle introduced. + * If introduced, we will revert all the local readers. 
+ */ +case class OptimizeLocalShuffleReader(conf: SQLConf) extends Rule[SparkPlan] { + import OptimizeLocalShuffleReader._ + + private val ensureRequirements = EnsureRequirements(conf) + + // The build side is a broadcast query stage which should have been optimized using local reader + // already. So we only need to deal with probe side here. + private def createProbeSideLocalReader(plan: SparkPlan): SparkPlan = { + val optimizedPlan = plan.transformDown { + case join @ BroadcastJoinWithShuffleLeft(shuffleStage, BuildRight) => + val localReader = createLocalReader(shuffleStage) + join.asInstanceOf[BroadcastHashJoinExec].copy(left = localReader) + case join @ BroadcastJoinWithShuffleRight(shuffleStage, BuildLeft) => + val localReader = createLocalReader(shuffleStage) + join.asInstanceOf[BroadcastHashJoinExec].copy(right = localReader) + } + + val numShuffles = ensureRequirements.apply(optimizedPlan).collect { + case e: ShuffleExchangeExec => e + }.length + + // Check whether additional shuffle introduced. If introduced, revert the local reader. + if (numShuffles > 0) { + logDebug("OptimizeLocalShuffleReader rule is not applied due" + + " to additional shuffles will be introduced.") + plan + } else { + optimizedPlan + } + } + + private def createLocalReader(plan: SparkPlan): LocalShuffleReaderExec = { + plan match { + case c @ CoalescedShuffleReaderExec(s: ShuffleQueryStageExec, _) => + LocalShuffleReaderExec( + s, getPartitionStartIndices(s, Some(c.partitionStartIndices.length))) + case s: ShuffleQueryStageExec => + LocalShuffleReaderExec(s, getPartitionStartIndices(s, None)) + } + } + + // TODO: this method assumes all shuffle blocks are the same data size. We should calculate the + // partition start indices based on block size to avoid data skew. 
+ private def getPartitionStartIndices( + shuffleStage: ShuffleQueryStageExec, + advisoryParallelism: Option[Int]): Array[Array[Int]] = { + val shuffleDep = shuffleStage.shuffle.shuffleDependency + val numReducers = shuffleDep.partitioner.numPartitions + val expectedParallelism = advisoryParallelism.getOrElse(numReducers) + val numMappers = shuffleDep.rdd.getNumPartitions + Array.fill(numMappers) { + equallyDivide(numReducers, math.max(1, expectedParallelism / numMappers)).toArray + } + } + + /** + * To equally divide n elements into m buckets, basically each bucket should have n/m elements, + * for the remaining n%m elements, add one more element to the first n%m buckets each. Returns + * a sequence with length numBuckets and each value represents the start index of each bucket. + */ + private def equallyDivide(numElements: Int, numBuckets: Int): Seq[Int] = { + val elementsPerBucket = numElements / numBuckets + val remaining = numElements % numBuckets + val splitPoint = (elementsPerBucket + 1) * remaining + (0 until remaining).map(_ * (elementsPerBucket + 1)) ++ + (remaining until numBuckets).map(i => splitPoint + (i - remaining) * elementsPerBucket) + } + + override def apply(plan: SparkPlan): SparkPlan = { + if (!conf.getConf(SQLConf.LOCAL_SHUFFLE_READER_ENABLED)) { + return plan + } + + plan match { + case s: SparkPlan if canUseLocalShuffleReader(s) => + createLocalReader(s) + case s: SparkPlan => + createProbeSideLocalReader(s) + } + } +} + +object OptimizeLocalShuffleReader { + + object BroadcastJoinWithShuffleLeft { + def unapply(plan: SparkPlan): Option[(SparkPlan, BuildSide)] = plan match { + case join: BroadcastHashJoinExec if canUseLocalShuffleReader(join.left) => + Some((join.left, join.buildSide)) + case _ => None + } + } + + object BroadcastJoinWithShuffleRight { + def unapply(plan: SparkPlan): Option[(SparkPlan, BuildSide)] = plan match { + case join: BroadcastHashJoinExec if canUseLocalShuffleReader(join.right) => + Some((join.right, 
join.buildSide)) + case _ => None + } + } + + def canUseLocalShuffleReader(plan: SparkPlan): Boolean = plan match { + case s: ShuffleQueryStageExec => s.shuffle.canChangeNumPartitions + case CoalescedShuffleReaderExec(s: ShuffleQueryStageExec, _) => s.shuffle.canChangeNumPartitions + case _ => false + } +} + +/** + * A wrapper of shuffle query stage, which submits one or more reduce tasks per mapper to read the + * shuffle files written by one mapper. By doing this, it's very likely to read the shuffle files + * locally, as the shuffle files that a reduce task needs to read are in one node. + * + * @param child It's usually `ShuffleQueryStageExec`, but can be the shuffle exchange node during + * canonicalization. + * @param partitionStartIndicesPerMapper A mapper usually writes many shuffle blocks, and it's + * better to launch multiple tasks to read shuffle blocks of + * one mapper. This array contains the partition start + * indices for each mapper. + */ +case class LocalShuffleReaderExec( + child: SparkPlan, + partitionStartIndicesPerMapper: Array[Array[Int]]) extends UnaryExecNode { + + override def output: Seq[Attribute] = child.output + + override lazy val outputPartitioning: Partitioning = { + // when we read one mapper per task, then the output partitioning is the same as the plan + // before shuffle. 
+ if (partitionStartIndicesPerMapper.forall(_.length == 1)) { + child match { + case ShuffleQueryStageExec(_, s: ShuffleExchangeExec) => + s.child.outputPartitioning + case ShuffleQueryStageExec(_, r @ ReusedExchangeExec(_, s: ShuffleExchangeExec)) => + s.child.outputPartitioning match { + case e: Expression => r.updateAttr(e).asInstanceOf[Partitioning] + case other => other + } + case _ => + throw new IllegalStateException("operating on canonicalization plan") + } + } else { + UnknownPartitioning(partitionStartIndicesPerMapper.map(_.length).sum) + } + } + + private var cachedShuffleRDD: RDD[InternalRow] = null + + override protected def doExecute(): RDD[InternalRow] = { + if (cachedShuffleRDD == null) { + cachedShuffleRDD = child match { + case stage: ShuffleQueryStageExec => + stage.shuffle.createLocalShuffleRDD(partitionStartIndicesPerMapper) + case _ => + throw new IllegalStateException("operating on canonicalization plan") + } + } + cachedShuffleRDD + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala new file mode 100644 index 0000000000000..a716497c274b8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -0,0 +1,390 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.commons.io.FileUtils + +import org.apache.spark.{MapOutputStatistics, MapOutputTrackerMaster, SparkEnv} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ShuffleExchangeExec} +import org.apache.spark.sql.execution.joins.SortMergeJoinExec +import org.apache.spark.sql.internal.SQLConf + +case class OptimizeSkewedJoin(conf: SQLConf) extends Rule[SparkPlan] { + + private val ensureRequirements = EnsureRequirements(conf) + + private val supportedJoinTypes = + Inner :: Cross :: LeftSemi :: LeftAnti :: LeftOuter :: RightOuter :: Nil + + /** + * A partition is considered as a skewed partition if its size is larger than the median + * partition size * spark.sql.adaptive.skewedPartitionFactor and also larger than + * spark.sql.adaptive.skewedPartitionSizeThreshold. 
+ */ + private def isSkewed(size: Long, medianSize: Long): Boolean = { + size > medianSize * conf.getConf(SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_FACTOR) && + size > conf.getConf(SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_SIZE_THRESHOLD) + } + + private def medianSize(stats: MapOutputStatistics): Long = { + val numPartitions = stats.bytesByPartitionId.length + val bytes = stats.bytesByPartitionId.sorted + numPartitions match { + case _ if (numPartitions % 2 == 0) => + math.max((bytes(numPartitions / 2) + bytes(numPartitions / 2 - 1)) / 2, 1) + case _ => math.max(bytes(numPartitions / 2), 1) + } + } + + /** + * Get the map size of the specific reduce shuffle Id. + */ + private def getMapSizesForReduceId(shuffleId: Int, partitionId: Int): Array[Long] = { + val mapOutputTracker = SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster] + mapOutputTracker.shuffleStatuses(shuffleId).mapStatuses.map{_.getSizeForBlock(partitionId)} + } + + /** + * Split the skewed partition based on the map size and the max split number. 
+ */ + private def getMapStartIndices(stage: ShuffleQueryStageExec, partitionId: Int): Array[Int] = { + val shuffleId = stage.shuffle.shuffleDependency.shuffleHandle.shuffleId + val mapPartitionSizes = getMapSizesForReduceId(shuffleId, partitionId) + val maxSplits = math.min(conf.getConf( + SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_MAX_SPLITS), mapPartitionSizes.length) + val avgPartitionSize = mapPartitionSizes.sum / maxSplits + val advisoryPartitionSize = math.max(avgPartitionSize, + conf.getConf(SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_SIZE_THRESHOLD)) + val partitionStartIndices = ArrayBuffer[Int]() + partitionStartIndices += 0 + var i = 0 + var postMapPartitionSize = 0L + while (i < mapPartitionSizes.length) { + val nextMapPartitionSize = mapPartitionSizes(i) + if (i > 0 && postMapPartitionSize + nextMapPartitionSize > advisoryPartitionSize) { + partitionStartIndices += i + postMapPartitionSize = nextMapPartitionSize + } else { + postMapPartitionSize += nextMapPartitionSize + } + i += 1 + } + + if (partitionStartIndices.size > maxSplits) { + partitionStartIndices.take(maxSplits).toArray + } else partitionStartIndices.toArray + } + + private def getStatistics(stage: ShuffleQueryStageExec): MapOutputStatistics = { + assert(stage.resultOption.isDefined, "ShuffleQueryStageExec should" + + " already be ready when executing OptimizeSkewedPartitions rule") + stage.resultOption.get.asInstanceOf[MapOutputStatistics] + } + + private def canSplitLeftSide(joinType: JoinType) = { + joinType == Inner || joinType == Cross || joinType == LeftSemi || + joinType == LeftAnti || joinType == LeftOuter + } + + private def canSplitRightSide(joinType: JoinType) = { + joinType == Inner || joinType == Cross || joinType == RightOuter + } + + private def getNumMappers(stage: ShuffleQueryStageExec): Int = { + stage.shuffle.shuffleDependency.rdd.partitions.length + } + + private def getSizeInfo(medianSize: Long, maxSize: Long): String = { + s"median size: $medianSize, max size: 
${maxSize}" + } + + /* + * This method aim to optimize the skewed join with the following steps: + * 1. Check whether the shuffle partition is skewed based on the median size + * and the skewed partition threshold in origin smj. + * 2. Assuming partition0 is skewed in left side, and it has 5 mappers (Map0, Map1...Map4). + * And we may split the 5 Mappers into 3 mapper ranges [(Map0, Map1), (Map2, Map3), (Map4)] + * based on the map size and the max split number. + * 3. Wrap the join left child with a special shuffle reader that reads each mapper range with one + * task, so total 3 tasks. + * 4. Wrap the join right child with a special shuffle reader that reads partition0 3 times by + * 3 tasks separately. + */ + def optimizeSkewJoin(plan: SparkPlan): SparkPlan = plan.transformUp { + case smj @ SortMergeJoinExec(_, _, joinType, _, + s1 @ SortExec(_, _, left: ShuffleQueryStageExec, _), + s2 @ SortExec(_, _, right: ShuffleQueryStageExec, _), _) + if supportedJoinTypes.contains(joinType) => + val leftStats = getStatistics(left) + val rightStats = getStatistics(right) + val numPartitions = leftStats.bytesByPartitionId.length + + val leftMedSize = medianSize(leftStats) + val rightMedSize = medianSize(rightStats) + logDebug( + s""" + |Try to optimize skewed join. + |Left side partition size: + |${getSizeInfo(leftMedSize, leftStats.bytesByPartitionId.max)} + |Right side partition size: + |${getSizeInfo(rightMedSize, rightStats.bytesByPartitionId.max)} + """.stripMargin) + val canSplitLeft = canSplitLeftSide(joinType) + val canSplitRight = canSplitRightSide(joinType) + + val leftSidePartitions = mutable.ArrayBuffer.empty[ShufflePartitionSpec] + val rightSidePartitions = mutable.ArrayBuffer.empty[ShufflePartitionSpec] + // This is used to delay the creation of non-skew partitions so that we can potentially + // coalesce them like `ReduceNumShufflePartitions` does. 
+ val nonSkewPartitionIndices = mutable.ArrayBuffer.empty[Int] + val leftSkewDesc = new SkewDesc + val rightSkewDesc = new SkewDesc + for (partitionIndex <- 0 until numPartitions) { + val leftSize = leftStats.bytesByPartitionId(partitionIndex) + val isLeftSkew = isSkewed(leftSize, leftMedSize) && canSplitLeft + val rightSize = rightStats.bytesByPartitionId(partitionIndex) + val isRightSkew = isSkewed(rightSize, rightMedSize) && canSplitRight + if (isLeftSkew || isRightSkew) { + if (nonSkewPartitionIndices.nonEmpty) { + // As soon as we see a skew, we'll "flush" out unhandled non-skew partitions. + createNonSkewPartitions(leftStats, rightStats, nonSkewPartitionIndices).foreach { p => + leftSidePartitions += p + rightSidePartitions += p + } + nonSkewPartitionIndices.clear() + } + + val leftParts = if (isLeftSkew) { + leftSkewDesc.addPartitionSize(leftSize) + createSkewPartitions( + partitionIndex, + getMapStartIndices(left, partitionIndex), + getNumMappers(left)) + } else { + Seq(SinglePartitionSpec(partitionIndex)) + } + + val rightParts = if (isRightSkew) { + rightSkewDesc.addPartitionSize(rightSize) + createSkewPartitions( + partitionIndex, + getMapStartIndices(right, partitionIndex), + getNumMappers(right)) + } else { + Seq(SinglePartitionSpec(partitionIndex)) + } + + for { + leftSidePartition <- leftParts + rightSidePartition <- rightParts + } { + leftSidePartitions += leftSidePartition + rightSidePartitions += rightSidePartition + } + } else { + // Add to `nonSkewPartitionIndices` first, and add real partitions later, in case we can + // coalesce the non-skew partitions. + nonSkewPartitionIndices += partitionIndex + // If this is the last partition, add real partition immediately. 
+ if (partitionIndex == numPartitions - 1) { + createNonSkewPartitions(leftStats, rightStats, nonSkewPartitionIndices).foreach { p => + leftSidePartitions += p + rightSidePartitions += p + } + nonSkewPartitionIndices.clear() + } + } + } + + logDebug("number of skewed partitions: " + + s"left ${leftSkewDesc.numPartitions}, right ${rightSkewDesc.numPartitions}") + if (leftSkewDesc.numPartitions > 0 || rightSkewDesc.numPartitions > 0) { + val newLeft = SkewJoinShuffleReaderExec( + left, leftSidePartitions.toArray, leftSkewDesc.toString) + val newRight = SkewJoinShuffleReaderExec( + right, rightSidePartitions.toArray, rightSkewDesc.toString) + smj.copy( + left = s1.copy(child = newLeft), right = s2.copy(child = newRight), isSkewJoin = true) + } else { + smj + } + } + + private def createNonSkewPartitions( + leftStats: MapOutputStatistics, + rightStats: MapOutputStatistics, + nonSkewPartitionIndices: Seq[Int]): Seq[ShufflePartitionSpec] = { + assert(nonSkewPartitionIndices.nonEmpty) + if (nonSkewPartitionIndices.length == 1) { + Seq(SinglePartitionSpec(nonSkewPartitionIndices.head)) + } else { + val startIndices = ShufflePartitionsCoalescer.coalescePartitions( + Array(leftStats, rightStats), + firstPartitionIndex = nonSkewPartitionIndices.head, + // `lastPartitionIndex` is exclusive. + lastPartitionIndex = nonSkewPartitionIndices.last + 1, + advisoryTargetSize = conf.targetPostShuffleInputSize) + startIndices.indices.map { i => + val startIndex = startIndices(i) + val endIndex = if (i == startIndices.length - 1) { + // `endIndex` is exclusive. + nonSkewPartitionIndices.last + 1 + } else { + startIndices(i + 1) + } + // Do not create `CoalescedPartitionSpec` if only need to read a singe partition. 
+ if (startIndex + 1 == endIndex) { + SinglePartitionSpec(startIndex) + } else { + CoalescedPartitionSpec(startIndex, endIndex) + } + } + } + } + + private def createSkewPartitions( + reducerIndex: Int, + mapStartIndices: Array[Int], + numMappers: Int): Seq[PartialPartitionSpec] = { + mapStartIndices.indices.map { i => + val startMapIndex = mapStartIndices(i) + val endMapIndex = if (i == mapStartIndices.length - 1) { + numMappers + } else { + mapStartIndices(i + 1) + } + PartialPartitionSpec(reducerIndex, startMapIndex, endMapIndex) + } + } + + override def apply(plan: SparkPlan): SparkPlan = { + if (!conf.getConf(SQLConf.ADAPTIVE_EXECUTION_SKEWED_JOIN_ENABLED)) { + return plan + } + + def collectShuffleStages(plan: SparkPlan): Seq[ShuffleQueryStageExec] = plan match { + case stage: ShuffleQueryStageExec => Seq(stage) + case _ => plan.children.flatMap(collectShuffleStages) + } + + val shuffleStages = collectShuffleStages(plan) + + if (shuffleStages.length == 2) { + // When multi table join, there will be too many complex combination to consider. + // Currently we only handle 2 table join like following two use cases. 
+ // SMJ + // Sort + // Shuffle + // Sort + // Shuffle + val optimizePlan = optimizeSkewJoin(plan) + val numShuffles = ensureRequirements.apply(optimizePlan).collect { + case e: ShuffleExchangeExec => e + }.length + + if (numShuffles > 0) { + logDebug("OptimizeSkewedJoin rule is not applied due" + + " to additional shuffles will be introduced.") + plan + } else { + optimizePlan + } + } else { + plan + } + } +} + +private class SkewDesc { + private[this] var numSkewedPartitions: Int = 0 + private[this] var totalSize: Long = 0 + private[this] var maxSize: Long = 0 + private[this] var minSize: Long = 0 + + def numPartitions: Int = numSkewedPartitions + + def addPartitionSize(size: Long): Unit = { + if (numSkewedPartitions == 0) { + maxSize = size + minSize = size + } + numSkewedPartitions += 1 + totalSize += size + if (size > maxSize) maxSize = size + if (size < minSize) minSize = size + } + + override def toString: String = { + if (numSkewedPartitions == 0) { + "no skewed partition" + } else { + val maxSizeStr = FileUtils.byteCountToDisplaySize(maxSize) + val minSizeStr = FileUtils.byteCountToDisplaySize(minSize) + val avgSizeStr = FileUtils.byteCountToDisplaySize(totalSize / numSkewedPartitions) + s"$numSkewedPartitions skewed partitions with " + + s"size(max=$maxSizeStr, min=$minSizeStr, avg=$avgSizeStr)" + } + } +} + +/** + * A wrapper of shuffle query stage, which follows the given partition arrangement. + * + * @param child It's usually `ShuffleQueryStageExec`, but can be the shuffle exchange node during + * canonicalization. + * @param partitionSpecs The partition specs that defines the arrangement. + * @param skewDesc The description of the skewed partitions. 
+ */ +case class SkewJoinShuffleReaderExec( + child: SparkPlan, + partitionSpecs: Array[ShufflePartitionSpec], + skewDesc: String) extends UnaryExecNode { + + override def output: Seq[Attribute] = child.output + + override def outputPartitioning: Partitioning = { + UnknownPartitioning(partitionSpecs.length) + } + + override def stringArgs: Iterator[Any] = Iterator(skewDesc) + + private var cachedShuffleRDD: RDD[InternalRow] = null + + override protected def doExecute(): RDD[InternalRow] = { + if (cachedShuffleRDD == null) { + cachedShuffleRDD = child match { + case stage: ShuffleQueryStageExec => + new CustomShuffledRowRDD( + stage.shuffle.shuffleDependency, stage.shuffle.readMetrics, partitionSpecs) + case _ => + throw new IllegalStateException("operating on canonicalization plan") + } + } + cachedShuffleRDD + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala index 91d4359224a6a..f845b6b16ee3a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala @@ -18,19 +18,28 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.catalyst.expressions.ListQuery +import org.apache.spark.sql.catalyst.expressions.{CreateNamedStruct, ListQuery, Literal} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.{ExecSubqueryExpression, SparkPlan} +import org.apache.spark.sql.execution +import org.apache.spark.sql.execution.{InSubqueryExec, SparkPlan, SubqueryExec} -case class PlanAdaptiveSubqueries( - subqueryMap: scala.collection.Map[Long, ExecSubqueryExpression]) extends Rule[SparkPlan] { +case class PlanAdaptiveSubqueries(subqueryMap: Map[Long, SubqueryExec]) extends 
Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { plan.transformAllExpressions { case expressions.ScalarSubquery(_, _, exprId) => - subqueryMap(exprId.id) - case expressions.InSubquery(_, ListQuery(_, _, exprId, _)) => - subqueryMap(exprId.id) + execution.ScalarSubquery(subqueryMap(exprId.id), exprId) + case expressions.InSubquery(values, ListQuery(_, _, exprId, _)) => + val expr = if (values.length == 1) { + values.head + } else { + CreateNamedStruct( + values.zipWithIndex.flatMap { case (v, index) => + Seq(Literal(s"col_$index"), v) + } + ) + } + InSubqueryExec(expr, subqueryMap(exprId.id), exprId) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala index 231fffce3360b..d5dc1be63f06e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.adaptive -import scala.collection.mutable import scala.concurrent.Future import org.apache.spark.{FutureAction, MapOutputStatistics} @@ -25,13 +24,11 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.Statistics import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.exchange._ - /** * A query stage is an independent subgraph of the query plan. Query stage materializes its output * before proceeding with further operators of the query plan. 
The data statistics of the @@ -76,12 +73,19 @@ abstract class QueryStageExec extends LeafExecNode { doMaterialize() } + def newReuseInstance(newStageId: Int, newOutput: Seq[Attribute]): QueryStageExec + /** * Compute the statistics of the query stage if executed, otherwise None. */ def computeStats(): Option[Statistics] = resultOption.map { _ => // Metrics `dataSize` are available in both `ShuffleExchangeExec` and `BroadcastExchangeExec`. - Statistics(sizeInBytes = plan.metrics("dataSize").value) + val exchange = plan match { + case r: ReusedExchangeExec => r.child + case e: Exchange => e + case _ => throw new IllegalStateException("wrong plan for query stage:\n " + plan.treeString) + } + Statistics(sizeInBytes = exchange.metrics("dataSize").value) } @transient @@ -93,6 +97,7 @@ abstract class QueryStageExec extends LeafExecNode { override def outputOrdering: Seq[SortOrder] = plan.outputOrdering override def executeCollect(): Array[InternalRow] = plan.executeCollect() override def executeTake(n: Int): Array[InternalRow] = plan.executeTake(n) + override def executeTail(n: Int): Array[InternalRow] = plan.executeTail(n) override def executeToIterator(): Iterator[InternalRow] = plan.executeToIterator() override def doPrepare(): Unit = plan.prepare() @@ -125,27 +130,33 @@ abstract class QueryStageExec extends LeafExecNode { } /** - * A shuffle query stage whose child is a [[ShuffleExchangeExec]]. + * A shuffle query stage whose child is a [[ShuffleExchangeExec]] or [[ReusedExchangeExec]]. 
*/ case class ShuffleQueryStageExec( override val id: Int, - override val plan: ShuffleExchangeExec) extends QueryStageExec { + override val plan: SparkPlan) extends QueryStageExec { - @transient lazy val mapOutputStatisticsFuture: Future[MapOutputStatistics] = { - if (plan.inputRDD.getNumPartitions == 0) { - Future.successful(null) - } else { - sparkContext.submitMapStage(plan.shuffleDependency) - } + @transient val shuffle = plan match { + case s: ShuffleExchangeExec => s + case ReusedExchangeExec(_, s: ShuffleExchangeExec) => s + case _ => + throw new IllegalStateException("wrong plan for shuffle stage:\n " + plan.treeString) } override def doMaterialize(): Future[Any] = { - mapOutputStatisticsFuture + shuffle.mapOutputStatisticsFuture + } + + override def newReuseInstance(newStageId: Int, newOutput: Seq[Attribute]): QueryStageExec = { + ShuffleQueryStageExec( + newStageId, + ReusedExchangeExec(newOutput, shuffle)) } override def cancel(): Unit = { - mapOutputStatisticsFuture match { - case action: FutureAction[MapOutputStatistics] if !mapOutputStatisticsFuture.isCompleted => + shuffle.mapOutputStatisticsFuture match { + case action: FutureAction[MapOutputStatistics] + if !shuffle.mapOutputStatisticsFuture.isCompleted => action.cancel() case _ => } @@ -153,80 +164,33 @@ case class ShuffleQueryStageExec( } /** - * A broadcast query stage whose child is a [[BroadcastExchangeExec]]. + * A broadcast query stage whose child is a [[BroadcastExchangeExec]] or [[ReusedExchangeExec]]. 
*/ case class BroadcastQueryStageExec( override val id: Int, - override val plan: BroadcastExchangeExec) extends QueryStageExec { + override val plan: SparkPlan) extends QueryStageExec { - override def doMaterialize(): Future[Any] = { - plan.completionFuture + @transient val broadcast = plan match { + case b: BroadcastExchangeExec => b + case ReusedExchangeExec(_, b: BroadcastExchangeExec) => b + case _ => + throw new IllegalStateException("wrong plan for broadcast stage:\n " + plan.treeString) } - override def cancel(): Unit = { - if (!plan.relationFuture.isDone) { - sparkContext.cancelJobGroup(plan.runId.toString) - plan.relationFuture.cancel(true) - } - } -} - -object ShuffleQueryStageExec { - /** - * Returns true if the plan is a [[ShuffleQueryStageExec]] or a reused [[ShuffleQueryStageExec]]. - */ - def isShuffleQueryStageExec(plan: SparkPlan): Boolean = plan match { - case r: ReusedQueryStageExec => isShuffleQueryStageExec(r.plan) - case _: ShuffleQueryStageExec => true - case _ => false - } -} - -object BroadcastQueryStageExec { - /** - * Returns true if the plan is a [[BroadcastQueryStageExec]] or a reused - * [[BroadcastQueryStageExec]]. - */ - def isBroadcastQueryStageExec(plan: SparkPlan): Boolean = plan match { - case r: ReusedQueryStageExec => isBroadcastQueryStageExec(r.plan) - case _: BroadcastQueryStageExec => true - case _ => false - } -} - -/** - * A wrapper for reused query stage to have different output. 
- */ -case class ReusedQueryStageExec( - override val id: Int, - override val plan: QueryStageExec, - override val output: Seq[Attribute]) extends QueryStageExec { - override def doMaterialize(): Future[Any] = { - plan.materialize() + broadcast.completionFuture } - override def cancel(): Unit = { - plan.cancel() + override def newReuseInstance(newStageId: Int, newOutput: Seq[Attribute]): QueryStageExec = { + BroadcastQueryStageExec( + newStageId, + ReusedExchangeExec(newOutput, broadcast)) } - // `ReusedQueryStageExec` can have distinct set of output attribute ids from its child, we need - // to update the attribute ids in `outputPartitioning` and `outputOrdering`. - private lazy val updateAttr: Expression => Expression = { - val originalAttrToNewAttr = AttributeMap(plan.output.zip(output)) - e => e.transform { - case attr: Attribute => originalAttrToNewAttr.getOrElse(attr, attr) + override def cancel(): Unit = { + if (!broadcast.relationFuture.isDone) { + sparkContext.cancelJobGroup(broadcast.runId.toString) + broadcast.relationFuture.cancel(true) } } - - override def outputPartitioning: Partitioning = plan.outputPartitioning match { - case e: Expression => updateAttr(e).asInstanceOf[Partitioning] - case other => other - } - - override def outputOrdering: Seq[SortOrder] = { - plan.outputOrdering.map(updateAttr(_).asInstanceOf[SortOrder]) - } - - override def computeStats(): Option[Statistics] = plan.computeStats() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReduceNumShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReduceNumShufflePartitions.scala index 1a85d5c02075b..5bbcb14e008d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReduceNumShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReduceNumShufflePartitions.scala @@ -15,10 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.execution.adaptive.rule - -import scala.collection.mutable.ArrayBuffer -import scala.concurrent.duration.Duration +package org.apache.spark.sql.execution.adaptive import org.apache.spark.MapOutputStatistics import org.apache.spark.rdd.RDD @@ -27,29 +24,11 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, UnknownPartitioning} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{ShuffledRowRDD, SparkPlan, UnaryExecNode} -import org.apache.spark.sql.execution.adaptive.{QueryStageExec, ReusedQueryStageExec, ShuffleQueryStageExec} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.ThreadUtils /** - * A rule to adjust the post shuffle partitions based on the map output statistics. - * - * The strategy used to determine the number of post-shuffle partitions is described as follows. - * To determine the number of post-shuffle partitions, we have a target input size for a - * post-shuffle partition. Once we have size statistics of all pre-shuffle partitions, we will do - * a pass of those statistics and pack pre-shuffle partitions with continuous indices to a single - * post-shuffle partition until adding another pre-shuffle partition would cause the size of a - * post-shuffle partition to be greater than the target size. 
- * - * For example, we have two stages with the following pre-shuffle partition size statistics: - * stage 1: [100 MiB, 20 MiB, 100 MiB, 10MiB, 30 MiB] - * stage 2: [10 MiB, 10 MiB, 70 MiB, 5 MiB, 5 MiB] - * assuming the target input size is 128 MiB, we will have four post-shuffle partitions, - * which are: - * - post-shuffle partition 0: pre-shuffle partition 0 (size 110 MiB) - * - post-shuffle partition 1: pre-shuffle partition 1 (size 30 MiB) - * - post-shuffle partition 2: pre-shuffle partition 2 (size 170 MiB) - * - post-shuffle partition 3: pre-shuffle partition 3 and 4 (size 50 MiB) + * A rule to reduce the post shuffle partitions based on the map output statistics, which can + * avoid many small reduce tasks that hurt performance. */ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] { @@ -64,19 +43,22 @@ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] { return plan } - val shuffleStages = plan.collect { - case stage: ShuffleQueryStageExec => stage - case ReusedQueryStageExec(_, stage: ShuffleQueryStageExec, _) => stage + def collectShuffleStages(plan: SparkPlan): Seq[ShuffleQueryStageExec] = plan match { + case _: LocalShuffleReaderExec => Nil + case _: SkewJoinShuffleReaderExec => Nil + case stage: ShuffleQueryStageExec => Seq(stage) + case _ => plan.children.flatMap(collectShuffleStages) } + + val shuffleStages = collectShuffleStages(plan) // ShuffleExchanges introduced by repartition do not support changing the number of partitions. // We change the number of partitions in the stage only if all the ShuffleExchanges support it. 
- if (!shuffleStages.forall(_.plan.canChangeNumPartitions)) { + if (!shuffleStages.forall(_.shuffle.canChangeNumPartitions)) { plan } else { val shuffleMetrics = shuffleStages.map { stage => - val metricsFuture = stage.mapOutputStatisticsFuture - assert(metricsFuture.isCompleted, "ShuffleQueryStageExec should already be ready") - ThreadUtils.awaitResult(metricsFuture, Duration.Zero) + assert(stage.resultOption.isDefined, "ShuffleQueryStageExec should already be ready") + stage.resultOption.get.asInstanceOf[MapOutputStatistics] } // `ShuffleQueryStageExec` gives null mapOutputStatistics when the input RDD has 0 partitions, @@ -88,13 +70,19 @@ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] { val distinctNumPreShufflePartitions = validMetrics.map(stats => stats.bytesByPartitionId.length).distinct if (validMetrics.nonEmpty && distinctNumPreShufflePartitions.length == 1) { - val partitionStartIndices = estimatePartitionStartIndices(validMetrics.toArray) + val partitionStartIndices = ShufflePartitionsCoalescer.coalescePartitions( + validMetrics.toArray, + firstPartitionIndex = 0, + lastPartitionIndex = distinctNumPreShufflePartitions.head, + advisoryTargetSize = conf.targetPostShuffleInputSize, + minNumPartitions = conf.minNumPostShufflePartitions) // This transformation adds new nodes, so we must use `transformUp` here. + val stageIds = shuffleStages.map(_.id).toSet plan.transformUp { // even for shuffle exchange whose input RDD has 0 partition, we should still update its // `partitionStartIndices`, so that all the leaf shuffles in a stage have the same // number of output partitions. 
- case stage: QueryStageExec if ShuffleQueryStageExec.isShuffleQueryStageExec(stage) => + case stage: ShuffleQueryStageExec if stageIds.contains(stage.id) => CoalescedShuffleReaderExec(stage, partitionStartIndices) } } else { @@ -102,90 +90,22 @@ case class ReduceNumShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] { } } } - - /** - * Estimates partition start indices for post-shuffle partitions based on - * mapOutputStatistics provided by all pre-shuffle stages. - */ - // visible for testing. - private[sql] def estimatePartitionStartIndices( - mapOutputStatistics: Array[MapOutputStatistics]): Array[Int] = { - val minNumPostShufflePartitions = conf.minNumPostShufflePartitions - val advisoryTargetPostShuffleInputSize = conf.targetPostShuffleInputSize - // If minNumPostShufflePartitions is defined, it is possible that we need to use a - // value less than advisoryTargetPostShuffleInputSize as the target input size of - // a post shuffle task. - val totalPostShuffleInputSize = mapOutputStatistics.map(_.bytesByPartitionId.sum).sum - // The max at here is to make sure that when we have an empty table, we - // only have a single post-shuffle partition. - // There is no particular reason that we pick 16. We just need a number to - // prevent maxPostShuffleInputSize from being set to 0. - val maxPostShuffleInputSize = math.max( - math.ceil(totalPostShuffleInputSize / minNumPostShufflePartitions.toDouble).toLong, 16) - val targetPostShuffleInputSize = - math.min(maxPostShuffleInputSize, advisoryTargetPostShuffleInputSize) - - logInfo( - s"advisoryTargetPostShuffleInputSize: $advisoryTargetPostShuffleInputSize, " + - s"targetPostShuffleInputSize $targetPostShuffleInputSize.") - - // Make sure we do get the same number of pre-shuffle partitions for those stages. 
- val distinctNumPreShufflePartitions = - mapOutputStatistics.map(stats => stats.bytesByPartitionId.length).distinct - // The reason that we are expecting a single value of the number of pre-shuffle partitions - // is that when we add Exchanges, we set the number of pre-shuffle partitions - // (i.e. map output partitions) using a static setting, which is the value of - // spark.sql.shuffle.partitions. Even if two input RDDs are having different - // number of partitions, they will have the same number of pre-shuffle partitions - // (i.e. map output partitions). - assert( - distinctNumPreShufflePartitions.length == 1, - "There should be only one distinct value of the number pre-shuffle partitions " + - "among registered Exchange operator.") - val numPreShufflePartitions = distinctNumPreShufflePartitions.head - - val partitionStartIndices = ArrayBuffer[Int]() - // The first element of partitionStartIndices is always 0. - partitionStartIndices += 0 - - var postShuffleInputSize = 0L - - var i = 0 - while (i < numPreShufflePartitions) { - // We calculate the total size of ith pre-shuffle partitions from all pre-shuffle stages. - // Then, we add the total size to postShuffleInputSize. - var nextShuffleInputSize = 0L - var j = 0 - while (j < mapOutputStatistics.length) { - nextShuffleInputSize += mapOutputStatistics(j).bytesByPartitionId(i) - j += 1 - } - - // If including the nextShuffleInputSize would exceed the target partition size, then start a - // new partition. - if (i > 0 && postShuffleInputSize + nextShuffleInputSize > targetPostShuffleInputSize) { - partitionStartIndices += i - // reset postShuffleInputSize. - postShuffleInputSize = nextShuffleInputSize - } else { - postShuffleInputSize += nextShuffleInputSize - } - - i += 1 - } - - partitionStartIndices.toArray - } } +/** + * A wrapper of shuffle query stage, which submits fewer reduce task as one reduce task may read + * multiple shuffle partitions. 
This can avoid many small reduce tasks that hurt performance. + * + * @param child It's usually `ShuffleQueryStageExec`, but can be the shuffle exchange node during + * canonicalization. + * @param partitionStartIndices The start partition indices for the coalesced partitions. + */ case class CoalescedShuffleReaderExec( - child: QueryStageExec, + child: SparkPlan, partitionStartIndices: Array[Int]) extends UnaryExecNode { override def output: Seq[Attribute] = child.output - override def doCanonicalize(): SparkPlan = child.canonicalized - override def outputPartitioning: Partitioning = { UnknownPartitioning(partitionStartIndices.length) } @@ -196,9 +116,9 @@ case class CoalescedShuffleReaderExec( if (cachedShuffleRDD == null) { cachedShuffleRDD = child match { case stage: ShuffleQueryStageExec => - stage.plan.createShuffledRDD(Some(partitionStartIndices)) - case ReusedQueryStageExec(_, stage: ShuffleQueryStageExec, _) => - stage.plan.createShuffledRDD(Some(partitionStartIndices)) + stage.shuffle.createShuffledRDD(Some(partitionStartIndices)) + case _ => + throw new IllegalStateException("operating on canonicalization plan") } } cachedShuffleRDD diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsCoalescer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsCoalescer.scala new file mode 100644 index 0000000000000..18f0585524aa2 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsCoalescer.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.MapOutputStatistics +import org.apache.spark.internal.Logging + +object ShufflePartitionsCoalescer extends Logging { + + /** + * Coalesce the same range of partitions (`firstPartitionIndex`` to `lastPartitionIndex`, the + * start is inclusive and the end is exclusive) from multiple shuffles. This method assumes that + * all the shuffles have the same number of partitions, and the partitions of same index will be + * read together by one task. + * + * The strategy used to determine the number of coalesced partitions is described as follows. + * To determine the number of coalesced partitions, we have a target size for a coalesced + * partition. Once we have size statistics of all shuffle partitions, we will do + * a pass of those statistics and pack shuffle partitions with continuous indices to a single + * coalesced partition until adding another shuffle partition would cause the size of a + * coalesced partition to be greater than the target size. 
+ * + * For example, we have two shuffles with the following partition size statistics: + * - shuffle 1 (5 partitions): [100 MiB, 20 MiB, 100 MiB, 10MiB, 30 MiB] + * - shuffle 2 (5 partitions): [10 MiB, 10 MiB, 70 MiB, 5 MiB, 5 MiB] + * Assuming the target size is 128 MiB, we will have 4 coalesced partitions, which are: + * - coalesced partition 0: shuffle partition 0 (size 110 MiB) + * - coalesced partition 1: shuffle partition 1 (size 30 MiB) + * - coalesced partition 2: shuffle partition 2 (size 170 MiB) + * - coalesced partition 3: shuffle partition 3 and 4 (size 50 MiB) + * + * @return An array of partition indices which represents the coalesced partitions. For example, + * [0, 2, 3] means 3 coalesced partitions: [0, 2), [2, 3), [3, lastPartitionIndex] + */ + def coalescePartitions( + mapOutputStatistics: Array[MapOutputStatistics], + firstPartitionIndex: Int, + lastPartitionIndex: Int, + advisoryTargetSize: Long, + minNumPartitions: Int = 1): Array[Int] = { + // If `minNumPartitions` is very large, it is possible that we need to use a value less than + // `advisoryTargetSize` as the target size of a coalesced task. + val totalPostShuffleInputSize = mapOutputStatistics.map(_.bytesByPartitionId.sum).sum + // The max at here is to make sure that when we have an empty table, we only have a single + // coalesced partition. + // There is no particular reason that we pick 16. We just need a number to prevent + // `maxTargetSize` from being set to 0. + val maxTargetSize = math.max( + math.ceil(totalPostShuffleInputSize / minNumPartitions.toDouble).toLong, 16) + val targetSize = math.min(maxTargetSize, advisoryTargetSize) + + logInfo(s"advisory target size: $advisoryTargetSize, actual target size $targetSize.") + + // Make sure these shuffles have the same number of partitions. 
+ val distinctNumShufflePartitions = + mapOutputStatistics.map(stats => stats.bytesByPartitionId.length).distinct + // The reason that we are expecting a single value of the number of shuffle partitions + // is that when we add Exchanges, we set the number of shuffle partitions + // (i.e. map output partitions) using a static setting, which is the value of + // `spark.sql.shuffle.partitions`. Even if two input RDDs are having different + // number of partitions, they will have the same number of shuffle partitions + // (i.e. map output partitions). + assert( + distinctNumShufflePartitions.length == 1, + "There should be only one distinct value of the number of shuffle partitions " + + "among registered Exchange operators.") + + val splitPoints = ArrayBuffer[Int]() + splitPoints += firstPartitionIndex + var coalescedSize = 0L + var i = firstPartitionIndex + while (i < lastPartitionIndex) { + // We calculate the total size of i-th shuffle partitions from all shuffles. + var totalSizeOfCurrentPartition = 0L + var j = 0 + while (j < mapOutputStatistics.length) { + totalSizeOfCurrentPartition += mapOutputStatistics(j).bytesByPartitionId(i) + j += 1 + } + + // If including the `totalSizeOfCurrentPartition` would exceed the target size, then start a + // new coalesced partition. + if (i > firstPartitionIndex && coalescedSize + totalSizeOfCurrentPartition > targetSize) { + splitPoints += i + // reset postShuffleInputSize. 
+ coalescedSize = totalSizeOfCurrentPartition + } else { + coalescedSize += totalSizeOfCurrentPartition + } + i += 1 + } + + splitPoints.toArray + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala index 4d762c5ea9f34..56a287d4d0279 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.optimizer.NormalizeFloatingNumbers import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.streaming.{StateStoreRestoreExec, StateStoreSaveExec} @@ -27,6 +26,22 @@ import org.apache.spark.sql.execution.streaming.{StateStoreRestoreExec, StateSto * Utility functions used by the query planner to convert our plan to new aggregation code path. */ object AggUtils { + + private def mayRemoveAggFilters(exprs: Seq[AggregateExpression]): Seq[AggregateExpression] = { + exprs.map { ae => + if (ae.filter.isDefined) { + ae.mode match { + // Aggregate filters are applicable only in partial/complete modes; + // this method filters out them, otherwise. 
+ case Partial | Complete => ae + case _ => ae.copy(filter = None) + } + } else { + ae + } + } + } + private def createAggregate( requiredChildDistributionExpressions: Option[Seq[Expression]] = None, groupingExpressions: Seq[NamedExpression] = Nil, @@ -41,7 +56,7 @@ object AggUtils { HashAggregateExec( requiredChildDistributionExpressions = requiredChildDistributionExpressions, groupingExpressions = groupingExpressions, - aggregateExpressions = aggregateExpressions, + aggregateExpressions = mayRemoveAggFilters(aggregateExpressions), aggregateAttributes = aggregateAttributes, initialInputBufferOffset = initialInputBufferOffset, resultExpressions = resultExpressions, @@ -54,7 +69,7 @@ object AggUtils { ObjectHashAggregateExec( requiredChildDistributionExpressions = requiredChildDistributionExpressions, groupingExpressions = groupingExpressions, - aggregateExpressions = aggregateExpressions, + aggregateExpressions = mayRemoveAggFilters(aggregateExpressions), aggregateAttributes = aggregateAttributes, initialInputBufferOffset = initialInputBufferOffset, resultExpressions = resultExpressions, @@ -63,7 +78,7 @@ object AggUtils { SortAggregateExec( requiredChildDistributionExpressions = requiredChildDistributionExpressions, groupingExpressions = groupingExpressions, - aggregateExpressions = aggregateExpressions, + aggregateExpressions = mayRemoveAggFilters(aggregateExpressions), aggregateAttributes = aggregateAttributes, initialInputBufferOffset = initialInputBufferOffset, resultExpressions = resultExpressions, @@ -174,7 +189,7 @@ object AggUtils { // Children of an AggregateFunction with DISTINCT keyword has already // been evaluated. At here, we need to replace original children // to AttributeReferences. 
- case agg @ AggregateExpression(aggregateFunction, mode, true, _) => + case agg @ AggregateExpression(aggregateFunction, mode, true, _, _) => aggregateFunction.transformDown(distinctColumnAttributeLookup) .asInstanceOf[AggregateFunction] case agg => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala index d03de1507fbbd..527a9eac9948e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregationIterator.scala @@ -157,19 +157,44 @@ abstract class AggregationIterator( inputAttributes: Seq[Attribute]): (InternalRow, InternalRow) => Unit = { val joinedRow = new JoinedRow if (expressions.nonEmpty) { - val mergeExpressions = functions.zip(expressions).flatMap { - case (ae: DeclarativeAggregate, expression) => - expression.mode match { - case Partial | Complete => ae.updateExpressions - case PartialMerge | Final => ae.mergeExpressions + val mergeExpressions = + functions.zip(expressions.map(ae => (ae.mode, ae.isDistinct, ae.filter))).flatMap { + case (ae: DeclarativeAggregate, (mode, isDistinct, filter)) => + mode match { + case Partial | Complete => + if (filter.isDefined) { + ae.updateExpressions.zip(ae.aggBufferAttributes).map { + case (updateExpr, attr) => If(filter.get, updateExpr, attr) + } + } else { + ae.updateExpressions + } + case PartialMerge | Final => ae.mergeExpressions + } + case (agg: AggregateFunction, _) => Seq.fill(agg.aggBufferAttributes.length)(NoOp) + } + // Initialize predicates for aggregate functions if necessary + val predicateOptions = expressions.map { + case AggregateExpression(_, mode, _, Some(filter), _) => + mode match { + case Partial | Complete => + val predicate = Predicate.create(filter, inputAttributes) + predicate.initialize(partIndex) + Some(predicate) + case _ => 
None } - case (agg: AggregateFunction, _) => Seq.fill(agg.aggBufferAttributes.length)(NoOp) + case _ => None } val updateFunctions = functions.zipWithIndex.collect { case (ae: ImperativeAggregate, i) => expressions(i).mode match { case Partial | Complete => - (buffer: InternalRow, row: InternalRow) => ae.update(buffer, row) + if (predicateOptions(i).isDefined) { + (buffer: InternalRow, row: InternalRow) => + if (predicateOptions(i).get.eval(row)) { ae.update(buffer, row) } + } else { + (buffer: InternalRow, row: InternalRow) => ae.update(buffer, row) + } case PartialMerge | Final => (buffer: InternalRow, row: InternalRow) => ae.merge(buffer, row) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala new file mode 100644 index 0000000000000..0eaa0f53fdacd --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.aggregate + +import org.apache.spark.sql.catalyst.expressions.{Attribute, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.execution.{ExplainUtils, UnaryExecNode} + +/** + * Holds common logic for aggregate operators + */ +trait BaseAggregateExec extends UnaryExecNode { + def groupingExpressions: Seq[NamedExpression] + def aggregateExpressions: Seq[AggregateExpression] + def aggregateAttributes: Seq[Attribute] + def resultExpressions: Seq[NamedExpression] + + override def verboseStringWithOperatorId(): String = { + val inputString = child.output.mkString("[", ", ", "]") + val keyString = groupingExpressions.mkString("[", ", ", "]") + val functionString = aggregateExpressions.mkString("[", ", ", "]") + val aggregateAttributeString = aggregateAttributes.mkString("[", ", ", "]") + val resultString = resultExpressions.mkString("[", ", ", "]") + s""" + |(${ExplainUtils.getOpId(this)}) $nodeName ${ExplainUtils.getCodegenId(this)} + |Input: $inputString + |Keys: $keyString + |Functions: $functionString + |Aggregate Attributes: $aggregateAttributeString + |Results: $resultString + """.stripMargin + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 4a95f76381339..7a26fd7a8541a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.aggregate import java.util.concurrent.TimeUnit._ +import scala.collection.mutable + import org.apache.spark.TaskContext import org.apache.spark.memory.{SparkOutOfMemoryError, TaskMemoryManager} import org.apache.spark.rdd.RDD @@ -30,13 +32,13 @@ import 
org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.execution.vectorized.MutableColumnarRow import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DecimalType, StringType, StructType} +import org.apache.spark.sql.types.{CalendarIntervalType, DecimalType, StringType, StructType} import org.apache.spark.unsafe.KVIterator import org.apache.spark.util.Utils @@ -51,7 +53,7 @@ case class HashAggregateExec( initialInputBufferOffset: Int, resultExpressions: Seq[NamedExpression], child: SparkPlan) - extends UnaryExecNode with BlockingOperatorWithCodegen { + extends BaseAggregateExec with BlockingOperatorWithCodegen with AliasAwareOutputPartitioning { private[this] val aggregateBufferAttributes = { aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes) @@ -73,7 +75,7 @@ case class HashAggregateExec( override def output: Seq[Attribute] = resultExpressions.map(_.toAttribute) - override def outputPartitioning: Partitioning = child.outputPartitioning + override protected def outputExpressions: Seq[NamedExpression] = resultExpressions override def producedAttributes: AttributeSet = AttributeSet(aggregateAttributes) ++ @@ -124,7 +126,7 @@ case class HashAggregateExec( initialInputBufferOffset, resultExpressions, (expressions, inputSchema) => - newMutableProjection(expressions, inputSchema, subexpressionEliminationEnabled), + MutableProjection.create(expressions, inputSchema), child.output, iter, testFallbackStartsAt, @@ -150,8 +152,10 @@ 
case class HashAggregateExec( override def usedInputs: AttributeSet = inputSet override def supportCodegen: Boolean = { - // ImperativeAggregate is not supported right now - !aggregateExpressions.exists(_.aggregateFunction.isInstanceOf[ImperativeAggregate]) + // ImperativeAggregate and filter predicate are not supported right now + // TODO: SPARK-30027 Support codegen for filter exprs in HashAggregateExec + !(aggregateExpressions.exists(_.aggregateFunction.isInstanceOf[ImperativeAggregate]) || + aggregateExpressions.exists(_.filter.isDefined)) } override def inputRDDs(): Seq[RDD[InternalRow]] = { @@ -174,8 +178,9 @@ case class HashAggregateExec( } } - // The variables used as aggregation buffer. Only used for aggregation without keys. - private var bufVars: Seq[ExprCode] = _ + // The variables are used as aggregation buffers and each aggregate function has one or more + // ExprCode to initialize its buffer slots. Only used for aggregation without keys. + private var bufVars: Seq[Seq[ExprCode]] = _ private def doProduceWithoutKeys(ctx: CodegenContext): String = { val initAgg = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "initAgg") @@ -184,27 +189,30 @@ case class HashAggregateExec( // generate variables for aggregation buffer val functions = aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate]) - val initExpr = functions.flatMap(f => f.initialValues) - bufVars = initExpr.map { e => - val isNull = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "bufIsNull") - val value = ctx.addMutableState(CodeGenerator.javaType(e.dataType), "bufValue") - // The initial expression should not access any column - val ev = e.genCode(ctx) - val initVars = code""" - | $isNull = ${ev.isNull}; - | $value = ${ev.value}; - """.stripMargin - ExprCode( - ev.code + initVars, - JavaCode.isNullGlobal(isNull), - JavaCode.global(value, e.dataType)) + val initExpr = functions.map(f => f.initialValues) + bufVars = initExpr.map { exprs => + exprs.map { e => + val isNull 
= ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "bufIsNull") + val value = ctx.addMutableState(CodeGenerator.javaType(e.dataType), "bufValue") + // The initial expression should not access any column + val ev = e.genCode(ctx) + val initVars = code""" + |$isNull = ${ev.isNull}; + |$value = ${ev.value}; + """.stripMargin + ExprCode( + ev.code + initVars, + JavaCode.isNullGlobal(isNull), + JavaCode.global(value, e.dataType)) + } } - val initBufVar = evaluateVariables(bufVars) + val flatBufVars = bufVars.flatten + val initBufVar = evaluateVariables(flatBufVars) // generate variables for output val (resultVars, genResult) = if (modes.contains(Final) || modes.contains(Complete)) { // evaluate aggregate results - ctx.currentVars = bufVars + ctx.currentVars = flatBufVars val aggResults = bindReferences( functions.map(_.evaluateExpression), aggregateBufferAttributes).map(_.genCode(ctx)) @@ -218,7 +226,7 @@ case class HashAggregateExec( """.stripMargin) } else if (modes.contains(Partial) || modes.contains(PartialMerge)) { // output the aggregate buffer directly - (bufVars, "") + (flatBufVars, "") } else { // no aggregate function, the result should be literals val resultVars = resultExpressions.map(_.genCode(ctx)) @@ -228,38 +236,106 @@ case class HashAggregateExec( val doAgg = ctx.freshName("doAggregateWithoutKey") val doAggFuncName = ctx.addNewFunction(doAgg, s""" - | private void $doAgg() throws java.io.IOException { - | // initialize aggregation buffer - | $initBufVar + |private void $doAgg() throws java.io.IOException { + | // initialize aggregation buffer + | $initBufVar | - | ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} - | } + | ${child.asInstanceOf[CodegenSupport].produce(ctx, this)} + |} """.stripMargin) val numOutput = metricTerm(ctx, "numOutputRows") val aggTime = metricTerm(ctx, "aggTime") val beforeAgg = ctx.freshName("beforeAgg") s""" - | while (!$initAgg) { - | $initAgg = true; - | long $beforeAgg = System.nanoTime(); - | $doAggFuncName(); - | 
$aggTime.add((System.nanoTime() - $beforeAgg) / $NANOS_PER_MILLIS); + |while (!$initAgg) { + | $initAgg = true; + | long $beforeAgg = System.nanoTime(); + | $doAggFuncName(); + | $aggTime.add((System.nanoTime() - $beforeAgg) / $NANOS_PER_MILLIS); | - | // output the result - | ${genResult.trim} + | // output the result + | ${genResult.trim} | - | $numOutput.add(1); - | ${consume(ctx, resultVars).trim} - | } + | $numOutput.add(1); + | ${consume(ctx, resultVars).trim} + |} """.stripMargin } + // Splits aggregate code into small functions because the most of JVM implementations + // can not compile too long functions. Returns None if we are not able to split the given code. + // + // Note: The difference from `CodeGenerator.splitExpressions` is that we define an individual + // function for each aggregation function (e.g., SUM and AVG). For example, in a query + // `SELECT SUM(a), AVG(a) FROM VALUES(1) t(a)`, we define two functions + // for `SUM(a)` and `AVG(a)`. + private def splitAggregateExpressions( + ctx: CodegenContext, + aggNames: Seq[String], + aggBufferUpdatingExprs: Seq[Seq[Expression]], + aggCodeBlocks: Seq[Block], + subExprs: Map[Expression, SubExprEliminationState]): Option[String] = { + val exprValsInSubExprs = subExprs.flatMap { case (_, s) => s.value :: s.isNull :: Nil } + if (exprValsInSubExprs.exists(_.isInstanceOf[SimpleExprValue])) { + // `SimpleExprValue`s cannot be used as an input variable for split functions, so + // we give up splitting functions if it exists in `subExprs`. 
+ None + } else { + val inputVars = aggBufferUpdatingExprs.map { aggExprsForOneFunc => + val inputVarsForOneFunc = aggExprsForOneFunc.map( + CodeGenerator.getLocalInputVariableValues(ctx, _, subExprs)).reduce(_ ++ _).toSeq + val paramLength = CodeGenerator.calculateParamLengthFromExprValues(inputVarsForOneFunc) + + // Checks if a parameter length for the `aggExprsForOneFunc` does not go over the JVM limit + if (CodeGenerator.isValidParamLength(paramLength)) { + Some(inputVarsForOneFunc) + } else { + None + } + } + + // Checks if all the aggregate code can be split into pieces. + // If the parameter length of at lease one `aggExprsForOneFunc` goes over the limit, + // we totally give up splitting aggregate code. + if (inputVars.forall(_.isDefined)) { + val splitCodes = inputVars.flatten.zipWithIndex.map { case (args, i) => + val doAggFunc = ctx.freshName(s"doAggregate_${aggNames(i)}") + val argList = args.map { v => + s"${CodeGenerator.typeName(v.javaType)} ${v.variableName}" + }.mkString(", ") + val doAggFuncName = ctx.addNewFunction(doAggFunc, + s""" + |private void $doAggFunc($argList) throws java.io.IOException { + | ${aggCodeBlocks(i)} + |} + """.stripMargin) + + val inputVariables = args.map(_.variableName).mkString(", ") + s"$doAggFuncName($inputVariables);" + } + Some(splitCodes.mkString("\n").trim) + } else { + val errMsg = "Failed to split aggregate code into small functions because the parameter " + + "length of at least one split function went over the JVM limit: " + + CodeGenerator.MAX_JVM_METHOD_PARAMS_LENGTH + if (Utils.isTesting) { + throw new IllegalStateException(errMsg) + } else { + logInfo(errMsg) + None + } + } + } + } + private def doConsumeWithoutKeys(ctx: CodegenContext, input: Seq[ExprCode]): String = { // only have DeclarativeAggregate val functions = aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate]) val inputAttrs = functions.flatMap(_.aggBufferAttributes) ++ child.output - val updateExpr = 
aggregateExpressions.flatMap { e => + // To individually generate code for each aggregate function, an element in `updateExprs` holds + // all the expressions for the buffer of an aggregation function. + val updateExprs = aggregateExpressions.map { e => e.mode match { case Partial | Complete => e.aggregateFunction.asInstanceOf[DeclarativeAggregate].updateExpressions @@ -267,28 +343,56 @@ case class HashAggregateExec( e.aggregateFunction.asInstanceOf[DeclarativeAggregate].mergeExpressions } } - ctx.currentVars = bufVars ++ input - val boundUpdateExpr = bindReferences(updateExpr, inputAttrs) - val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) + ctx.currentVars = bufVars.flatten ++ input + val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc => + bindReferences(updateExprsForOneFunc, inputAttrs) + } + val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten) val effectiveCodes = subExprs.codes.mkString("\n") - val aggVals = ctx.withSubExprEliminationExprs(subExprs.states) { - boundUpdateExpr.map(_.genCode(ctx)) + val bufferEvals = boundUpdateExprs.map { boundUpdateExprsForOneFunc => + ctx.withSubExprEliminationExprs(subExprs.states) { + boundUpdateExprsForOneFunc.map(_.genCode(ctx)) + } } - // aggregate buffer should be updated atomic - val updates = aggVals.zipWithIndex.map { case (ev, i) => - s""" - | ${bufVars(i).isNull} = ${ev.isNull}; - | ${bufVars(i).value} = ${ev.value}; + + val aggNames = functions.map(_.prettyName) + val aggCodeBlocks = bufferEvals.zipWithIndex.map { case (bufferEvalsForOneFunc, i) => + val bufVarsForOneFunc = bufVars(i) + // All the update code for aggregation buffers should be placed in the end + // of each aggregation function code. 
+ val updates = bufferEvalsForOneFunc.zip(bufVarsForOneFunc).map { case (ev, bufVar) => + s""" + |${bufVar.isNull} = ${ev.isNull}; + |${bufVar.value} = ${ev.value}; + """.stripMargin + } + code""" + |// do aggregate for ${aggNames(i)} + |// evaluate aggregate function + |${evaluateVariables(bufferEvalsForOneFunc)} + |// update aggregation buffers + |${updates.mkString("\n").trim} """.stripMargin } + + val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && + aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { + val maybeSplitCode = splitAggregateExpressions( + ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) + + maybeSplitCode.getOrElse { + aggCodeBlocks.fold(EmptyBlock)(_ + _).code + } + } else { + aggCodeBlocks.fold(EmptyBlock)(_ + _).code + } + s""" - | // do aggregate - | // common sub-expressions - | $effectiveCodes - | // evaluate aggregate function - | ${evaluateVariables(aggVals)} - | // update aggregation buffer - | ${updates.mkString("\n").trim} + |// do aggregate + |// common sub-expressions + |$effectiveCodes + |// evaluate aggregate functions and update aggregation buffers + |$codeToEvalAggFunc """.stripMargin } @@ -384,10 +488,9 @@ case class HashAggregateExec( // Create a MutableProjection to merge the rows of same key together val mergeExpr = declFunctions.flatMap(_.mergeExpressions) - val mergeProjection = newMutableProjection( + val mergeProjection = MutableProjection.create( mergeExpr, - aggregateBufferAttributes ++ declFunctions.flatMap(_.inputAggBufferAttributes), - subexpressionEliminationEnabled) + aggregateBufferAttributes ++ declFunctions.flatMap(_.inputAggBufferAttributes)) val joinedRow = new JoinedRow() var currentKey: UnsafeRow = null @@ -473,12 +576,12 @@ case class HashAggregateExec( val evaluateNondeterministicResults = evaluateNondeterministicVariables(output, resultVars, resultExpressions) s""" - $evaluateKeyVars - $evaluateBufferVars - $evaluateAggResults - $evaluateNondeterministicResults - 
${consume(ctx, resultVars)} - """ + |$evaluateKeyVars + |$evaluateBufferVars + |$evaluateAggResults + |$evaluateNondeterministicResults + |${consume(ctx, resultVars)} + """.stripMargin } else if (modes.contains(Partial) || modes.contains(PartialMerge)) { // resultExpressions are Attributes of groupingExpressions and aggregateBufferAttributes. assert(resultExpressions.forall(_.isInstanceOf[Attribute])) @@ -505,10 +608,10 @@ case class HashAggregateExec( resultExpressions, inputAttrs).map(_.genCode(ctx)) s""" - $evaluateKeyVars - $evaluateResultBufferVars - ${consume(ctx, resultVars)} - """ + |$evaluateKeyVars + |$evaluateResultBufferVars + |${consume(ctx, resultVars)} + """.stripMargin } else { // generate result based on grouping key ctx.INPUT_ROW = keyTerm @@ -519,18 +622,18 @@ case class HashAggregateExec( val evaluateNondeterministicResults = evaluateNondeterministicVariables(output, resultVars, resultExpressions) s""" - $evaluateNondeterministicResults - ${consume(ctx, resultVars)} - """ + |$evaluateNondeterministicResults + |${consume(ctx, resultVars)} + """.stripMargin } ctx.addNewFunction(funcName, s""" - private void $funcName(UnsafeRow $keyTerm, UnsafeRow $bufferTerm) - throws java.io.IOException { - $numOutput.add(1); - $body - } - """) + |private void $funcName(UnsafeRow $keyTerm, UnsafeRow $bufferTerm) + | throws java.io.IOException { + | $numOutput.add(1); + | $body + |} + """.stripMargin) } /** @@ -542,7 +645,8 @@ case class HashAggregateExec( private def checkIfFastHashMapSupported(ctx: CodegenContext): Boolean = { val isSupported = (groupingKeySchema ++ bufferSchema).forall(f => CodeGenerator.isPrimitiveType(f.dataType) || - f.dataType.isInstanceOf[DecimalType] || f.dataType.isInstanceOf[StringType]) && + f.dataType.isInstanceOf[DecimalType] || f.dataType.isInstanceOf[StringType] || + f.dataType.isInstanceOf[CalendarIntervalType]) && bufferSchema.nonEmpty && modes.forall(mode => mode == Partial || mode == PartialMerge) // For vectorized hash map, We 
do not support byte array based decimal type for aggregate values @@ -554,7 +658,7 @@ case class HashAggregateExec( val isNotByteArrayDecimalType = bufferSchema.map(_.dataType).filter(_.isInstanceOf[DecimalType]) .forall(!DecimalType.isByteArrayDecimalType(_)) - isSupported && isNotByteArrayDecimalType + isSupported && isNotByteArrayDecimalType } private def enableTwoLevelHashMap(ctx: CodegenContext): Unit = { @@ -583,9 +687,9 @@ case class HashAggregateExec( val thisPlan = ctx.addReferenceObj("plan", this) - // Create a name for the iterator from the fast hash map. - val iterTermForFastHashMap = if (isFastHashMapEnabled) { - // Generates the fast hash map class and creates the fash hash map term. + // Create a name for the iterator from the fast hash map, and the code to create fast hash map. + val (iterTermForFastHashMap, createFastHashMap) = if (isFastHashMapEnabled) { + // Generates the fast hash map class and creates the fast hash map term. val fastHashMapClassName = ctx.freshName("FastHashMap") if (isVectorizedHashMapEnabled) { val generatedMap = new VectorizedHashMapGenerator(ctx, aggregateExpressions, @@ -593,25 +697,30 @@ case class HashAggregateExec( ctx.addInnerClass(generatedMap) // Inline mutable state since not many aggregation operations in a task - fastHashMapTerm = ctx.addMutableState(fastHashMapClassName, "vectorizedHastHashMap", - v => s"$v = new $fastHashMapClassName();", forceInline = true) - ctx.addMutableState(s"java.util.Iterator", "vectorizedFastHashMapIter", + fastHashMapTerm = ctx.addMutableState( + fastHashMapClassName, "vectorizedFastHashMap", forceInline = true) + val iter = ctx.addMutableState( + "java.util.Iterator", + "vectorizedFastHashMapIter", forceInline = true) + val create = s"$fastHashMapTerm = new $fastHashMapClassName();" + (iter, create) } else { val generatedMap = new RowBasedHashMapGenerator(ctx, aggregateExpressions, fastHashMapClassName, groupingKeySchema, bufferSchema, bitMaxCapacity).generate() 
ctx.addInnerClass(generatedMap) // Inline mutable state since not many aggregation operations in a task - fastHashMapTerm = ctx.addMutableState(fastHashMapClassName, "fastHashMap", - v => s"$v = new $fastHashMapClassName(" + - s"$thisPlan.getTaskMemoryManager(), $thisPlan.getEmptyAggregationBuffer());", - forceInline = true) - ctx.addMutableState( + fastHashMapTerm = ctx.addMutableState( + fastHashMapClassName, "fastHashMap", forceInline = true) + val iter = ctx.addMutableState( "org.apache.spark.unsafe.KVIterator", "fastHashMapIter", forceInline = true) + val create = s"$fastHashMapTerm = new $fastHashMapClassName(" + + s"$thisPlan.getTaskMemoryManager(), $thisPlan.getEmptyAggregationBuffer());" + (iter, create) } - } + } else ("", "") // Create a name for the iterator from the regular hash map. // Inline mutable state since not many aggregation operations in a task @@ -619,8 +728,7 @@ case class HashAggregateExec( "mapIter", forceInline = true) // create hashMap val hashMapClassName = classOf[UnsafeFixedWidthAggregationMap].getName - hashMapTerm = ctx.addMutableState(hashMapClassName, "hashMap", - v => s"$v = $thisPlan.createHashMap();", forceInline = true) + hashMapTerm = ctx.addMutableState(hashMapClassName, "hashMap", forceInline = true) sorterTerm = ctx.addMutableState(classOf[UnsafeKVExternalSorter].getName, "sorter", forceInline = true) @@ -721,17 +829,18 @@ case class HashAggregateExec( val aggTime = metricTerm(ctx, "aggTime") val beforeAgg = ctx.freshName("beforeAgg") s""" - if (!$initAgg) { - $initAgg = true; - long $beforeAgg = System.nanoTime(); - $doAggFuncName(); - $aggTime.add((System.nanoTime() - $beforeAgg) / $NANOS_PER_MILLIS); - } - - // output the result - $outputFromFastHashMap - $outputFromRegularHashMap - """ + |if (!$initAgg) { + | $initAgg = true; + | $createFastHashMap + | $hashMapTerm = $thisPlan.createHashMap(); + | long $beforeAgg = System.nanoTime(); + | $doAggFuncName(); + | $aggTime.add((System.nanoTime() - $beforeAgg) / 
$NANOS_PER_MILLIS); + |} + |// output the result + |$outputFromFastHashMap + |$outputFromRegularHashMap + """.stripMargin } private def doConsumeWithKeys(ctx: CodegenContext, input: Seq[ExprCode]): String = { @@ -745,8 +854,10 @@ case class HashAggregateExec( val unsafeRowBuffer = ctx.freshName("unsafeRowAggBuffer") val fastRowBuffer = ctx.freshName("fastAggBuffer") - // only have DeclarativeAggregate - val updateExpr = aggregateExpressions.flatMap { e => + // To individually generate code for each aggregate function, an element in `updateExprs` holds + // all the expressions for the buffer of an aggregation function. + val updateExprs = aggregateExpressions.map { e => + // only have DeclarativeAggregate e.mode match { case Partial | Complete => e.aggregateFunction.asInstanceOf[DeclarativeAggregate].updateExpressions @@ -824,25 +935,70 @@ case class HashAggregateExec( // generating input columns, we use `currentVars`. ctx.currentVars = new Array[ExprCode](aggregateBufferAttributes.length) ++ input + val aggNames = aggregateExpressions.map(_.aggregateFunction.prettyName) + // Computes start offsets for each aggregation function code + // in the underlying buffer row. 
+ val bufferStartOffsets = { + val offsets = mutable.ArrayBuffer[Int]() + var curOffset = 0 + updateExprs.foreach { exprsForOneFunc => + offsets += curOffset + curOffset += exprsForOneFunc.length + } + offsets.toArray + } + val updateRowInRegularHashMap: String = { ctx.INPUT_ROW = unsafeRowBuffer - val boundUpdateExpr = bindReferences(updateExpr, inputAttr) - val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) + val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc => + bindReferences(updateExprsForOneFunc, inputAttr) + } + val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten) val effectiveCodes = subExprs.codes.mkString("\n") - val unsafeRowBufferEvals = ctx.withSubExprEliminationExprs(subExprs.states) { - boundUpdateExpr.map(_.genCode(ctx)) + val unsafeRowBufferEvals = boundUpdateExprs.map { boundUpdateExprsForOneFunc => + ctx.withSubExprEliminationExprs(subExprs.states) { + boundUpdateExprsForOneFunc.map(_.genCode(ctx)) + } } - val updateUnsafeRowBuffer = unsafeRowBufferEvals.zipWithIndex.map { case (ev, i) => - val dt = updateExpr(i).dataType - CodeGenerator.updateColumn(unsafeRowBuffer, dt, i, ev, updateExpr(i).nullable) + + val aggCodeBlocks = updateExprs.indices.map { i => + val rowBufferEvalsForOneFunc = unsafeRowBufferEvals(i) + val boundUpdateExprsForOneFunc = boundUpdateExprs(i) + val bufferOffset = bufferStartOffsets(i) + + // All the update code for aggregation buffers should be placed in the end + // of each aggregation function code. 
+ val updateRowBuffers = rowBufferEvalsForOneFunc.zipWithIndex.map { case (ev, j) => + val updateExpr = boundUpdateExprsForOneFunc(j) + val dt = updateExpr.dataType + val nullable = updateExpr.nullable + CodeGenerator.updateColumn(unsafeRowBuffer, dt, bufferOffset + j, ev, nullable) + } + code""" + |// evaluate aggregate function for ${aggNames(i)} + |${evaluateVariables(rowBufferEvalsForOneFunc)} + |// update unsafe row buffer + |${updateRowBuffers.mkString("\n").trim} + """.stripMargin + } + + val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && + aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { + val maybeSplitCode = splitAggregateExpressions( + ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) + + maybeSplitCode.getOrElse { + aggCodeBlocks.fold(EmptyBlock)(_ + _).code + } + } else { + aggCodeBlocks.fold(EmptyBlock)(_ + _).code } + s""" |// common sub-expressions |$effectiveCodes - |// evaluate aggregate function - |${evaluateVariables(unsafeRowBufferEvals)} - |// update unsafe row buffer - |${updateUnsafeRowBuffer.mkString("\n").trim} + |// evaluate aggregate functions and update aggregation buffers + |$codeToEvalAggFunc """.stripMargin } @@ -850,16 +1006,48 @@ case class HashAggregateExec( if (isFastHashMapEnabled) { if (isVectorizedHashMapEnabled) { ctx.INPUT_ROW = fastRowBuffer - val boundUpdateExpr = bindReferences(updateExpr, inputAttr) - val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExpr) + val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc => + bindReferences(updateExprsForOneFunc, inputAttr) + } + val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten) val effectiveCodes = subExprs.codes.mkString("\n") - val fastRowEvals = ctx.withSubExprEliminationExprs(subExprs.states) { - boundUpdateExpr.map(_.genCode(ctx)) + val fastRowEvals = boundUpdateExprs.map { boundUpdateExprsForOneFunc => + ctx.withSubExprEliminationExprs(subExprs.states) { + 
boundUpdateExprsForOneFunc.map(_.genCode(ctx)) + } + } + + val aggCodeBlocks = fastRowEvals.zipWithIndex.map { case (fastRowEvalsForOneFunc, i) => + val boundUpdateExprsForOneFunc = boundUpdateExprs(i) + val bufferOffset = bufferStartOffsets(i) + // All the update code for aggregation buffers should be placed in the end + // of each aggregation function code. + val updateRowBuffer = fastRowEvalsForOneFunc.zipWithIndex.map { case (ev, j) => + val updateExpr = boundUpdateExprsForOneFunc(j) + val dt = updateExpr.dataType + val nullable = updateExpr.nullable + CodeGenerator.updateColumn(fastRowBuffer, dt, bufferOffset + j, ev, nullable, + isVectorized = true) + } + code""" + |// evaluate aggregate function for ${aggNames(i)} + |${evaluateVariables(fastRowEvalsForOneFunc)} + |// update fast row + |${updateRowBuffer.mkString("\n").trim} + """.stripMargin } - val updateFastRow = fastRowEvals.zipWithIndex.map { case (ev, i) => - val dt = updateExpr(i).dataType - CodeGenerator.updateColumn( - fastRowBuffer, dt, i, ev, updateExpr(i).nullable, isVectorized = true) + + + val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && + aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { + val maybeSplitCode = splitAggregateExpressions( + ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) + + maybeSplitCode.getOrElse { + aggCodeBlocks.fold(EmptyBlock)(_ + _).code + } + } else { + aggCodeBlocks.fold(EmptyBlock)(_ + _).code } // If vectorized fast hash map is on, we first generate code to update row @@ -869,10 +1057,8 @@ case class HashAggregateExec( |if ($fastRowBuffer != null) { | // common sub-expressions | $effectiveCodes - | // evaluate aggregate function - | ${evaluateVariables(fastRowEvals)} - | // update fast row - | ${updateFastRow.mkString("\n").trim} + | // evaluate aggregate functions and update aggregation buffers + | $codeToEvalAggFunc |} else { | $updateRowInRegularHashMap |} @@ -913,14 +1099,11 @@ case class HashAggregateExec( // continue to 
do in-memory aggregation and spilling until all the rows had been processed. // Finally, sort the spilled aggregate buffers by key, and merge them together for same key. s""" - $declareRowBuffer - - $findOrInsertHashMap - - $incCounter - - $updateRowInHashMap - """ + |$declareRowBuffer + |$findOrInsertHashMap + |$incCounter + |$updateRowInHashMap + """.stripMargin } override def verboseString(maxFields: Int): String = toString(verbose = true, maxFields) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala index b88ddba8e48d3..75651500954cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala @@ -22,7 +22,7 @@ import org.apache.spark.internal.{config, Logging} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.expressions.codegen.{BaseOrdering, GenerateOrdering} +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateOrdering import org.apache.spark.sql.execution.UnsafeKVExternalSorter import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.internal.SQLConf @@ -61,9 +61,9 @@ class ObjectAggregationIterator( // Hacking the aggregation mode to call AggregateFunction.merge to merge two aggregation buffers private val mergeAggregationBuffers: (InternalRow, InternalRow) => Unit = { val newExpressions = aggregateExpressions.map { - case agg @ AggregateExpression(_, Partial, _, _) => + case agg @ AggregateExpression(_, Partial, _, _, _) => agg.copy(mode = PartialMerge) - case agg @ AggregateExpression(_, Complete, _, _) => + case agg @ AggregateExpression(_, Complete, _, _, _) => 
agg.copy(mode = Final) case other => other } @@ -158,7 +158,7 @@ class ObjectAggregationIterator( val buffer: InternalRow = getAggregationBufferByKey(hashMap, groupingKey) processRow(buffer, newInput) - // The the hash map gets too large, makes a sorted spill and clear the map. + // The hash map gets too large, makes a sorted spill and clear the map. if (hashMap.size >= fallbackCountThreshold) { logInfo( s"Aggregation hash map size ${hashMap.size} reaches threshold " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala index 151da241144be..3fb58eb2cc8ba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala @@ -67,7 +67,7 @@ case class ObjectHashAggregateExec( initialInputBufferOffset: Int, resultExpressions: Seq[NamedExpression], child: SparkPlan) - extends UnaryExecNode { + extends BaseAggregateExec with AliasAwareOutputPartitioning { private[this] val aggregateBufferAttributes = { aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes) @@ -97,7 +97,7 @@ case class ObjectHashAggregateExec( } } - override def outputPartitioning: Partitioning = child.outputPartitioning + override protected def outputExpressions: Seq[NamedExpression] = resultExpressions protected override def doExecute(): RDD[InternalRow] = attachTree(this, "execute") { val numOutputRows = longMetric("numOutputRows") @@ -122,7 +122,7 @@ case class ObjectHashAggregateExec( initialInputBufferOffset, resultExpressions, (expressions, inputSchema) => - newMutableProjection(expressions, inputSchema, subexpressionEliminationEnabled), + MutableProjection.create(expressions, inputSchema), child.output, iter, fallbackCountThreshold, diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala index 56cf78d8b7fc1..44d19ad60d49f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/RowBasedHashMapGenerator.scala @@ -127,7 +127,8 @@ class RowBasedHashMapGenerator( case t: DecimalType => s"agg_rowWriter.write(${ordinal}, ${key.name}, ${t.precision}, ${t.scale})" case t: DataType => - if (!t.isInstanceOf[StringType] && !CodeGenerator.isPrimitiveType(t)) { + if (!t.isInstanceOf[StringType] && !t.isInstanceOf[CalendarIntervalType] && + !CodeGenerator.isPrimitiveType(t)) { throw new IllegalArgumentException(s"cannot generate code for unsupported type: $t") } s"agg_rowWriter.write(${ordinal}, ${key.name})" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala index 7ab6ecc08a7bc..77ed469016fa3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.{AliasAwareOutputPartitioning, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetrics /** @@ -38,7 +38,7 @@ case class SortAggregateExec( initialInputBufferOffset: Int, resultExpressions: Seq[NamedExpression], child: SparkPlan) - extends UnaryExecNode { + extends 
BaseAggregateExec with AliasAwareOutputPartitioning { private[this] val aggregateBufferAttributes = { aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes) @@ -66,7 +66,7 @@ case class SortAggregateExec( groupingExpressions.map(SortOrder(_, Ascending)) :: Nil } - override def outputPartitioning: Partitioning = child.outputPartitioning + override protected def outputExpressions: Seq[NamedExpression] = resultExpressions override def outputOrdering: Seq[SortOrder] = { groupingExpressions.map(SortOrder(_, Ascending)) @@ -93,7 +93,7 @@ case class SortAggregateExec( initialInputBufferOffset, resultExpressions, (expressions, inputSchema) => - newMutableProjection(expressions, inputSchema, subexpressionEliminationEnabled), + MutableProjection.create(expressions, inputSchema), numOutputRows) if (!hasInput && groupingExpressions.isEmpty) { // There is no input and there is no grouping expressions. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala index 6dc64657ebf1f..99358fbf4e94f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala @@ -249,9 +249,9 @@ class TungstenAggregationIterator( // Basically the value of the KVIterator returned by externalSorter // will be just aggregation buffer, so we rewrite the aggregateExpressions to reflect it. 
val newExpressions = aggregateExpressions.map { - case agg @ AggregateExpression(_, Partial, _, _) => + case agg @ AggregateExpression(_, Partial, _, _, _) => agg.copy(mode = PartialMerge) - case agg @ AggregateExpression(_, Complete, _, _) => + case agg @ AggregateExpression(_, Complete, _, _, _) => agg.copy(mode = Final) case other => other } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala index 100486fa9850f..dfae5c07e0373 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala @@ -17,13 +17,17 @@ package org.apache.spark.sql.execution.aggregate +import scala.reflect.runtime.universe.TypeTag + import org.apache.spark.internal.Logging -import org.apache.spark.sql.Row +import org.apache.spark.sql.{Column, Row} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _} -import org.apache.spark.sql.catalyst.expressions.aggregate.ImperativeAggregate -import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection -import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} +import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} +import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateMutableProjection, GenerateSafeProjection} +import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, UserDefinedAggregateFunction} import org.apache.spark.sql.types._ /** @@ -450,3 +454,63 @@ case class ScalaUDAF( override def nodeName: String = 
udaf.getClass.getSimpleName } + +case class ScalaAggregator[IN, BUF, OUT]( + children: Seq[Expression], + agg: Aggregator[IN, BUF, OUT], + inputEncoderNR: ExpressionEncoder[IN], + nullable: Boolean = true, + isDeterministic: Boolean = true, + mutableAggBufferOffset: Int = 0, + inputAggBufferOffset: Int = 0) + extends TypedImperativeAggregate[BUF] + with NonSQLExpression + with UserDefinedExpression + with ImplicitCastInputTypes + with Logging { + + private[this] lazy val inputEncoder = inputEncoderNR.resolveAndBind() + private[this] lazy val bufferEncoder = + agg.bufferEncoder.asInstanceOf[ExpressionEncoder[BUF]].resolveAndBind() + private[this] lazy val outputEncoder = agg.outputEncoder.asInstanceOf[ExpressionEncoder[OUT]] + + def dataType: DataType = outputEncoder.objSerializer.dataType + + def inputTypes: Seq[DataType] = inputEncoder.schema.map(_.dataType) + + override lazy val deterministic: Boolean = isDeterministic + + def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ScalaAggregator[IN, BUF, OUT] = + copy(mutableAggBufferOffset = newMutableAggBufferOffset) + + def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ScalaAggregator[IN, BUF, OUT] = + copy(inputAggBufferOffset = newInputAggBufferOffset) + + private[this] lazy val inputProjection = UnsafeProjection.create(children) + + def createAggregationBuffer(): BUF = agg.zero + + def update(buffer: BUF, input: InternalRow): BUF = + agg.reduce(buffer, inputEncoder.fromRow(inputProjection(input))) + + def merge(buffer: BUF, input: BUF): BUF = agg.merge(buffer, input) + + def eval(buffer: BUF): Any = { + val row = outputEncoder.toRow(agg.finish(buffer)) + if (outputEncoder.isSerializedAsStruct) row else row.get(0, dataType) + } + + private[this] lazy val bufferRow = new UnsafeRow(bufferEncoder.namedExpressions.length) + + def serialize(agg: BUF): Array[Byte] = + bufferEncoder.toRow(agg).asInstanceOf[UnsafeRow].getBytes() + + def deserialize(storageFormat: Array[Byte]): BUF = { + 
bufferRow.pointTo(storageFormat, storageFormat.length) + bufferEncoder.fromRow(bufferRow) + } + + override def toString: String = s"""${nodeName}(${children.mkString(",")})""" + + override def nodeName: String = agg.getClass.getSimpleName +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala index 5c3c735f0346c..614d6c2846bfa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala @@ -71,7 +71,7 @@ class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] { } override def apply(plan: LogicalPlan): LogicalPlan = { - if (!conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN)) return plan + if (!conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) return plan // We always remove the special metadata from `AttributeReference` at the end of this rule, so // Dataset column reference only exists in the root node via Dataset transformations like @@ -149,7 +149,7 @@ class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] { "to figure out which one. Please alias the Datasets with different names via " + "`Dataset.as` before joining them, and specify the column using qualified name, e.g. " + """`df.as("a").join(df.as("b"), $"a.id" > $"b.id")`. 
You can also set """ + - s"${SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key} to false to disable this check.") + s"${SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key} to false to disable this check.") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala index 1a6f4acb63521..d1076d9d0156c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala @@ -26,7 +26,7 @@ import org.apache.arrow.flatbuf.MessageHeader import org.apache.arrow.memory.BufferAllocator import org.apache.arrow.vector._ import org.apache.arrow.vector.ipc.{ArrowStreamWriter, ReadChannel, WriteChannel} -import org.apache.arrow.vector.ipc.message.{ArrowRecordBatch, MessageSerializer} +import org.apache.arrow.vector.ipc.message.{ArrowRecordBatch, IpcOption, MessageSerializer} import org.apache.spark.TaskContext import org.apache.spark.api.java.JavaRDD @@ -64,7 +64,7 @@ private[sql] class ArrowBatchStreamWriter( * End the Arrow stream, does not close output stream. 
*/ def end(): Unit = { - ArrowStreamWriter.writeEndOfStream(writeChannel) + ArrowStreamWriter.writeEndOfStream(writeChannel, new IpcOption) } } @@ -251,8 +251,8 @@ private[sql] object ArrowConverters { // Only care about RecordBatch messages, skip Schema and unsupported Dictionary messages if (msgMetadata.getMessage.headerType() == MessageHeader.RecordBatch) { - // Buffer backed output large enough to hold the complete serialized message - val bbout = new ByteBufferOutputStream(4 + msgMetadata.getMessageLength + bodyLength) + // Buffer backed output large enough to hold 8-byte length + complete serialized message + val bbout = new ByteBufferOutputStream(8 + msgMetadata.getMessageLength + bodyLength) // Write message metadata to ByteBuffer output stream MessageSerializer.writeMessageBuffer( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala index 6147d6fefd52a..501e1c460f9c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala @@ -62,6 +62,11 @@ object ArrowWriter { case (ArrayType(_, _), vector: ListVector) => val elementVector = createFieldWriter(vector.getDataVector()) new ArrayWriter(vector, elementVector) + case (MapType(_, _, _), vector: MapVector) => + val entryWriter = createFieldWriter(vector.getDataVector).asInstanceOf[StructWriter] + val keyWriter = createFieldWriter(entryWriter.valueVector.getChild(MapVector.KEY_NAME)) + val valueWriter = createFieldWriter(entryWriter.valueVector.getChild(MapVector.VALUE_NAME)) + new MapWriter(vector, keyWriter, valueWriter) case (StructType(_), vector: StructVector) => val children = (0 until vector.size()).map { ordinal => createFieldWriter(vector.getChildByOrdinal(ordinal)) @@ -343,3 +348,38 @@ private[arrow] class StructWriter( children.foreach(_.reset()) } } + 
+private[arrow] class MapWriter( + val valueVector: MapVector, + val keyWriter: ArrowFieldWriter, + val valueWriter: ArrowFieldWriter) extends ArrowFieldWriter { + + override def setNull(): Unit = {} + + override def setValue(input: SpecializedGetters, ordinal: Int): Unit = { + val map = input.getMap(ordinal) + valueVector.startNewValue(count) + val keys = map.keyArray() + val values = map.valueArray() + var i = 0 + while (i < map.numElements()) { + keyWriter.write(keys, i) + valueWriter.write(values, i) + i += 1 + } + + valueVector.endValue(count, map.numElements()) + } + + override def finish(): Unit = { + super.finish() + keyWriter.finish() + valueWriter.finish() + } + + override def reset(): Unit = { + super.reset() + keyWriter.reset() + valueWriter.reset() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index b072a7f5d914c..c35c48496e1c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution import java.util.concurrent.TimeUnit._ +import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration @@ -28,16 +29,16 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{LongType, StructType} -import org.apache.spark.util.ThreadUtils +import 
org.apache.spark.util.{ThreadUtils, Utils} import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler} /** Physical plan for Project. */ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) - extends UnaryExecNode with CodegenSupport { + extends UnaryExecNode with CodegenSupport with AliasAwareOutputPartitioning { override def output: Seq[Attribute] = projectList.map(_.toAttribute) @@ -80,7 +81,7 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) override def outputOrdering: Seq[SortOrder] = child.outputOrdering - override def outputPartitioning: Partitioning = child.outputPartitioning + override protected def outputExpressions: Seq[NamedExpression] = projectList override def verboseStringWithOperatorId(): String = { s""" @@ -91,7 +92,6 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) } } - /** Physical plan for Filter. */ case class FilterExec(condition: Expression, child: SparkPlan) extends UnaryExecNode with CodegenSupport with PredicateHelper { @@ -171,6 +171,7 @@ case class FilterExec(condition: Expression, child: SparkPlan) // This is very perf sensitive. // TODO: revisit this. We can consider reordering predicates as well. val generatedIsNotNullChecks = new Array[Boolean](notNullPreds.length) + val extraIsNotNullAttrs = mutable.Set[Attribute]() val generated = otherPreds.map { c => val nullChecks = c.references.map { r => val idx = notNullPreds.indexWhere { n => n.asInstanceOf[IsNotNull].child.semanticEquals(r)} @@ -178,6 +179,9 @@ case class FilterExec(condition: Expression, child: SparkPlan) generatedIsNotNullChecks(idx) = true // Use the child's output. The nullability is what the child produced. 
genPredicate(notNullPreds(idx), input, child.output) + } else if (notNullAttributes.contains(r.exprId) && !extraIsNotNullAttrs.contains(r)) { + extraIsNotNullAttrs += r + genPredicate(IsNotNull(r), input, child.output) } else { "" } @@ -222,7 +226,7 @@ case class FilterExec(condition: Expression, child: SparkPlan) protected override def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") child.execute().mapPartitionsWithIndexInternal { (index, iter) => - val predicate = newPredicate(condition, child.output) + val predicate = Predicate.create(condition, child.output) predicate.initialize(0) iter.filter { row => val r = predicate.eval(row) @@ -294,7 +298,9 @@ case class SampleExec( child.asInstanceOf[CodegenSupport].produce(ctx, this) } - override def needCopyResult: Boolean = withReplacement + override def needCopyResult: Boolean = { + child.asInstanceOf[CodegenSupport].needCopyResult || withReplacement + } override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { val numOutput = metricTerm(ctx, "numOutputRows") @@ -743,7 +749,9 @@ case class SubqueryExec(name: String, child: SparkPlan) private lazy val relationFuture: Future[Array[InternalRow]] = { // relationFuture is used in "doExecute". Therefore we can get the execution id correctly here. val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) - Future { + SQLExecution.withThreadLocalCaptured[Array[InternalRow]]( + sqlContext.sparkSession, + SubqueryExec.executionContext) { // This will run in another thread. Set the execution id so that we can connect these jobs // with the correct execution. 
SQLExecution.withExecutionId(sqlContext.sparkSession, executionId) { @@ -758,7 +766,7 @@ case class SubqueryExec(name: String, child: SparkPlan) SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toSeq) rows } - }(SubqueryExec.executionContext) + } } protected override def doCanonicalize(): SparkPlan = { @@ -782,7 +790,8 @@ case class SubqueryExec(name: String, child: SparkPlan) object SubqueryExec { private[execution] val executionContext = ExecutionContext.fromExecutorService( - ThreadUtils.newDaemonCachedThreadPool("subquery", 16)) + ThreadUtils.newDaemonCachedThreadPool("subquery", + SQLConf.get.getConf(StaticSQLConf.SUBQUERY_MAX_THREAD_THRESHOLD))) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala index 85c36b7da9498..06f411dec158d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.{UnsafeArrayData, UnsafeMapData import org.apache.spark.sql.execution.columnar.compression.CompressibleColumnAccessor import org.apache.spark.sql.execution.vectorized.WritableColumnVector import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval /** * An `Iterator` like trait used to extract values from columnar byte buffer. 
When a value is @@ -36,7 +37,7 @@ import org.apache.spark.sql.types._ private[columnar] trait ColumnAccessor { initialize() - protected def initialize() + protected def initialize(): Unit def hasNext: Boolean @@ -50,7 +51,7 @@ private[columnar] abstract class BasicColumnAccessor[JvmType]( protected val columnType: ColumnType[JvmType]) extends ColumnAccessor { - protected def initialize() {} + protected def initialize(): Unit = {} override def hasNext: Boolean = buffer.hasRemaining @@ -104,6 +105,10 @@ private[columnar] class BinaryColumnAccessor(buffer: ByteBuffer) extends BasicColumnAccessor[Array[Byte]](buffer, BINARY) with NullableColumnAccessor +private[columnar] class IntervalColumnAccessor(buffer: ByteBuffer, dataType: CalendarIntervalType) + extends BasicColumnAccessor[CalendarInterval](buffer, CALENDAR_INTERVAL) + with NullableColumnAccessor + private[columnar] class CompactDecimalColumnAccessor(buffer: ByteBuffer, dataType: DecimalType) extends NativeColumnAccessor(buffer, COMPACT_DECIMAL(dataType)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala index d30655e0c4a20..3d94681a2fb31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala @@ -125,6 +125,9 @@ class StringColumnBuilder extends NativeColumnBuilder(new StringColumnStats, STR private[columnar] class BinaryColumnBuilder extends ComplexColumnBuilder(new BinaryColumnStats, BINARY) +private[columnar] +class IntervalColumnBuilder extends ComplexColumnBuilder(new IntervalColumnStats, CALENDAR_INTERVAL) + private[columnar] class CompactDecimalColumnBuilder(dataType: DecimalType) extends NativeColumnBuilder(new DecimalColumnStats(dataType), COMPACT_DECIMAL(dataType)) @@ -176,6 +179,7 @@ private[columnar] object ColumnBuilder { case DoubleType 
=> new DoubleColumnBuilder case StringType => new StringColumnBuilder case BinaryType => new BinaryColumnBuilder + case CalendarIntervalType => new IntervalColumnBuilder case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => new CompactDecimalColumnBuilder(dt) case dt: DecimalType => new DecimalColumnBuilder(dt) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala index bc7e73ae1ba87..20ecc57c49e75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} private[columnar] class ColumnStatisticsSchema(a: Attribute) extends Serializable { val upperBound = AttributeReference(a.name + ".upperBound", a.dataType, nullable = true)() @@ -295,6 +295,20 @@ private[columnar] final class BinaryColumnStats extends ColumnStats { Array[Any](null, null, nullCount, count, sizeInBytes) } +private[columnar] final class IntervalColumnStats extends ColumnStats { + override def gatherStats(row: InternalRow, ordinal: Int): Unit = { + if (!row.isNullAt(ordinal)) { + sizeInBytes += CALENDAR_INTERVAL.actualSize(row, ordinal) + count += 1 + } else { + gatherNullStats + } + } + + override def collectedStatistics: Array[Any] = + Array[Any](null, null, nullCount, count, sizeInBytes) +} + private[columnar] final class DecimalColumnStats(precision: Int, scale: Int) extends ColumnStats { def this(dt: DecimalType) = this(dt.precision, dt.scale) diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala index 542a10fc175c0..d3c8e9251cefd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} /** @@ -221,7 +221,8 @@ private[columnar] object INT extends NativeColumnType(IntegerType, 4) { override def getField(row: InternalRow, ordinal: Int): Int = row.getInt(ordinal) - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { to.setInt(toOrdinal, from.getInt(fromOrdinal)) } } @@ -249,7 +250,8 @@ private[columnar] object LONG extends NativeColumnType(LongType, 8) { override def getField(row: InternalRow, ordinal: Int): Long = row.getLong(ordinal) - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { to.setLong(toOrdinal, from.getLong(fromOrdinal)) } } @@ -277,7 +279,8 @@ private[columnar] object FLOAT extends NativeColumnType(FloatType, 4) { override def getField(row: InternalRow, ordinal: Int): Float = row.getFloat(ordinal) - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { to.setFloat(toOrdinal, from.getFloat(fromOrdinal)) } } @@ 
-305,7 +308,8 @@ private[columnar] object DOUBLE extends NativeColumnType(DoubleType, 8) { override def getField(row: InternalRow, ordinal: Int): Double = row.getDouble(ordinal) - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { to.setDouble(toOrdinal, from.getDouble(fromOrdinal)) } } @@ -331,7 +335,8 @@ private[columnar] object BOOLEAN extends NativeColumnType(BooleanType, 1) { override def getField(row: InternalRow, ordinal: Int): Boolean = row.getBoolean(ordinal) - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { to.setBoolean(toOrdinal, from.getBoolean(fromOrdinal)) } } @@ -359,7 +364,8 @@ private[columnar] object BYTE extends NativeColumnType(ByteType, 1) { override def getField(row: InternalRow, ordinal: Int): Byte = row.getByte(ordinal) - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { to.setByte(toOrdinal, from.getByte(fromOrdinal)) } } @@ -387,7 +393,8 @@ private[columnar] object SHORT extends NativeColumnType(ShortType, 2) { override def getField(row: InternalRow, ordinal: Int): Short = row.getShort(ordinal) - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { to.setShort(toOrdinal, from.getShort(fromOrdinal)) } } @@ -452,7 +459,8 @@ private[columnar] object STRING row.getUTF8String(ordinal) } - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: 
InternalRow, toOrdinal: Int): Unit = { setField(to, toOrdinal, getField(from, fromOrdinal)) } @@ -496,7 +504,8 @@ private[columnar] case class COMPACT_DECIMAL(precision: Int, scale: Int) row.setDecimal(ordinal, value, precision) } - override def copyField(from: InternalRow, fromOrdinal: Int, to: InternalRow, toOrdinal: Int) { + override def copyField(from: InternalRow, fromOrdinal: Int, + to: InternalRow, toOrdinal: Int): Unit = { setField(to, toOrdinal, getField(from, fromOrdinal)) } } @@ -696,6 +705,44 @@ private[columnar] case class MAP(dataType: MapType) override def clone(v: UnsafeMapData): UnsafeMapData = v.copy() } +private[columnar] object CALENDAR_INTERVAL extends ColumnType[CalendarInterval] { + + override def dataType: DataType = CalendarIntervalType + + override def defaultSize: Int = 16 + + override def getField(row: InternalRow, ordinal: Int): CalendarInterval = row.getInterval(ordinal) + + override def setField(row: InternalRow, ordinal: Int, value: CalendarInterval): Unit = { + row.setInterval(ordinal, value) + } + + override def extract(buffer: ByteBuffer): CalendarInterval = { + val months = ByteBufferHelper.getInt(buffer) + val days = ByteBufferHelper.getInt(buffer) + val microseconds = ByteBufferHelper.getLong(buffer) + new CalendarInterval(months, days, microseconds) + } + + // copy the bytes from ByteBuffer to UnsafeRow + override def extract(buffer: ByteBuffer, row: InternalRow, ordinal: Int): Unit = { + if (row.isInstanceOf[MutableUnsafeRow]) { + val cursor = buffer.position() + buffer.position(cursor + defaultSize) + row.asInstanceOf[MutableUnsafeRow].writer.write(ordinal, buffer.array(), + buffer.arrayOffset() + cursor, defaultSize) + } else { + setField(row, ordinal, extract(buffer)) + } + } + + override def append(v: CalendarInterval, buffer: ByteBuffer): Unit = { + ByteBufferHelper.putInt(buffer, v.months) + ByteBufferHelper.putInt(buffer, v.days) + ByteBufferHelper.putLong(buffer, v.microseconds) + } +} + private[columnar] object 
ColumnType { @tailrec def apply(dataType: DataType): ColumnType[_] = { @@ -710,6 +757,7 @@ private[columnar] object ColumnType { case DoubleType => DOUBLE case StringType => STRING case BinaryType => BINARY + case i: CalendarIntervalType => CALENDAR_INTERVAL case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => COMPACT_DECIMAL(dt) case dt: DecimalType => LARGE_DECIMAL(dt) case arr: ArrayType => ARRAY(arr) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index 2d699e8a9d088..bd2d06665a910 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeFormatter, CodeGenerator, UnsafeRowWriter} import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval /** * An Iterator to walk through the InternalRows from a CachedBatch @@ -51,6 +52,10 @@ class MutableUnsafeRow(val writer: UnsafeRowWriter) extends BaseGenericInternalR // the writer will be used directly to avoid creating wrapper objects override def setDecimal(i: Int, v: Decimal, precision: Int): Unit = throw new UnsupportedOperationException + + override def setInterval(i: Int, value: CalendarInterval): Unit = + throw new UnsupportedOperationException + override def update(i: Int, v: Any): Unit = throw new UnsupportedOperationException // all other methods inherited from GenericMutableRow are not need @@ -81,6 +86,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera case DoubleType => classOf[DoubleColumnAccessor].getName case StringType => 
classOf[StringColumnAccessor].getName case BinaryType => classOf[BinaryColumnAccessor].getName + case CalendarIntervalType => classOf[IntervalColumnAccessor].getName case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => classOf[CompactDecimalColumnAccessor].getName case dt: DecimalType => classOf[DecimalColumnAccessor].getName diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala index 8d13cfb93d270..f03c2586048bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala @@ -310,7 +310,7 @@ case class InMemoryTableScanExec( val buffers = relation.cacheBuilder.cachedColumnBuffers buffers.mapPartitionsWithIndexInternal { (index, cachedBatchIterator) => - val partitionFilter = newPredicate( + val partitionFilter = Predicate.create( partitionFilters.reduceOption(And).getOrElse(Literal(true)), schema) partitionFilter.initialize(index) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala index 18fefa0a6f19f..33b29bde93ee5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala @@ -106,11 +106,12 @@ case class AnalyzePartitionCommand( // Update the metastore if newly computed statistics are different from those // recorded in the metastore. 
- val newPartitions = partitions.flatMap { p => - val newTotalSize = CommandUtils.calculateLocationSize( - sessionState, tableMeta.identifier, p.storage.locationUri) + + val sizes = CommandUtils.calculateMultipleLocationSizes(sparkSession, tableMeta.identifier, + partitions.map(_.storage.locationUri)) + val newPartitions = partitions.zipWithIndex.flatMap { case (p, idx) => val newRowCount = rowCounts.get(p.spec) - val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount) + val newStats = CommandUtils.compareAndGetNewStats(p.stats, sizes(idx), newRowCount) newStats.map(_ => p.copy(stats = newStats)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index b644e6dc471d6..81157ca0bfe9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -31,12 +31,23 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatisti import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.{DataSourceUtils, InMemoryFileIndex} import org.apache.spark.sql.internal.{SessionState, SQLConf} import org.apache.spark.sql.types._ +/** + * For the purpose of calculating total directory sizes, use this filter to + * ignore some irrelevant files. 
+ * @param stagingDir hive staging dir + */ +class PathFilterIgnoreNonData(stagingDir: String) extends PathFilter with Serializable { + override def accept(path: Path): Boolean = { + val fileName = path.getName + !fileName.startsWith(stagingDir) && DataSourceUtils.isDataFile(fileName) + } +} object CommandUtils extends Logging { @@ -50,34 +61,31 @@ object CommandUtils extends Logging { catalog.alterTableStats(table.identifier, Some(newStats)) } else if (table.stats.nonEmpty) { catalog.alterTableStats(table.identifier, None) + } else { + // In other cases, we still need to invalidate the table relation cache. + catalog.refreshTable(table.identifier) } } def calculateTotalSize(spark: SparkSession, catalogTable: CatalogTable): BigInt = { val sessionState = spark.sessionState - if (catalogTable.partitionColumnNames.isEmpty) { - calculateLocationSize(sessionState, catalogTable.identifier, catalogTable.storage.locationUri) + val startTime = System.nanoTime() + val totalSize = if (catalogTable.partitionColumnNames.isEmpty) { + calculateSingleLocationSize(sessionState, catalogTable.identifier, + catalogTable.storage.locationUri) } else { // Calculate table size as a sum of the visible partitions. 
See SPARK-21079 val partitions = sessionState.catalog.listPartitions(catalogTable.identifier) - if (spark.sessionState.conf.parallelFileListingInStatsComputation) { - val paths = partitions.map(x => new Path(x.storage.locationUri.get)) - val stagingDir = sessionState.conf.getConfString("hive.exec.stagingdir", ".hive-staging") - val pathFilter = new PathFilter with Serializable { - override def accept(path: Path): Boolean = isDataPath(path, stagingDir) - } - val fileStatusSeq = InMemoryFileIndex.bulkListLeafFiles( - paths, sessionState.newHadoopConf(), pathFilter, spark, areRootPaths = true) - fileStatusSeq.flatMap(_._2.map(_.getLen)).sum - } else { - partitions.map { p => - calculateLocationSize(sessionState, catalogTable.identifier, p.storage.locationUri) - }.sum - } + logInfo(s"Starting to calculate sizes for ${partitions.length} partitions.") + val paths = partitions.map(_.storage.locationUri) + calculateMultipleLocationSizes(spark, catalogTable.identifier, paths).sum } + logInfo(s"It took ${(System.nanoTime() - startTime) / (1000 * 1000)} ms to calculate" + + s" the total size for table ${catalogTable.identifier}.") + totalSize } - def calculateLocationSize( + def calculateSingleLocationSize( sessionState: SessionState, identifier: TableIdentifier, locationUri: Option[URI]): Long = { @@ -110,7 +118,6 @@ object CommandUtils extends Logging { } val startTime = System.nanoTime() - logInfo(s"Starting to calculate the total file size under path $locationUri.") val size = locationUri.map { p => val path = new Path(p) try { @@ -125,11 +132,44 @@ object CommandUtils extends Logging { } }.getOrElse(0L) val durationInMs = (System.nanoTime() - startTime) / (1000 * 1000) - logInfo(s"It took $durationInMs ms to calculate the total file size under path $locationUri.") + logDebug(s"It took $durationInMs ms to calculate the total file size under path $locationUri.") size } + def calculateMultipleLocationSizes( + sparkSession: SparkSession, + tid: TableIdentifier, + paths: 
Seq[Option[URI]]): Seq[Long] = { + if (sparkSession.sessionState.conf.parallelFileListingInStatsComputation) { + calculateMultipleLocationSizesInParallel(sparkSession, paths.map(_.map(new Path(_)))) + } else { + paths.map(p => calculateSingleLocationSize(sparkSession.sessionState, tid, p)) + } + } + + /** + * Launch a Job to list all leaf files in `paths` and compute the total size + * for each path. + * @param sparkSession the [[SparkSession]] + * @param paths the Seq of [[Option[Path]]]s + * @return a Seq of same size as `paths` where i-th element is total size of `paths(i)` or 0 + * if `paths(i)` is None + */ + def calculateMultipleLocationSizesInParallel( + sparkSession: SparkSession, + paths: Seq[Option[Path]]): Seq[Long] = { + val stagingDir = sparkSession.sessionState.conf + .getConfString("hive.exec.stagingdir", ".hive-staging") + val filter = new PathFilterIgnoreNonData(stagingDir) + val sizes = InMemoryFileIndex.bulkListLeafFiles(paths.flatten, + sparkSession.sessionState.newHadoopConf(), filter, sparkSession, areRootPaths = true).map { + case (_, files) => files.map(_.getLen).sum + } + // the size is 0 where paths(i) is not defined and sizes(i) where it is defined + paths.zipWithIndex.map { case (p, idx) => p.map(_ => sizes(idx)).getOrElse(0L) } + } + def compareAndGetNewStats( oldStats: Option[CatalogStatistics], newTotalSize: BigInt, @@ -214,7 +254,9 @@ object CommandUtils extends Logging { val namedExprs = attrsToGenHistogram.map { attr => val aggFunc = - new ApproximatePercentile(attr, Literal(percentiles), Literal(conf.percentileAccuracy)) + new ApproximatePercentile(attr, + Literal(new GenericArrayData(percentiles), ArrayType(DoubleType, false)), + Literal(conf.percentileAccuracy)) val expr = aggFunc.toAggregateExpression() Alias(expr, expr.toString)() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 
f7d4fa4c4ffcb..18fd2a5ac2330 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command -import java.util.UUID +import scala.collection.JavaConverters._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} @@ -26,12 +26,12 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} -import org.apache.spark.sql.execution.{LeafExecNode, QueryExecution, SparkPlan, UnaryExecNode} -import org.apache.spark.sql.execution.debug._ +import org.apache.spark.sql.connector.ExternalCommandRunner +import org.apache.spark.sql.execution.{ExplainMode, LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.metric.SQLMetric -import org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata} -import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.execution.streaming.IncrementalExecution import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * A logical command that is executed for its side-effects. 
`RunnableCommand`s are @@ -82,6 +82,10 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends LeafExecNode { override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray + override def executeTail(limit: Int): Array[InternalRow] = { + sideEffectResult.takeRight(limit).toArray + } + protected override def doExecute(): RDD[InternalRow] = { sqlContext.sparkContext.parallelize(sideEffectResult, 1) } @@ -119,6 +123,10 @@ case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan) override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray + override def executeTail(limit: Int): Array[InternalRow] = { + sideEffectResult.takeRight(limit).toArray + } + protected override def doExecute(): RDD[InternalRow] = { sqlContext.sparkContext.parallelize(sideEffectResult, 1) } @@ -131,20 +139,15 @@ case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan) * (but do NOT actually execute it). * * {{{ - * EXPLAIN (EXTENDED | CODEGEN) SELECT * FROM ... + * EXPLAIN (EXTENDED | CODEGEN | COST | FORMATTED) SELECT * FROM ... * }}} * * @param logicalPlan plan to explain - * @param extended whether to do extended explain or not - * @param codegen whether to output generated code from whole-stage codegen or not - * @param cost whether to show cost information for operators. + * @param mode explain mode */ case class ExplainCommand( logicalPlan: LogicalPlan, - extended: Boolean = false, - codegen: Boolean = false, - cost: Boolean = false, - formatted: Boolean = false) + mode: ExplainMode) extends RunnableCommand { override val output: Seq[Attribute] = @@ -152,44 +155,13 @@ case class ExplainCommand( // Run through the optimizer to generate the physical plan. 
override def run(sparkSession: SparkSession): Seq[Row] = try { - val queryExecution = ExplainCommandUtil.explainedQueryExecution(sparkSession, logicalPlan, - sparkSession.sessionState.executePlan(logicalPlan)) - val outputString = - if (codegen) { - codegenString(queryExecution.executedPlan) - } else if (extended) { - queryExecution.toString - } else if (cost) { - queryExecution.stringWithStats - } else if (formatted) { - queryExecution.simpleString(formatted = true) - } else { - queryExecution.simpleString - } + val outputString = sparkSession.sessionState.executePlan(logicalPlan).explainString(mode) Seq(Row(outputString)) } catch { case cause: TreeNodeException[_] => ("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_)) } } -object ExplainCommandUtil { - // Returns `QueryExecution` which is used to explain a logical plan. - def explainedQueryExecution( - sparkSession: SparkSession, - logicalPlan: LogicalPlan, - queryExecution: => QueryExecution): QueryExecution = { - if (logicalPlan.isStreaming) { - // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the - // output mode does not matter since there is no `Sink`. - new IncrementalExecution( - sparkSession, logicalPlan, OutputMode.Append(), "", - UUID.randomUUID, UUID.randomUUID, 0, OffsetSeqMetadata(0, 0)) - } else { - queryExecution - } - } -} - /** An explain command for users to see how a streaming batch is executed. */ case class StreamingExplainCommand( queryExecution: IncrementalExecution, @@ -211,3 +183,21 @@ case class StreamingExplainCommand( ("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_)) } } + +/** + * Used to execute an arbitrary string command inside an external execution engine + * rather than Spark. Please check [[ExternalCommandRunner]] for more details. 
+ */ +case class ExternalCommandExecutor( + runner: ExternalCommandRunner, + command: String, + options: Map[String, String]) extends RunnableCommand { + + override def output: Seq[Attribute] = + Seq(AttributeReference("command_output", StringType)()) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val output = runner.executeCommand(command, new CaseInsensitiveStringMap(options.asJava)) + output.map(Row(_)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala deleted file mode 100644 index 470c736da98b7..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.command - -import org.apache.spark.sql.{Row, SparkSession} -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} -import org.apache.spark.sql.types.StringType - - -/** - * A command for users to list the databases/schemas. - * If a databasePattern is supplied then the databases that only match the - * pattern would be listed. 
- * The syntax of using this command in SQL is: - * {{{ - * SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards']; - * }}} - */ -case class ShowDatabasesCommand(databasePattern: Option[String]) extends RunnableCommand { - - // The result of SHOW DATABASES has one column called 'databaseName' - override val output: Seq[Attribute] = { - AttributeReference("databaseName", StringType, nullable = false)() :: Nil - } - - override def run(sparkSession: SparkSession): Seq[Row] = { - val catalog = sparkSession.sessionState.catalog - val databases = - databasePattern.map(catalog.listDatabases).getOrElse(catalog.listDatabases()) - databases.map { d => Row(d) } - } -} - - -/** - * Command for setting the current database. - * {{{ - * USE database_name; - * }}} - */ -case class SetDatabaseCommand(databaseName: String) extends RunnableCommand { - - override def run(sparkSession: SparkSession): Seq[Row] = { - sparkSession.sessionState.catalog.setCurrentDatabase(databaseName) - Seq.empty[Row] - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index ee5d37cebf2f3..47b213fc2d83b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -22,6 +22,7 @@ import java.util.concurrent.TimeUnit._ import scala.collection.{GenMap, GenSeq} import scala.collection.parallel.ForkJoinTaskSupport +import scala.collection.parallel.immutable.ParVector import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration @@ -36,10 +37,12 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import 
org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableCatalog} +import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningUtils} import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter -import org.apache.spark.sql.internal.HiveSerDe +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ import org.apache.spark.util.{SerializableConfiguration, ThreadUtils} @@ -132,6 +135,27 @@ case class AlterDatabasePropertiesCommand( } } +/** + * A command for users to set new location path for a database + * If the database does not exist, an error message will be issued to indicate the database + * does not exist. + * The syntax of using this command in SQL is: + * {{{ + * ALTER (DATABASE|SCHEMA) database_name SET LOCATION path + * }}} + */ +case class AlterDatabaseSetLocationCommand(databaseName: String, location: String) + extends RunnableCommand { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val catalog = sparkSession.sessionState.catalog + val oldDb = catalog.getDatabaseMetadata(databaseName) + catalog.alterDatabase(oldDb.copy(locationUri = CatalogUtils.stringToURI(location))) + + Seq.empty[Row] + } +} + /** * A command for users to show the name of the database, its comment (if one has been set), and its * root location on the filesystem. 
When extended is true, it also shows the database's properties @@ -150,19 +174,22 @@ case class DescribeDatabaseCommand( override def run(sparkSession: SparkSession): Seq[Row] = { val dbMetadata: CatalogDatabase = sparkSession.sessionState.catalog.getDatabaseMetadata(databaseName) + val allDbProperties = dbMetadata.properties val result = Row("Database Name", dbMetadata.name) :: - Row("Description", dbMetadata.description) :: - Row("Location", CatalogUtils.URIToString(dbMetadata.locationUri)) :: Nil + Row("Comment", dbMetadata.description) :: + Row("Location", CatalogUtils.URIToString(dbMetadata.locationUri)):: + Row("Owner", allDbProperties.getOrElse(PROP_OWNER, "")) :: Nil if (extended) { - val properties = - if (dbMetadata.properties.isEmpty) { + val properties = allDbProperties -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES + val propertiesStr = + if (properties.isEmpty) { "" } else { - dbMetadata.properties.toSeq.mkString("(", ", ", ")") + properties.toSeq.mkString("(", ", ", ")") } - result :+ Row("Properties", properties) + result :+ Row("Properties", propertiesStr) } else { result } @@ -249,7 +276,7 @@ case class AlterTableSetPropertiesCommand( // direct property. 
val newTable = table.copy( properties = table.properties ++ properties, - comment = properties.get("comment").orElse(table.comment)) + comment = properties.get(TableCatalog.PROP_COMMENT).orElse(table.comment)) catalog.alterTable(newTable) Seq.empty[Row] } @@ -278,14 +305,14 @@ case class AlterTableUnsetPropertiesCommand( DDLUtils.verifyAlterTableType(catalog, table, isView) if (!ifExists) { propKeys.foreach { k => - if (!table.properties.contains(k) && k != "comment") { + if (!table.properties.contains(k) && k != TableCatalog.PROP_COMMENT) { throw new AnalysisException( s"Attempted to unset non-existent property '$k' in table '${table.identifier}'") } } } // If comment is in the table property, we reset it to None - val tableComment = if (propKeys.contains("comment")) None else table.comment + val tableComment = if (propKeys.contains(TableCatalog.PROP_COMMENT)) None else table.comment val newProperties = table.properties.filter { case (k, _) => !propKeys.contains(k) } val newTable = table.copy(properties = newProperties, comment = tableComment) catalog.alterTable(newTable) @@ -448,14 +475,19 @@ case class AlterTableAddPartitionCommand( CatalogTablePartition(normalizedSpec, table.storage.copy( locationUri = location.map(CatalogUtils.stringToURI))) } - catalog.createPartitions(table.identifier, parts, ignoreIfExists = ifNotExists) + + // Hive metastore may not have enough memory to handle millions of partitions in single RPC. + // Also the request to metastore times out when adding lot of partitions in one shot. 
+ // we should split them into smaller batches + val batchSize = conf.getConf(SQLConf.ADD_PARTITION_BATCH_SIZE) + parts.toIterator.grouped(batchSize).foreach { batch => + catalog.createPartitions(table.identifier, batch, ignoreIfExists = ifNotExists) + } if (table.stats.nonEmpty) { if (sparkSession.sessionState.conf.autoSizeUpdateEnabled) { - val addedSize = parts.map { part => - CommandUtils.calculateLocationSize(sparkSession.sessionState, table.identifier, - part.storage.locationUri) - }.sum + val addedSize = CommandUtils.calculateMultipleLocationSizes(sparkSession, table.identifier, + parts.map(_.storage.locationUri)).sum if (addedSize > 0) { val newStats = CatalogStatistics(sizeInBytes = table.stats.get.sizeInBytes + addedSize) catalog.alterTableStats(table.identifier, Some(newStats)) @@ -663,7 +695,7 @@ case class AlterTableRecoverPartitionsCommand( val statusPar: GenSeq[FileStatus] = if (partitionNames.length > 1 && statuses.length > threshold || partitionNames.length > 2) { // parallelize the list of partitions here, then we can have better parallelism later. 
- val parArray = statuses.par + val parArray = new ParVector(statuses.toVector) parArray.tasksupport = evalTaskSupport parArray } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index d3b2491cd7056..6fdc7f4a58195 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException} import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource} import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionInfo} +import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.types.{StringType, StructField, StructType} @@ -222,6 +223,21 @@ case class ShowFunctionsCommand( case (f, "USER") if showUserFunctions => f.unquotedString case (f, "SYSTEM") if showSystemFunctions => f.unquotedString } - functionNames.sorted.map(Row(_)) + // Hard code "<>", "!=", "between", and "case" for now as there is no corresponding functions. + // "<>", "!=", "between", and "case" is SystemFunctions, only show when showSystemFunctions=true + if (showSystemFunctions) { + (functionNames ++ + StringUtils.filterPattern(FunctionsCommand.virtualOperators, pattern.getOrElse("*"))) + .sorted.map(Row(_)) + } else { + functionNames.sorted.map(Row(_)) + } + } } + +object FunctionsCommand { + // operators that do not have corresponding functions. 
+ // They should be handled `DescribeFunctionCommand`, `ShowFunctionsCommand` + val virtualOperators = Seq("!=", "<>", "between", "case") +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala index 8fee02a8f6c82..1119e5cb1d288 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/resources.scala @@ -47,7 +47,8 @@ case class AddJarCommand(path: String) extends RunnableCommand { */ case class AddFileCommand(path: String) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { - sparkSession.sparkContext.addFile(path) + val recursive = sparkSession.sessionState.conf.addDirectoryRecursiveEnabled + sparkSession.sparkContext.addFile(path, recursive) Seq.empty[Row] } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 9377cb0174673..61500b773cd7e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -17,26 +17,26 @@ package org.apache.spark.sql.execution.command -import java.io.File import java.net.{URI, URISyntaxException} -import java.nio.file.FileSystems +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.Try import scala.util.control.NonFatal import org.apache.hadoop.fs.{FileContext, FsConstants, Path} +import org.apache.hadoop.fs.permission.{AclEntry, AclEntryScope, AclEntryType, FsAction, FsPermission} import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, UnresolvedAttribute, 
UnresolvedRelation} +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, UnresolvedAttribute} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeTableSchema import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -45,7 +45,7 @@ import org.apache.spark.sql.execution.datasources.v2.csv.CSVDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.json.JsonDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -55,44 +55,76 @@ import org.apache.spark.sql.util.SchemaUtils * are identical to the ones defined in the source table. * * The CatalogTable attributes copied from the source table are storage(inputFormat, outputFormat, - * serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec. + * serde, compressed, properties), schema, provider, partitionColumnNames, bucketSpec by default. + * + * Use "CREATE TABLE t1 LIKE t2 USING file_format" to specify new provider for t1. 
+ * For Hive compatibility, use "CREATE TABLE t1 LIKE t2 STORED AS hiveFormat" + * to specify new file storage format (inputFormat, outputFormat, serde) for t1. * * The syntax of using this command in SQL is: * {{{ * CREATE TABLE [IF NOT EXISTS] [db_name.]table_name - * LIKE [other_db_name.]existing_table_name [locationSpec] + * LIKE [other_db_name.]existing_table_name + * [USING provider | + * [ + * [ROW FORMAT row_format] + * [STORED AS file_format] [WITH SERDEPROPERTIES (...)] + * ] + * ] + * [locationSpec] + * [TBLPROPERTIES (property_name=property_value, ...)] * }}} */ case class CreateTableLikeCommand( targetTable: TableIdentifier, sourceTable: TableIdentifier, - location: Option[String], + fileFormat: CatalogStorageFormat, + provider: Option[String], + properties: Map[String, String] = Map.empty, ifNotExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog val sourceTableDesc = catalog.getTempViewOrPermanentTableMetadata(sourceTable) - - val newProvider = if (sourceTableDesc.tableType == CatalogTableType.VIEW) { + val newProvider = if (provider.isDefined) { + if (!DDLUtils.isHiveTable(provider)) { + // check the validation of provider input, invalid provider will throw + // AnalysisException, ClassNotFoundException, or NoClassDefFoundError + DataSource.lookupDataSource(provider.get, sparkSession.sessionState.conf) + } + provider + } else if (sourceTableDesc.tableType == CatalogTableType.VIEW) { Some(sparkSession.sessionState.conf.defaultDataSourceName) + } else if (fileFormat.inputFormat.isDefined) { + Some(DDLUtils.HIVE_PROVIDER) } else { sourceTableDesc.provider } + val newStorage = if (fileFormat.inputFormat.isDefined) { + fileFormat + } else { + sourceTableDesc.storage.copy(locationUri = fileFormat.locationUri) + } + // If the location is specified, we create an external table internally. // Otherwise create a managed table. 
- val tblType = if (location.isEmpty) CatalogTableType.MANAGED else CatalogTableType.EXTERNAL + val tblType = if (newStorage.locationUri.isEmpty) { + CatalogTableType.MANAGED + } else { + CatalogTableType.EXTERNAL + } val newTableDesc = CatalogTable( identifier = targetTable, tableType = tblType, - storage = sourceTableDesc.storage.copy( - locationUri = location.map(CatalogUtils.stringToURI(_))), + storage = newStorage, schema = sourceTableDesc.schema, provider = newProvider, partitionColumnNames = sourceTableDesc.partitionColumnNames, - bucketSpec = sourceTableDesc.bucketSpec) + bucketSpec = sourceTableDesc.bucketSpec, + properties = properties) catalog.createTable(newTableDesc, ifNotExists) Seq.empty[Row] @@ -278,6 +310,13 @@ case class LoadDataCommand( val catalog = sparkSession.sessionState.catalog val targetTable = catalog.getTableMetadata(table) val tableIdentwithDB = targetTable.identifier.quotedString + val normalizedSpec = partition.map { spec => + PartitioningUtils.normalizePartitionSpec( + spec, + targetTable.partitionColumnNames, + tableIdentwithDB, + sparkSession.sessionState.conf.resolver) + } if (targetTable.tableType == CatalogTableType.VIEW) { throw new AnalysisException(s"Target table in LOAD DATA cannot be a view: $tableIdentwithDB") @@ -297,13 +336,6 @@ case class LoadDataCommand( s"do not match number of partitioned columns in table " + s"(${targetTable.partitionColumnNames.size})") } - partition.get.keys.foreach { colName => - if (!targetTable.partitionColumnNames.contains(colName)) { - throw new AnalysisException(s"LOAD DATA target table $tableIdentwithDB is partitioned, " + - s"but the specified partition spec refers to a column that is not partitioned: " + - s"'$colName'") - } - } } else { if (partition.nonEmpty) { throw new AnalysisException(s"LOAD DATA target table $tableIdentwithDB is not " + @@ -353,7 +385,7 @@ case class LoadDataCommand( catalog.loadPartition( targetTable.identifier, loadPath.toString, - partition.get, + 
normalizedSpec.get, isOverwrite, inheritTableSpecs = true, isSrcLocal = isLocal) @@ -464,13 +496,74 @@ case class TruncateTableCommand( partLocations } val hadoopConf = spark.sessionState.newHadoopConf() + val ignorePermissionAcl = SQLConf.get.truncateTableIgnorePermissionAcl locations.foreach { location => if (location.isDefined) { val path = new Path(location.get) try { val fs = path.getFileSystem(hadoopConf) + + // Not all fs impl. support these APIs. + var optPermission: Option[FsPermission] = None + var optAcls: Option[java.util.List[AclEntry]] = None + if (!ignorePermissionAcl) { + val fileStatus = fs.getFileStatus(path) + try { + optPermission = Some(fileStatus.getPermission()) + } catch { + case NonFatal(_) => // do nothing + } + + try { + optAcls = Some(fs.getAclStatus(path).getEntries) + } catch { + case NonFatal(_) => // do nothing + } + } + fs.delete(path, true) + + // We should keep original permission/acl of the path. + // For owner/group, only super-user can set it, for example on HDFS. Because + // current user can delete the path, we assume the user/group is correct or not an issue. fs.mkdirs(path) + if (!ignorePermissionAcl) { + optPermission.foreach { permission => + try { + fs.setPermission(path, permission) + } catch { + case NonFatal(e) => + throw new SecurityException( + s"Failed to set original permission $permission back to " + + s"the created path: $path. Exception: ${e.getMessage}") + } + } + optAcls.foreach { acls => + val aclEntries = acls.asScala.filter(_.getName != null).asJava + + // If the path doesn't have default ACLs, `setAcl` API will throw an error + // as it expects user/group/other permissions must be in ACL entries. + // So we need to add tradition user/group/other permission + // in the form of ACL. 
+ optPermission.map { permission => + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, + AclEntryType.USER, permission.getUserAction())) + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, + AclEntryType.GROUP, permission.getGroupAction())) + aclEntries.add(newAclEntry(AclEntryScope.ACCESS, + AclEntryType.OTHER, permission.getOtherAction())) + } + + try { + fs.setAcl(path, aclEntries) + } catch { + case NonFatal(e) => + throw new SecurityException( + s"Failed to set original ACL $aclEntries back to " + + s"the created path: $path. Exception: ${e.getMessage}") + } + } + } } catch { case NonFatal(e) => throw new AnalysisException( @@ -497,6 +590,16 @@ case class TruncateTableCommand( } Seq.empty[Row] } + + private def newAclEntry( + scope: AclEntryScope, + aclType: AclEntryType, + permission: FsAction): AclEntry = { + new AclEntry.Builder() + .setScope(scope) + .setType(aclType) + .setPermission(permission).build() + } } abstract class DescribeCommandBase extends RunnableCommand { @@ -690,7 +793,8 @@ case class DescribeColumnCommand( } val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table) - val colStats = catalogTable.stats.map(_.colStats).getOrElse(Map.empty) + val colStatsMap = catalogTable.stats.map(_.colStats).getOrElse(Map.empty) + val colStats = if (conf.caseSensitiveAnalysis) colStatsMap else CaseInsensitiveMap(colStatsMap) val cs = colStats.get(field.name) val comment = if (field.metadata.contains("comment")) { @@ -822,22 +926,15 @@ case class ShowTablePropertiesCommand(table: TableIdentifier, propertyKey: Optio } override def run(sparkSession: SparkSession): Seq[Row] = { - val catalog = sparkSession.sessionState.catalog - - if (catalog.isTemporaryTable(table)) { - Seq.empty[Row] - } else { - val catalogTable = sparkSession.sessionState.catalog.getTableMetadata(table) - - propertyKey match { - case Some(p) => - val propValue = catalogTable - .properties - .getOrElse(p, s"Table ${catalogTable.qualifiedName} does not have property: $p") - 
Seq(Row(propValue)) - case None => - catalogTable.properties.map(p => Row(p._1, p._2)).toSeq - } + val catalogTable = sparkSession.sessionState.catalog.getTableMetadata(table) + propertyKey match { + case Some(p) => + val propValue = catalogTable + .properties + .getOrElse(p, s"Table ${catalogTable.qualifiedName} does not have property: $p") + Seq(Row(propValue)) + case None => + catalogTable.properties.map(p => Row(p._1, p._2)).toSeq } } } @@ -859,12 +956,8 @@ case class ShowColumnsCommand( override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val resolver = sparkSession.sessionState.conf.resolver val lookupTable = databaseName match { case None => tableName - case Some(db) if tableName.database.exists(!resolver(_, db)) => - throw new AnalysisException( - s"SHOW COLUMNS with conflicting databases: '$db' != '${tableName.database.get}'") case Some(db) => TableIdentifier(tableName.identifier, Some(db)) } val table = catalog.getTempViewOrPermanentTableMetadata(lookupTable) @@ -935,7 +1028,57 @@ case class ShowPartitionsCommand( } } -case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand { +/** + * Provides common utilities between `ShowCreateTableCommand` and `ShowCreateTableAsSparkCommand`. 
+ */ +trait ShowCreateTableCommandBase { + + protected val table: TableIdentifier + + protected def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = { + if (metadata.tableType == EXTERNAL) { + metadata.storage.locationUri.foreach { location => + builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n" + } + } + } + + protected def showTableComment(metadata: CatalogTable, builder: StringBuilder): Unit = { + metadata + .comment + .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") + .foreach(builder.append) + } + + protected def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { + if (metadata.properties.nonEmpty) { + val props = metadata.properties.map { case (key, value) => + s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" + } + + builder ++= "TBLPROPERTIES " + builder ++= concatByMultiLines(props) + } + } + + + protected def concatByMultiLines(iter: Iterable[String]): String = { + iter.mkString("(\n ", ",\n ", ")\n") + } +} + +/** + * A command that shows the Spark DDL syntax that can be used to create a given table. + * For Hive serde table, this command will generate Spark DDL that can be used to + * create corresponding Spark table. + * + * The syntax of using this command in SQL is: + * {{{ + * SHOW CREATE TABLE [db_name.]table_name + * }}} + */ +case class ShowCreateTableCommand(table: TableIdentifier) + extends RunnableCommand with ShowCreateTableCommandBase { override val output: Seq[Attribute] = Seq( AttributeReference("createtab_stmt", StringType, nullable = false)() ) @@ -950,16 +1093,158 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman // TODO: [SPARK-28692] unify this after we unify the // CREATE TABLE syntax for hive serde and data source table. 
- val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { - showCreateDataSourceTable(tableMetadata) + val metadata = if (DDLUtils.isDatasourceTable(tableMetadata)) { + tableMetadata } else { - showCreateHiveTable(tableMetadata) + // For a Hive serde table, we try to convert it to Spark DDL. + if (tableMetadata.unsupportedFeatures.nonEmpty) { + throw new AnalysisException( + "Failed to execute SHOW CREATE TABLE against table " + + s"${tableMetadata.identifier}, which is created by Hive and uses the " + + "following unsupported feature(s)\n" + + tableMetadata.unsupportedFeatures.map(" - " + _).mkString("\n") + ". " + + s"Please use `SHOW CREATE TABLE ${tableMetadata.identifier} AS SERDE` " + + "to show Hive DDL instead." + ) + } + + if (tableMetadata.tableType == VIEW) { + throw new AnalysisException("Hive view isn't supported by SHOW CREATE TABLE") + } + + if ("true".equalsIgnoreCase(tableMetadata.properties.getOrElse("transactional", "false"))) { + throw new AnalysisException( + "SHOW CREATE TABLE doesn't support transactional Hive table. " + + s"Please use `SHOW CREATE TABLE ${tableMetadata.identifier} AS SERDE` " + + "to show Hive DDL instead.") + } + + convertTableMetadata(tableMetadata) } + val stmt = showCreateDataSourceTable(metadata) + Seq(Row(stmt)) } } + private def convertTableMetadata(tableMetadata: CatalogTable): CatalogTable = { + val hiveSerde = HiveSerDe( + serde = tableMetadata.storage.serde, + inputFormat = tableMetadata.storage.inputFormat, + outputFormat = tableMetadata.storage.outputFormat) + + // Looking for Spark data source that maps to to the Hive serde. + // TODO: some Hive fileformat + row serde might be mapped to Spark data source, e.g. CSV. 
+ val source = HiveSerDe.serdeToSource(hiveSerde) + if (source.isEmpty) { + val builder = StringBuilder.newBuilder + hiveSerde.serde.foreach { serde => + builder ++= s" SERDE: $serde" + } + hiveSerde.inputFormat.foreach { format => + builder ++= s" INPUTFORMAT: $format" + } + hiveSerde.outputFormat.foreach { format => + builder ++= s" OUTPUTFORMAT: $format" + } + throw new AnalysisException( + "Failed to execute SHOW CREATE TABLE against table " + + s"${tableMetadata.identifier}, which is created by Hive and uses the " + + "following unsupported serde configuration\n" + + builder.toString() + ) + } else { + // TODO: should we keep Hive serde properties? + val newStorage = tableMetadata.storage.copy(properties = Map.empty) + tableMetadata.copy(provider = source, storage = newStorage) + } + } + + private def showDataSourceTableDataColumns( + metadata: CatalogTable, builder: StringBuilder): Unit = { + val columns = metadata.schema.fields.map(_.toDDL) + builder ++= concatByMultiLines(columns) + } + + private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = { + // For datasource table, there is a provider there in the metadata. + // If it is a Hive table, we already convert its metadata and fill in a provider. 
+ builder ++= s"USING ${metadata.provider.get}\n" + + val dataSourceOptions = SQLConf.get.redactOptions(metadata.storage.properties).map { + case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'" + } + + if (dataSourceOptions.nonEmpty) { + builder ++= "OPTIONS " + builder ++= concatByMultiLines(dataSourceOptions) + } + } + + private def showDataSourceTableNonDataColumns( + metadata: CatalogTable, builder: StringBuilder): Unit = { + val partCols = metadata.partitionColumnNames + if (partCols.nonEmpty) { + builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n" + } + + metadata.bucketSpec.foreach { spec => + if (spec.bucketColumnNames.nonEmpty) { + builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n" + + if (spec.sortColumnNames.nonEmpty) { + builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n" + } + + builder ++= s"INTO ${spec.numBuckets} BUCKETS\n" + } + } + } + + private def showCreateDataSourceTable(metadata: CatalogTable): String = { + val builder = StringBuilder.newBuilder + + builder ++= s"CREATE TABLE ${table.quotedString} " + showDataSourceTableDataColumns(metadata, builder) + showDataSourceTableOptions(metadata, builder) + showDataSourceTableNonDataColumns(metadata, builder) + showTableComment(metadata, builder) + showTableLocation(metadata, builder) + showTableProperties(metadata, builder) + + builder.toString() + } +} + +/** + * This commands generates the DDL for Hive serde table. 
+ * + * The syntax of using this command in SQL is: + * {{{ + * SHOW CREATE TABLE table_identifier AS SERDE; + * }}} + */ +case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) + extends RunnableCommand with ShowCreateTableCommandBase { + override val output: Seq[Attribute] = Seq( + AttributeReference("createtab_stmt", StringType, nullable = false)() + ) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val catalog = sparkSession.sessionState.catalog + val tableMetadata = catalog.getTableMetadata(table) + + val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { + throw new AnalysisException( + s"$table is a Spark data source table. Use `SHOW CREATE TABLE` without `AS SERDE` instead.") + } else { + showCreateHiveTable(tableMetadata) + } + + Seq(Row(stmt)) + } + private def showCreateHiveTable(metadata: CatalogTable): String = { def reportUnsupportedError(features: Seq[String]): Unit = { throw new AnalysisException( @@ -987,10 +1272,10 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman builder ++= s"CREATE$tableTypeString ${table.quotedString}" if (metadata.tableType == VIEW) { - if (metadata.schema.nonEmpty) { - builder ++= metadata.schema.map(_.name).mkString("(", ", ", ")") - } - builder ++= metadata.viewText.mkString(" AS\n", "", "\n") + showViewDataColumns(metadata, builder) + showTableComment(metadata, builder) + showViewProperties(metadata, builder) + showViewText(metadata, builder) } else { showHiveTableHeader(metadata, builder) showTableComment(metadata, builder) @@ -1003,13 +1288,42 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman builder.toString() } + private def showViewDataColumns(metadata: CatalogTable, builder: StringBuilder): Unit = { + if (metadata.schema.nonEmpty) { + val viewColumns = metadata.schema.map { f => + val comment = f.getComment() + .map(escapeSingleQuotedString) + .map(" COMMENT '" + _ + "'") + + // view columns shouldn't have data type 
info + s"${quoteIdentifier(f.name)}${comment.getOrElse("")}" + } + builder ++= concatByMultiLines(viewColumns) + } + } + + private def showViewProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { + val viewProps = metadata.properties.filterKeys(!_.startsWith(CatalogTable.VIEW_PREFIX)) + if (viewProps.nonEmpty) { + val props = viewProps.map { case (key, value) => + s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" + } + + builder ++= s"TBLPROPERTIES ${concatByMultiLines(props)}" + } + } + + private def showViewText(metadata: CatalogTable, builder: StringBuilder): Unit = { + builder ++= metadata.viewText.mkString("AS ", "", "\n") + } + private def showHiveTableHeader(metadata: CatalogTable, builder: StringBuilder): Unit = { val columns = metadata.schema.filterNot { column => metadata.partitionColumnNames.contains(column.name) }.map(_.toDDL) if (columns.nonEmpty) { - builder ++= columns.mkString("(", ", ", ")\n") + builder ++= concatByMultiLines(columns) } } @@ -1021,7 +1335,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman if (metadata.bucketSpec.isDefined) { val bucketSpec = metadata.bucketSpec.get - builder ++= s"CLUSTERED BY (${bucketSpec.bucketColumnNames.mkString(",")})\n" + builder ++= s"CLUSTERED BY (${bucketSpec.bucketColumnNames.mkString(", ")})\n" if (bucketSpec.sortColumnNames.nonEmpty) { builder ++= s"SORTED BY (${bucketSpec.sortColumnNames.map(_ + " ASC").mkString(", ")})\n" @@ -1036,12 +1350,12 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman storage.serde.foreach { serde => builder ++= s"ROW FORMAT SERDE '$serde'\n" - val serdeProps = metadata.storage.properties.map { + val serdeProps = SQLConf.get.redactOptions(metadata.storage.properties).map { case (key, value) => s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" } - builder ++= serdeProps.mkString("WITH SERDEPROPERTIES (\n ", ",\n ", "\n)\n") + builder ++= 
s"WITH SERDEPROPERTIES ${concatByMultiLines(serdeProps)}" } if (storage.inputFormat.isDefined || storage.outputFormat.isDefined) { @@ -1056,83 +1370,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman } } } - - private def showTableLocation(metadata: CatalogTable, builder: StringBuilder): Unit = { - if (metadata.tableType == EXTERNAL) { - metadata.storage.locationUri.foreach { location => - builder ++= s"LOCATION '${escapeSingleQuotedString(CatalogUtils.URIToString(location))}'\n" - } - } - } - - private def showTableComment(metadata: CatalogTable, builder: StringBuilder): Unit = { - metadata - .comment - .map("COMMENT '" + escapeSingleQuotedString(_) + "'\n") - .foreach(builder.append) - } - - private def showTableProperties(metadata: CatalogTable, builder: StringBuilder): Unit = { - if (metadata.properties.nonEmpty) { - val props = metadata.properties.map { case (key, value) => - s"'${escapeSingleQuotedString(key)}' = '${escapeSingleQuotedString(value)}'" - } - - builder ++= props.mkString("TBLPROPERTIES (\n ", ",\n ", "\n)\n") - } - } - - private def showCreateDataSourceTable(metadata: CatalogTable): String = { - val builder = StringBuilder.newBuilder - - builder ++= s"CREATE TABLE ${table.quotedString} " - showDataSourceTableDataColumns(metadata, builder) - showDataSourceTableOptions(metadata, builder) - showDataSourceTableNonDataColumns(metadata, builder) - showTableComment(metadata, builder) - showTableLocation(metadata, builder) - showTableProperties(metadata, builder) - - builder.toString() - } - - private def showDataSourceTableDataColumns( - metadata: CatalogTable, builder: StringBuilder): Unit = { - val columns = metadata.schema.fields.map(_.toDDL) - builder ++= columns.mkString("(", ", ", ")\n") - } - - private def showDataSourceTableOptions(metadata: CatalogTable, builder: StringBuilder): Unit = { - builder ++= s"USING ${metadata.provider.get}\n" - - val dataSourceOptions = 
SQLConf.get.redactOptions(metadata.storage.properties).map { - case (key, value) => s"${quoteIdentifier(key)} '${escapeSingleQuotedString(value)}'" - } - - if (dataSourceOptions.nonEmpty) { - builder ++= "OPTIONS (\n" - builder ++= dataSourceOptions.mkString(" ", ",\n ", "\n") - builder ++= ")\n" - } - } - - private def showDataSourceTableNonDataColumns( - metadata: CatalogTable, builder: StringBuilder): Unit = { - val partCols = metadata.partitionColumnNames - if (partCols.nonEmpty) { - builder ++= s"PARTITIONED BY ${partCols.mkString("(", ", ", ")")}\n" - } - - metadata.bucketSpec.foreach { spec => - if (spec.bucketColumnNames.nonEmpty) { - builder ++= s"CLUSTERED BY ${spec.bucketColumnNames.mkString("(", ", ", ")")}\n" - - if (spec.sortColumnNames.nonEmpty) { - builder ++= s"SORTED BY ${spec.sortColumnNames.mkString("(", ", ", ")")}\n" - } - - builder ++= s"INTO ${spec.numBuckets} BUCKETS\n" - } - } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index b31514827220e..38481dda428a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -21,49 +21,14 @@ import scala.collection.mutable import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, UnresolvedRelation} -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, UnresolvedFunction, UnresolvedRelation, ViewType} +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{Alias, SubqueryExpression} import 
org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View} -import org.apache.spark.sql.types.{MetadataBuilder, StructType} +import org.apache.spark.sql.types.MetadataBuilder import org.apache.spark.sql.util.SchemaUtils - -/** - * ViewType is used to specify the expected view type when we want to create or replace a view in - * [[CreateViewCommand]]. - */ -sealed trait ViewType { - override def toString: String = getClass.getSimpleName.stripSuffix("$") -} - -/** - * LocalTempView means session-scoped local temporary views. Its lifetime is the lifetime of the - * session that created it, i.e. it will be automatically dropped when the session terminates. It's - * not tied to any databases, i.e. we can't use `db1.view1` to reference a local temporary view. - */ -object LocalTempView extends ViewType - -/** - * GlobalTempView means cross-session global temporary views. Its lifetime is the lifetime of the - * Spark application, i.e. it will be automatically dropped when the application terminates. It's - * tied to a system preserved database `global_temp`, and we must use the qualified name to refer a - * global temp view, e.g. SELECT * FROM global_temp.view1. - */ -object GlobalTempView extends ViewType - -/** - * PersistedView means cross-session persisted views. Persisted views stay until they are - * explicitly dropped by user command. It's always tied to a database, default to the current - * database if not specified. - * - * Note that, Existing persisted view with the same name are not visible to the current session - * while the local temporary view exists, unless the view name is qualified by database. - */ -object PersistedView extends ViewType - - /** * Create or replace a view with given query plan. This command will generate some view-specific * properties(e.g. 
view default database, view query output column names) and store them as @@ -136,11 +101,12 @@ case class CreateViewCommand( s"specified by CREATE VIEW (num: `${userSpecifiedColumns.length}`).") } + val catalog = sparkSession.sessionState.catalog + // When creating a permanent view, not allowed to reference temporary objects. // This should be called after `qe.assertAnalyzed()` (i.e., `child` can be resolved) - verifyTemporaryObjectsNotExists(sparkSession) + verifyTemporaryObjectsNotExists(catalog) - val catalog = sparkSession.sessionState.catalog if (viewType == LocalTempView) { val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) catalog.createTempView(name.table, aliasedPlan, overrideIfExists = replace) @@ -180,9 +146,8 @@ case class CreateViewCommand( /** * Permanent views are not allowed to reference temp objects, including temp function and views */ - private def verifyTemporaryObjectsNotExists(sparkSession: SparkSession): Unit = { - import sparkSession.sessionState.analyzer.AsTableIdentifier - + private def verifyTemporaryObjectsNotExists(catalog: SessionCatalog): Unit = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ if (!isTemporary) { // This func traverses the unresolved plan `child`. Below are the reasons: // 1) Analyzer replaces unresolved temporary views by a SubqueryAlias with the corresponding @@ -190,21 +155,24 @@ case class CreateViewCommand( // added/generated from a temporary view. // 2) The temp functions are represented by multiple classes. Most are inaccessible from this // package (e.g., HiveGenericUDF). - child.collect { - // Disallow creating permanent views based on temporary views. 
- case UnresolvedRelation(AsTableIdentifier(ident)) - if sparkSession.sessionState.catalog.isTemporaryTable(ident) => - // temporary views are only stored in the session catalog - throw new AnalysisException(s"Not allowed to create a permanent view $name by " + - s"referencing a temporary view $ident") - case other if !other.resolved => other.expressions.flatMap(_.collect { - // Disallow creating permanent views based on temporary UDFs. - case e: UnresolvedFunction - if sparkSession.sessionState.catalog.isTemporaryFunction(e.name) => + def verify(child: LogicalPlan) { + child.collect { + // Disallow creating permanent views based on temporary views. + case UnresolvedRelation(nameParts) if catalog.isTempView(nameParts) => throw new AnalysisException(s"Not allowed to create a permanent view $name by " + - s"referencing a temporary function `${e.name}`") - }) + s"referencing a temporary view ${nameParts.quoted}. " + + "Please create a temp view instead by CREATE TEMP VIEW") + case other if !other.resolved => other.expressions.flatMap(_.collect { + // Traverse subquery plan for any unresolved relations. + case e: SubqueryExpression => verify(e.plan) + // Disallow creating permanent views based on temporary UDFs. + case e: UnresolvedFunction if catalog.isTemporaryFunction(e.name) => + throw new AnalysisException(s"Not allowed to create a permanent view $name by " + + s"referencing a temporary function `${e.name}`") + }) + } } + verify(child) } } @@ -316,13 +284,6 @@ object ViewHelper { import CatalogTable._ - /** - * Generate the view default database in `properties`. - */ - private def generateViewDefaultDatabase(databaseName: String): Map[String, String] = { - Map(VIEW_DEFAULT_DATABASE -> databaseName) - } - /** * Generate the view query output column names in `properties`. 
*/ @@ -372,10 +333,10 @@ object ViewHelper { SchemaUtils.checkColumnNameDuplication( fieldNames, "in the view definition", session.sessionState.conf.resolver) - // Generate the view default database name. - val viewDefaultDatabase = session.sessionState.catalog.getCurrentDatabase + // Generate the view default catalog and namespace. + val manager = session.sessionState.catalogManager removeQueryColumnNames(properties) ++ - generateViewDefaultDatabase(viewDefaultDatabase) ++ + catalogAndNamespaceToProps(manager.currentCatalog.name, manager.currentNamespace) ++ generateQueryColumnNames(queryOutput) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala index 8736d0713e0b3..91313f33a78e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/CatalogFileIndex.scala @@ -81,10 +81,15 @@ class CatalogFileIndex( } val partitionSpec = PartitionSpec(partitionSchema, partitions) val timeNs = System.nanoTime() - startTime - new PrunedInMemoryFileIndex( - sparkSession, new Path(baseLocation.get), fileStatusCache, partitionSpec, Option(timeNs)) + new InMemoryFileIndex(sparkSession, + rootPathsSpecified = partitionSpec.partitions.map(_.path), + parameters = Map.empty, + userSpecifiedSchema = Some(partitionSpec.partitionColumns), + fileStatusCache = fileStatusCache, + userSpecifiedPartitionSpec = Some(partitionSpec), + metadataOpsTimeNs = Some(timeNs)) } else { - new InMemoryFileIndex(sparkSession, rootPaths, table.storage.properties, + new InMemoryFileIndex(sparkSession, rootPaths, parameters = table.storage.properties, userSpecifiedSchema = None, fileStatusCache = fileStatusCache) } } @@ -101,23 +106,3 @@ class CatalogFileIndex( override def hashCode(): Int = table.identifier.hashCode() } - -/** - * An override of the 
standard HDFS listing based catalog, that overrides the partition spec with - * the information from the metastore. - * - * @param tableBasePath The default base path of the Hive metastore table - * @param partitionSpec The partition specifications from Hive metastore - */ -private class PrunedInMemoryFileIndex( - sparkSession: SparkSession, - tableBasePath: Path, - fileStatusCache: FileStatusCache, - override val partitionSpec: PartitionSpec, - override val metadataOpsTimeNs: Option[Long]) - extends InMemoryFileIndex( - sparkSession, - partitionSpec.partitions.map(_.path), - Map.empty, - Some(partitionSpec.partitionColumns), - fileStatusCache) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 9376f08351791..3615afcf86c7a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.TableProvider import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command.DataWritingCommand import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat @@ -46,7 +47,6 @@ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.{RateStreamProvider, TextSocketSourceProvider} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ -import org.apache.spark.sql.sources.v2.TableProvider import org.apache.spark.sql.streaming.OutputMode import 
org.apache.spark.sql.types.{CalendarIntervalType, StructField, StructType} import org.apache.spark.sql.util.SchemaUtils @@ -343,7 +343,12 @@ case class DataSource( val baseRelation = dataSource.createRelation(sparkSession.sqlContext, caseInsensitiveOptions) if (baseRelation.schema != schema) { - throw new AnalysisException(s"$className does not allow user-specified schemas.") + throw new AnalysisException( + "The user-specified schema doesn't match the actual schema: " + + s"user-specified: ${schema.toDDL}, actual: ${baseRelation.schema.toDDL}. If " + + "you're using DataFrameReader.schema API or creating a table, please do not " + + "specify the schema. Or if you're scanning an existed table, please drop " + + "it and re-create it.") } baseRelation @@ -378,8 +383,6 @@ case class DataSource( // This is a non-streaming file based datasource. case (format: FileFormat, _) => - val globbedPaths = - checkAndGlobPathIfNecessary(checkEmptyGlobPath = true, checkFilesExist = checkFilesExist) val useCatalogFileIndex = sparkSession.sqlContext.conf.manageFilesourcePartitions && catalogTable.isDefined && catalogTable.get.tracksPartitionsInCatalog && catalogTable.get.partitionColumnNames.nonEmpty @@ -391,6 +394,8 @@ case class DataSource( catalogTable.get.stats.map(_.sizeInBytes.toLong).getOrElse(defaultTableSize)) (index, catalogTable.get.dataSchema, catalogTable.get.partitionSchema) } else { + val globbedPaths = checkAndGlobPathIfNecessary( + checkEmptyGlobPath = true, checkFilesExist = checkFilesExist) val index = createInMemoryFileIndex(globbedPaths) val (resultDataSchema, resultPartitionSchema) = getOrInferFileFormatSchema(format, () => index) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceResolution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceResolution.scala deleted file mode 100644 index a37a2cf7f0369..0000000000000 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceResolution.scala +++ /dev/null @@ -1,384 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources - -import scala.collection.mutable - -import org.apache.spark.sql.{AnalysisException, SaveMode} -import org.apache.spark.sql.catalog.v2.{CatalogManager, CatalogPlugin, Identifier, LookupCatalog, TableCatalog} -import org.apache.spark.sql.catalog.v2.expressions.Transform -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{CastSupport, UnresolvedRelation} -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTableType, CatalogUtils, UnresolvedCatalogRelation} -import org.apache.spark.sql.catalyst.plans.logical.{CreateTableAsSelect, CreateV2Table, DeleteFromTable, DropTable, Filter, LogicalPlan, ReplaceTable, ReplaceTableAsSelect, ShowTables, SubqueryAlias} -import org.apache.spark.sql.catalyst.plans.logical.sql.{AlterTableAddColumnsStatement, AlterTableSetLocationStatement, AlterTableSetPropertiesStatement, AlterTableUnsetPropertiesStatement, AlterViewSetPropertiesStatement, AlterViewUnsetPropertiesStatement, 
CreateTableAsSelectStatement, CreateTableStatement, DeleteFromStatement, DescribeColumnStatement, DescribeTableStatement, DropTableStatement, DropViewStatement, QualifiedColType, ReplaceTableAsSelectStatement, ReplaceTableStatement, ShowTablesStatement} -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.command.{AlterTableAddColumnsCommand, AlterTableSetLocationCommand, AlterTableSetPropertiesCommand, AlterTableUnsetPropertiesCommand, DescribeColumnCommand, DescribeTableCommand, DropTableCommand, ShowTablesCommand} -import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} - -case class DataSourceResolution( - conf: SQLConf, - catalogManager: CatalogManager) - extends Rule[LogicalPlan] with CastSupport with LookupCatalog { - - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ - - def v2SessionCatalog: CatalogPlugin = sessionCatalog.getOrElse( - throw new AnalysisException("No v2 session catalog implementation is available")) - - override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case CreateTableStatement( - AsTableIdentifier(table), schema, partitionCols, bucketSpec, properties, - V1Provider(provider), options, location, comment, ifNotExists) => - // the source is v1, the identifier has no catalog, and there is no default v2 catalog - val tableDesc = buildCatalogTable(table, schema, partitionCols, bucketSpec, properties, - provider, options, location, comment, ifNotExists) - val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists - - CreateTable(tableDesc, mode, None) - - case create: CreateTableStatement => - // the provider was not a v1 source or a v2 catalog is the default, convert to a v2 plan - val CatalogObjectIdentifier(maybeCatalog, identifier) = create.tableName - maybeCatalog match { - case 
Some(catalog) => - // the identifier had a catalog, or there is a default v2 catalog - convertCreateTable(catalog.asTableCatalog, identifier, create) - case _ => - // the identifier had no catalog and no default catalog is set, but the source is v2. - // use the v2 session catalog, which delegates to the global v1 session catalog - convertCreateTable(v2SessionCatalog.asTableCatalog, identifier, create) - } - - case CreateTableAsSelectStatement( - AsTableIdentifier(table), query, partitionCols, bucketSpec, properties, - V1Provider(provider), options, location, comment, ifNotExists) => - // the source is v1, the identifier has no catalog, and there is no default v2 catalog - val tableDesc = buildCatalogTable(table, new StructType, partitionCols, bucketSpec, - properties, provider, options, location, comment, ifNotExists) - val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists - - CreateTable(tableDesc, mode, Some(query)) - - case create: CreateTableAsSelectStatement => - // the provider was not a v1 source or a v2 catalog is the default, convert to a v2 plan - val CatalogObjectIdentifier(maybeCatalog, identifier) = create.tableName - maybeCatalog match { - case Some(catalog) => - // the identifier had a catalog, or there is a default v2 catalog - convertCTAS(catalog.asTableCatalog, identifier, create) - case _ => - // the identifier had no catalog and no default catalog is set, but the source is v2. 
- // use the v2 session catalog, which delegates to the global v1 session catalog - convertCTAS(v2SessionCatalog.asTableCatalog, identifier, create) - } - - case DescribeColumnStatement( - AsTableIdentifier(tableName), colName, isExtended) => - DescribeColumnCommand(tableName, colName, isExtended) - - case DescribeColumnStatement( - CatalogObjectIdentifier(Some(catalog), ident), colName, isExtended) => - throw new AnalysisException("Describing columns is not supported for v2 tables.") - - case DescribeTableStatement( - AsTableIdentifier(tableName), partitionSpec, isExtended) => - DescribeTableCommand(tableName, partitionSpec, isExtended) - - case ReplaceTableStatement( - AsTableIdentifier(table), schema, partitionCols, bucketSpec, properties, - V1Provider(provider), options, location, comment, orCreate) => - throw new AnalysisException( - s"Replacing tables is not supported using the legacy / v1 Spark external catalog" + - s" API. Write provider name: $provider, identifier: $table.") - - case ReplaceTableAsSelectStatement( - AsTableIdentifier(table), query, partitionCols, bucketSpec, properties, - V1Provider(provider), options, location, comment, orCreate) => - throw new AnalysisException( - s"Replacing tables is not supported using the legacy / v1 Spark external catalog" + - s" API. 
Write provider name: $provider, identifier: $table.") - - case replace: ReplaceTableStatement => - // the provider was not a v1 source, convert to a v2 plan - val CatalogObjectIdentifier(maybeCatalog, identifier) = replace.tableName - val catalog = maybeCatalog.orElse(sessionCatalog) - .getOrElse(throw new AnalysisException( - s"No catalog specified for table ${identifier.quoted} and no default catalog is set")) - .asTableCatalog - convertReplaceTable(catalog, identifier, replace) - - case rtas: ReplaceTableAsSelectStatement => - // the provider was not a v1 source, convert to a v2 plan - val CatalogObjectIdentifier(maybeCatalog, identifier) = rtas.tableName - val catalog = maybeCatalog.orElse(sessionCatalog) - .getOrElse(throw new AnalysisException( - s"No catalog specified for table ${identifier.quoted} and no default catalog is set")) - .asTableCatalog - convertRTAS(catalog, identifier, rtas) - - case DropTableStatement(CatalogObjectIdentifier(Some(catalog), ident), ifExists, _) => - DropTable(catalog.asTableCatalog, ident, ifExists) - - case DropTableStatement(AsTableIdentifier(tableName), ifExists, purge) => - DropTableCommand(tableName, ifExists, isView = false, purge) - - case DropViewStatement(CatalogObjectIdentifier(Some(catalog), ident), _) => - throw new AnalysisException( - s"Can not specify catalog `${catalog.name}` for view $ident " + - s"because view support in catalog has not been implemented yet") - - case DropViewStatement(AsTableIdentifier(tableName), ifExists) => - DropTableCommand(tableName, ifExists, isView = true, purge = false) - - case AlterTableSetPropertiesStatement(AsTableIdentifier(table), properties) => - AlterTableSetPropertiesCommand(table, properties, isView = false) - - case AlterViewSetPropertiesStatement(AsTableIdentifier(table), properties) => - AlterTableSetPropertiesCommand(table, properties, isView = true) - - case AlterTableUnsetPropertiesStatement(AsTableIdentifier(table), propertyKeys, ifExists) => - 
AlterTableUnsetPropertiesCommand(table, propertyKeys, ifExists, isView = false) - - case AlterViewUnsetPropertiesStatement(AsTableIdentifier(table), propertyKeys, ifExists) => - AlterTableUnsetPropertiesCommand(table, propertyKeys, ifExists, isView = true) - - case AlterTableSetLocationStatement(AsTableIdentifier(table), newLocation) => - AlterTableSetLocationCommand(table, None, newLocation) - - case AlterTableAddColumnsStatement(AsTableIdentifier(table), newColumns) - if newColumns.forall(_.name.size == 1) => - // only top-level adds are supported using AlterTableAddColumnsCommand - AlterTableAddColumnsCommand(table, newColumns.map(convertToStructField)) - - case DeleteFromStatement(AsTableIdentifier(table), tableAlias, condition) => - throw new AnalysisException( - s"Delete from tables is not supported using the legacy / v1 Spark external catalog" + - s" API. Identifier: $table.") - - case delete: DeleteFromStatement => - val relation = UnresolvedRelation(delete.tableName) - val aliased = delete.tableAlias.map(SubqueryAlias(_, relation)).getOrElse(relation) - DeleteFromTable(aliased, delete.condition) - - case ShowTablesStatement(None, pattern) => - defaultCatalog match { - case Some(catalog) => - ShowTables( - catalog.asTableCatalog, - catalogManager.currentNamespace, - pattern) - case None => - ShowTablesCommand(None, pattern) - } - - case ShowTablesStatement(Some(namespace), pattern) => - val CatalogNamespace(maybeCatalog, ns) = namespace - maybeCatalog match { - case Some(catalog) => - ShowTables(catalog.asTableCatalog, ns, pattern) - case None => - if (namespace.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${namespace.quoted}") - } - ShowTablesCommand(Some(namespace.quoted), pattern) - } - } - - object V1Provider { - def unapply(provider: String): Option[String] = { - DataSource.lookupDataSourceV2(provider, conf) match { - // TODO(SPARK-28396): Currently file source v2 can't work with tables. 
- case Some(_: FileDataSourceV2) => Some(provider) - case Some(_) => None - case _ => Some(provider) - } - } - } - - private def buildCatalogTable( - table: TableIdentifier, - schema: StructType, - partitioning: Seq[Transform], - bucketSpec: Option[BucketSpec], - properties: Map[String, String], - provider: String, - options: Map[String, String], - location: Option[String], - comment: Option[String], - ifNotExists: Boolean): CatalogTable = { - - val storage = DataSource.buildStorageFormatFromOptions(options) - if (location.isDefined && storage.locationUri.isDefined) { - throw new AnalysisException( - "LOCATION and 'path' in OPTIONS are both used to indicate the custom table path, " + - "you can only specify one of them.") - } - val customLocation = storage.locationUri.orElse(location.map(CatalogUtils.stringToURI)) - - val tableType = if (customLocation.isDefined) { - CatalogTableType.EXTERNAL - } else { - CatalogTableType.MANAGED - } - - CatalogTable( - identifier = table, - tableType = tableType, - storage = storage.copy(locationUri = customLocation), - schema = schema, - provider = Some(provider), - partitionColumnNames = partitioning.asPartitionColumns, - bucketSpec = bucketSpec, - properties = properties, - comment = comment) - } - - private def convertCTAS( - catalog: TableCatalog, - identifier: Identifier, - ctas: CreateTableAsSelectStatement): CreateTableAsSelect = { - // convert the bucket spec and add it as a transform - val partitioning = ctas.partitioning ++ ctas.bucketSpec.map(_.asTransform) - val properties = convertTableProperties( - ctas.properties, ctas.options, ctas.location, ctas.comment, ctas.provider) - - CreateTableAsSelect( - catalog, - identifier, - partitioning, - ctas.asSelect, - properties, - writeOptions = ctas.options.filterKeys(_ != "path"), - ignoreIfExists = ctas.ifNotExists) - } - - private def convertCreateTable( - catalog: TableCatalog, - identifier: Identifier, - create: CreateTableStatement): CreateV2Table = { - // convert the 
bucket spec and add it as a transform - val partitioning = create.partitioning ++ create.bucketSpec.map(_.asTransform) - val properties = convertTableProperties( - create.properties, create.options, create.location, create.comment, create.provider) - - CreateV2Table( - catalog, - identifier, - create.tableSchema, - partitioning, - properties, - ignoreIfExists = create.ifNotExists) - } - - private def convertRTAS( - catalog: TableCatalog, - identifier: Identifier, - rtas: ReplaceTableAsSelectStatement): ReplaceTableAsSelect = { - // convert the bucket spec and add it as a transform - val partitioning = rtas.partitioning ++ rtas.bucketSpec.map(_.asTransform) - val properties = convertTableProperties( - rtas.properties, rtas.options, rtas.location, rtas.comment, rtas.provider) - - ReplaceTableAsSelect( - catalog, - identifier, - partitioning, - rtas.asSelect, - properties, - writeOptions = rtas.options.filterKeys(_ != "path"), - orCreate = rtas.orCreate) - } - - private def convertReplaceTable( - catalog: TableCatalog, - identifier: Identifier, - replace: ReplaceTableStatement): ReplaceTable = { - // convert the bucket spec and add it as a transform - val partitioning = replace.partitioning ++ replace.bucketSpec.map(_.asTransform) - val properties = convertTableProperties( - replace.properties, replace.options, replace.location, replace.comment, replace.provider) - - ReplaceTable( - catalog, - identifier, - replace.tableSchema, - partitioning, - properties, - orCreate = replace.orCreate) - } - - private def convertTableProperties( - properties: Map[String, String], - options: Map[String, String], - location: Option[String], - comment: Option[String], - provider: String): Map[String, String] = { - if (options.contains("path") && location.isDefined) { - throw new AnalysisException( - "LOCATION and 'path' in OPTIONS are both used to indicate the custom table path, " + - "you can only specify one of them.") - } - - if ((options.contains("comment") || 
properties.contains("comment")) - && comment.isDefined) { - throw new AnalysisException( - "COMMENT and option/property 'comment' are both used to set the table comment, you can " + - "only specify one of them.") - } - - if (options.contains("provider") || properties.contains("provider")) { - throw new AnalysisException( - "USING and option/property 'provider' are both used to set the provider implementation, " + - "you can only specify one of them.") - } - - val filteredOptions = options.filterKeys(_ != "path") - - // create table properties from TBLPROPERTIES and OPTIONS clauses - val tableProperties = new mutable.HashMap[String, String]() - tableProperties ++= properties - tableProperties ++= filteredOptions - - // convert USING, LOCATION, and COMMENT clauses to table properties - tableProperties += ("provider" -> provider) - comment.map(text => tableProperties += ("comment" -> text)) - location.orElse(options.get("path")).map(loc => tableProperties += ("location" -> loc)) - - tableProperties.toMap - } - - private def convertToStructField(col: QualifiedColType): StructField = { - val builder = new MetadataBuilder - col.comment.foreach(builder.putString("comment", _)) - - val cleanedDataType = HiveStringType.replaceCharType(col.dataType) - if (col.dataType != cleanedDataType) { - builder.putString(HIVE_TYPE_STRING, col.dataType.catalogString) - } - - StructField( - col.name.head, - cleanedDataType, - nullable = true, - builder.build()) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 4dcf5c52ce83d..e3a0a0a6c34e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -33,8 +33,8 @@ import org.apache.spark.sql.catalyst.catalog._ import 
org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoTable, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.planning.ScanOperation +import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ @@ -140,7 +140,7 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast if query.resolved && DDLUtils.isDatasourceTable(tableDesc) => CreateDataSourceTableAsSelectCommand(tableDesc, mode, query, query.output.map(_.name)) - case InsertIntoTable(l @ LogicalRelation(_: InsertableRelation, _, _, _), + case InsertIntoStatement(l @ LogicalRelation(_: InsertableRelation, _, _, _), parts, query, overwrite, false) if parts.isEmpty => InsertIntoDataSourceCommand(l, query, overwrite) @@ -152,7 +152,7 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast InsertIntoDataSourceDirCommand(storage, provider.get, query, overwrite) - case i @ InsertIntoTable( + case i @ InsertIntoStatement( l @ LogicalRelation(t: HadoopFsRelation, _, table, _), parts, query, overwrite, _) => // If the InsertIntoTable command is for a partitioned HadoopFsRelation and // the user has specified static partitions, we add a Project operator on top of the query @@ -188,15 +188,13 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast } val outputPath = t.location.rootPaths.head - if (overwrite) DDLUtils.verifyNotReadPath(actualQuery, outputPath) - val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append val partitionSchema = actualQuery.resolve( 
t.partitionSchema, t.sparkSession.sessionState.analyzer.resolver) val staticPartitions = parts.filter(_._2.nonEmpty).map { case (k, v) => k -> v.get } - InsertIntoHadoopFsRelationCommand( + val insertCommand = InsertIntoHadoopFsRelationCommand( outputPath, staticPartitions, i.ifPartitionNotExists, @@ -209,6 +207,14 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast table, Some(t.location), actualQuery.output.map(_.name)) + + // For dynamic partition overwrite, we do not delete partition directories ahead. + // We write to staging directories and move to final partition directories after writing + // job is done. So it is ok to have outputPath try to overwrite inputpath. + if (overwrite && !insertCommand.dynamicPartitionOverwrite) { + DDLUtils.verifyNotReadPath(actualQuery, outputPath) + } + insertCommand } } @@ -241,11 +247,11 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] } override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoTable(UnresolvedCatalogRelation(tableMeta), _, _, _, _) + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta), _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => i.copy(table = readDataSourceTable(tableMeta)) - case i @ InsertIntoTable(UnresolvedCatalogRelation(tableMeta), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta), _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) case UnresolvedCatalogRelation(tableMeta) if DDLUtils.isDatasourceTable(tableMeta) => @@ -264,7 +270,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with import DataSourceStrategy._ def apply(plan: LogicalPlan): Seq[execution.SparkPlan] = plan match { - case PhysicalOperation(projects, filters, l @ LogicalRelation(t: CatalystScan, _, _, _)) => + case ScanOperation(projects, filters, l @ LogicalRelation(t: CatalystScan, _, _, _)) => pruneFilterProjectRaw( l, 
projects, @@ -272,7 +278,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with (requestedColumns, allPredicates, _) => toCatalystRDD(l, requestedColumns, t.buildScan(requestedColumns, allPredicates))) :: Nil - case PhysicalOperation(projects, filters, + case ScanOperation(projects, filters, l @ LogicalRelation(t: PrunedFilteredScan, _, _, _)) => pruneFilterProject( l, @@ -280,7 +286,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with filters, (a, f) => toCatalystRDD(l, a, t.buildScan(a.map(_.name).toArray, f))) :: Nil - case PhysicalOperation(projects, filters, l @ LogicalRelation(t: PrunedScan, _, _, _)) => + case ScanOperation(projects, filters, l @ LogicalRelation(t: PrunedScan, _, _, _)) => pruneFilterProject( l, projects, @@ -403,14 +409,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with relation: LogicalRelation, output: Seq[Attribute], rdd: RDD[Row]): RDD[InternalRow] = { - if (relation.relation.needConversion) { - val converters = RowEncoder(StructType.fromAttributes(output)) - rdd.mapPartitions { iterator => - iterator.map(converters.toRow) - } - } else { - rdd.asInstanceOf[RDD[InternalRow]] - } + DataSourceStrategy.toCatalystRDD(relation.relation, output, rdd) } /** @@ -423,14 +422,14 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with object DataSourceStrategy { /** - * The attribute name of predicate could be different than the one in schema in case of - * case insensitive, we should change them to match the one in schema, so we do not need to - * worry about case sensitivity anymore. + * The attribute name may differ from the one in the schema if the query analyzer + * is case insensitive. We should change attribute names to match the ones in the schema, + * so we do not need to worry about case sensitivity anymore. 
*/ - protected[sql] def normalizeFilters( - filters: Seq[Expression], + protected[sql] def normalizeExprs( + exprs: Seq[Expression], attributes: Seq[AttributeReference]): Seq[Expression] = { - filters.map { e => + exprs.map { e => e transform { case a: AttributeReference => a.withName(attributes.find(_.semanticEquals(a)).getOrElse(a).name) @@ -618,4 +617,21 @@ object DataSourceStrategy { (nonconvertiblePredicates ++ unhandledPredicates, pushedFilters, handledFilters) } + + /** + * Convert RDD of Row into RDD of InternalRow with objects in catalyst types + */ + private[sql] def toCatalystRDD( + relation: BaseRelation, + output: Seq[Attribute], + rdd: RDD[Row]): RDD[InternalRow] = { + if (relation.needConversion) { + val converters = RowEncoder(StructType.fromAttributes(output)) + rdd.mapPartitions { iterator => + iterator.map(converters.toRow) + } + } else { + rdd.asInstanceOf[RDD[InternalRow]] + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 813af8203c2c5..28a63c26604ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -20,12 +20,12 @@ package org.apache.spark.sql.execution.datasources import scala.collection.JavaConverters._ import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileDataSourceV2, FileTable} /** - * Replace the File source V2 table in [[InsertIntoTable]] to V1 [[FileFormat]]. 
+ * Replace the File source V2 table in [[InsertIntoStatement]] to V1 [[FileFormat]]. * E.g, with temporary view `t` using [[FileDataSourceV2]], inserting into view `t` fails * since there is no corresponding physical plan. * This is a temporary hack for making current data source V2 work. It should be @@ -33,7 +33,8 @@ import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, File */ class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoTable(d @ DataSourceV2Relation(table: FileTable, _, _), _, _, _, _) => + case i @ + InsertIntoStatement(d @ DataSourceV2Relation(table: FileTable, _, _, _, _), _, _, _, _) => val v1FileFormat = table.fallbackFileFormat.newInstance() val relation = HadoopFsRelation( table.fileIndex, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala index 2595cc6371bc2..50c4f6cd57a96 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.sources.v2.writer.{DataWriter, WriterCommitMessage} +import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage} import org.apache.spark.sql.types.StringType import org.apache.spark.util.SerializableConfiguration @@ -86,6 +86,8 @@ abstract class FileFormatDataWriter( committer.abortTask(taskAttemptContext) } } + + override def close(): Unit = {} } /** FileFormatWriteTask 
for empty partitions */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index f1fc5d762ad56..219c778b9164a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources import java.util.{Date, UUID} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FileAlreadyExistsException, Path} import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl @@ -277,10 +277,16 @@ object FileFormatWriter extends Logging { // If there is an error, abort the task dataWriter.abort() logError(s"Job $jobId aborted.") + }, finallyBlock = { + dataWriter.close() }) } catch { case e: FetchFailedException => throw e + case f: FileAlreadyExistsException => + // If any output file to write already exists, it does not make sense to re-run this task. + // We throw the exception and let Executor throw ExceptionFailure to abort the job. 
+ throw new TaskOutputFileAlreadyExistException(f) case t: Throwable => throw new SparkException("Task failed while writing rows.", t) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala index 14bee173cc116..b4fc94e097aa8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.Partition import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.sources.v2.reader.InputPartition +import org.apache.spark.sql.connector.read.InputPartition /** * A collection of file blocks that should be read as a single task diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 9e98b0bbfabc9..542c996a5342d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -19,17 +19,14 @@ package org.apache.spark.sql.execution.datasources import java.io.{FileNotFoundException, IOException} -import scala.collection.mutable - import org.apache.parquet.io.ParquetDecodingException -import org.apache.spark.{Partition => RDDPartition, TaskContext, TaskKilledException} +import org.apache.spark.{Partition => RDDPartition, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.{InputFileBlockHolder, RDD} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.QueryExecutionException -import 
org.apache.spark.sql.sources.v2.reader.InputPartition import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.NextIterator diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index c8a42f043f15f..f45495121a980 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.util.collection.BitSet @@ -137,7 +137,7 @@ object FileSourceStrategy extends Strategy with Logging { } def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case PhysicalOperation(projects, filters, + case ScanOperation(projects, filters, l @ LogicalRelation(fsRelation: HadoopFsRelation, _, table, _)) => // Filters on this relation fall into four categories based on where we can use them to avoid // reading unneeded data: @@ -147,7 +147,8 @@ object FileSourceStrategy extends Strategy with Logging { // - filters that need to be evaluated again after the scan val filterSet = ExpressionSet(filters) - val normalizedFilters = DataSourceStrategy.normalizeFilters(filters, l.output) + val normalizedFilters = DataSourceStrategy.normalizeExprs( + filters.filter(_.deterministic), l.output) val partitionColumns = l.resolve( @@ -177,6 +178,8 @@ object FileSourceStrategy extends Strategy with Logging { // Partition 
keys are not available in the statistics of the files. val dataFilters = normalizedFiltersWithoutSubqueries.filter(_.references.intersect(partitionSet).isEmpty) + logInfo(s"Pushed Filters: " + + s"${dataFilters.flatMap(DataSourceStrategy.translateFilter).mkString(",")}") // Predicates with both partition keys and attributes need to be evaluated after the scan. val afterScanFilters = filterSet -- partitionKeyFilters.filter(_.references.nonEmpty) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala index cf7a13050f66c..cac2d6e626120 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala @@ -50,7 +50,9 @@ class InMemoryFileIndex( rootPathsSpecified: Seq[Path], parameters: Map[String, String], userSpecifiedSchema: Option[StructType], - fileStatusCache: FileStatusCache = NoopCache) + fileStatusCache: FileStatusCache = NoopCache, + userSpecifiedPartitionSpec: Option[PartitionSpec] = None, + override val metadataOpsTimeNs: Option[Long] = None) extends PartitioningAwareFileIndex( sparkSession, parameters, userSpecifiedSchema, fileStatusCache) { @@ -69,7 +71,11 @@ class InMemoryFileIndex( override def partitionSpec(): PartitionSpec = { if (cachedPartitionSpec == null) { - cachedPartitionSpec = inferPartitioning() + if (userSpecifiedPartitionSpec.isDefined) { + cachedPartitionSpec = userSpecifiedPartitionSpec.get + } else { + cachedPartitionSpec = inferPartitioning() + } } logTrace(s"Partition spec: $cachedPartitionSpec") cachedPartitionSpec @@ -111,6 +117,7 @@ class InMemoryFileIndex( * This is publicly visible for testing. 
*/ def listLeafFiles(paths: Seq[Path]): mutable.LinkedHashSet[FileStatus] = { + val startTime = System.nanoTime() val output = mutable.LinkedHashSet[FileStatus]() val pathsToFetch = mutable.ArrayBuffer[Path]() for (path <- paths) { @@ -121,7 +128,7 @@ class InMemoryFileIndex( case None => pathsToFetch += path } - Unit // for some reasons scalac 2.12 needs this; return type doesn't matter + () // for some reasons scalac 2.12 needs this; return type doesn't matter } val filter = FileInputFormat.getInputPathFilter(new JobConf(hadoopConf, this.getClass)) val discovered = InMemoryFileIndex.bulkListLeafFiles( @@ -131,6 +138,8 @@ class InMemoryFileIndex( fileStatusCache.putLeafFiles(path, leafFiles.toArray) output ++= leafFiles } + logInfo(s"It took ${(System.nanoTime() - startTime) / (1000 * 1000)} ms to list leaf files" + + s" for ${paths.length} paths.") output } } @@ -171,6 +180,7 @@ object InMemoryFileIndex extends Logging { areRootPaths: Boolean): Seq[(Path, Seq[FileStatus])] = { val ignoreMissingFiles = sparkSession.sessionState.conf.ignoreMissingFiles + val ignoreLocality = sparkSession.sessionState.conf.ignoreDataLocality // Short-circuits parallel listing when serial listing is likely to be faster. if (paths.size <= sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold) { @@ -181,12 +191,14 @@ object InMemoryFileIndex extends Logging { filter, Some(sparkSession), ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, isRootPath = areRootPaths) (path, leafFiles) } } - logInfo(s"Listing leaf files and directories in parallel under: ${paths.mkString(", ")}") + logInfo(s"Listing leaf files and directories in parallel under ${paths.length} paths." 
+ + s" The first several paths are: ${paths.take(10).mkString(", ")}.") HiveCatalogMetrics.incrementParallelListingJobCount(1) val sparkContext = sparkSession.sparkContext @@ -221,6 +233,7 @@ object InMemoryFileIndex extends Logging { filter, None, ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, isRootPath = areRootPaths) (path, leafFiles) }.iterator @@ -287,6 +300,7 @@ object InMemoryFileIndex extends Logging { filter: PathFilter, sessionOpt: Option[SparkSession], ignoreMissingFiles: Boolean, + ignoreLocality: Boolean, isRootPath: Boolean): Seq[FileStatus] = { logTrace(s"Listing $path") val fs = path.getFileSystem(hadoopConf) @@ -299,7 +313,7 @@ object InMemoryFileIndex extends Logging { // to retrieve the file status with the file block location. The reason to still fallback // to listStatus is because the default implementation would potentially throw a // FileNotFoundException which is better handled by doing the lookups manually below. - case _: DistributedFileSystem => + case _: DistributedFileSystem if !ignoreLocality => val remoteIter = fs.listLocatedStatus(path) new Iterator[LocatedFileStatus]() { def next(): LocatedFileStatus = remoteIter.next @@ -353,6 +367,7 @@ object InMemoryFileIndex extends Logging { filter, sessionOpt, ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, isRootPath = false) } } @@ -376,7 +391,7 @@ object InMemoryFileIndex extends Logging { // - Here we are calling `getFileBlockLocations` in a sequential manner, but it should not // be a big deal since we always use to `bulkListLeafFiles` when the number of // paths exceeds threshold. - case f => + case f if !ignoreLocality => // The other constructor of LocatedFileStatus will call FileStatus.getPermission(), // which is very slow on some file system (RawLocalFileSystem, which is launch a // subprocess and parse the stdout). 
@@ -400,6 +415,8 @@ object InMemoryFileIndex extends Logging { missingFiles += f.getPath.toString None } + + case f => Some(f) } if (missingFiles.nonEmpty) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index d43fa3893df1d..f11972115e09f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.util.SchemaUtils @@ -60,6 +61,21 @@ case class InsertIntoHadoopFsRelationCommand( extends DataWritingCommand { import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName + private lazy val parameters = CaseInsensitiveMap(options) + + private[sql] lazy val dynamicPartitionOverwrite: Boolean = { + val partitionOverwriteMode = parameters.get("partitionOverwriteMode") + // scalastyle:off caselocale + .map(mode => PartitionOverwriteMode.withName(mode.toUpperCase)) + // scalastyle:on caselocale + .getOrElse(SQLConf.get.partitionOverwriteMode) + val enableDynamicOverwrite = partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC + // This config only makes sense when we are overwriting a partitioned dataset with dynamic + // partition columns. 
+ enableDynamicOverwrite && mode == SaveMode.Overwrite && + staticPartitions.size < partitionColumns.length + } + override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { // Most formats don't do well with duplicate columns, so lets not allow that SchemaUtils.checkColumnNameDuplication( @@ -90,46 +106,36 @@ case class InsertIntoHadoopFsRelationCommand( fs, catalogTable.get, qualifiedOutputPath, matchingPartitions) } - val pathExists = fs.exists(qualifiedOutputPath) - - val parameters = CaseInsensitiveMap(options) - - val partitionOverwriteMode = parameters.get("partitionOverwriteMode") - // scalastyle:off caselocale - .map(mode => PartitionOverwriteMode.withName(mode.toUpperCase)) - // scalastyle:on caselocale - .getOrElse(sparkSession.sessionState.conf.partitionOverwriteMode) - val enableDynamicOverwrite = partitionOverwriteMode == PartitionOverwriteMode.DYNAMIC - // This config only makes sense when we are overwriting a partitioned dataset with dynamic - // partition columns. - val dynamicPartitionOverwrite = enableDynamicOverwrite && mode == SaveMode.Overwrite && - staticPartitions.size < partitionColumns.length - val committer = FileCommitProtocol.instantiate( sparkSession.sessionState.conf.fileCommitProtocolClass, jobId = java.util.UUID.randomUUID().toString, outputPath = outputPath.toString, dynamicPartitionOverwrite = dynamicPartitionOverwrite) - val doInsertion = (mode, pathExists) match { - case (SaveMode.ErrorIfExists, true) => - throw new AnalysisException(s"path $qualifiedOutputPath already exists.") - case (SaveMode.Overwrite, true) => - if (ifPartitionNotExists && matchingPartitions.nonEmpty) { - false - } else if (dynamicPartitionOverwrite) { - // For dynamic partition overwrite, do not delete partition directories ahead. 
- true - } else { - deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations, committer) + val doInsertion = if (mode == SaveMode.Append) { + true + } else { + val pathExists = fs.exists(qualifiedOutputPath) + (mode, pathExists) match { + case (SaveMode.ErrorIfExists, true) => + throw new AnalysisException(s"path $qualifiedOutputPath already exists.") + case (SaveMode.Overwrite, true) => + if (ifPartitionNotExists && matchingPartitions.nonEmpty) { + false + } else if (dynamicPartitionOverwrite) { + // For dynamic partition overwrite, do not delete partition directories ahead. + true + } else { + deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations, committer) + true + } + case (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => true - } - case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => - true - case (SaveMode.Ignore, exists) => - !exists - case (s, exists) => - throw new IllegalStateException(s"unsupported save mode $s ($exists)") + case (SaveMode.Ignore, exists) => + !exists + case (s, exists) => + throw new IllegalStateException(s"unsupported save mode $s ($exists)") + } } if (doInsertion) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 35bda5682fda1..33a3486bf6f67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -41,7 +41,7 @@ case class LogicalRelation( override def computeStats(): Statistics = { catalogTable - .flatMap(_.stats.map(_.toPlanStats(output, conf.cboEnabled))) + .flatMap(_.stats.map(_.toPlanStats(output, conf.cboEnabled || conf.planStatsEnabled))) .getOrElse(Statistics(sizeInBytes = relation.sizeInBytes)) } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 3adec2f790730..2e09c729529a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -171,7 +171,7 @@ abstract class PartitioningAwareFileIndex( if (partitionPruningPredicates.nonEmpty) { val predicate = partitionPruningPredicates.reduce(expressions.And) - val boundPredicate = InterpretedPredicate.create(predicate.transform { + val boundPredicate = Predicate.createInterpreted(predicate.transform { case a: AttributeReference => val index = partitionColumns.indexWhere(a.name == _.name) BoundReference(index, partitionColumns(index).dataType, nullable = true) @@ -221,7 +221,15 @@ abstract class PartitioningAwareFileIndex( if (!fs.isDirectory(userDefinedBasePath)) { throw new IllegalArgumentException(s"Option '$BASE_PATH_PARAM' must be a directory") } - Set(fs.makeQualified(userDefinedBasePath)) + val qualifiedBasePath = fs.makeQualified(userDefinedBasePath) + val qualifiedBasePathStr = qualifiedBasePath.toString + rootPaths + .find(!fs.makeQualified(_).toString.startsWith(qualifiedBasePathStr)) + .foreach { rp => + throw new IllegalArgumentException( + s"Wrong basePath $userDefinedBasePath for the root path: $rp") + } + Set(qualifiedBasePath) case None => rootPaths.map { path => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 1e47d53b7e976..fdad43b23c5aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -130,7 +130,7 @@ object PartitioningUtils { Map.empty[String, String] } - val dateFormatter = DateFormatter() + val dateFormatter = DateFormatter(zoneId) val timestampFormatter = TimestampFormatter(timestampPartitionPattern, zoneId) // First, we need to parse every partition's path and see if we can find partition values. val (partitionValues, optDiscoveredBasePaths) = paths.map { path => @@ -492,7 +492,7 @@ object PartitioningUtils { // We need to check that we can cast the raw string since we later can use Cast to get // the partition values with the right DataType (see // org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.inferPartitioning) - val dateValue = Cast(Literal(raw), DateType).eval() + val dateValue = Cast(Literal(raw), DateType, Some(zoneId.getId)).eval() // Disallow DateType if the cast returned null require(dateValue != null) Literal.create(dateValue, DateType) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala index 927e77a53bf47..a7129fb14d1a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala @@ -17,13 +17,61 @@ package org.apache.spark.sql.execution.datasources +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.CatalogStatistics import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LeafNode, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule +import 
org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, FileScan} +import org.apache.spark.sql.types.StructType +/** + * Prune the partitions of file source based table using partition filters. Currently, this rule + * is applied to [[HadoopFsRelation]] with [[CatalogFileIndex]] and [[DataSourceV2ScanRelation]] + * with [[FileScan]]. + * + * For [[HadoopFsRelation]], the location will be replaced by pruned file index, and corresponding + * statistics will be updated. And the partition filters will be kept in the filters of returned + * logical plan. + * + * For [[DataSourceV2ScanRelation]], both partition filters and data filters will be added to + * its underlying [[FileScan]]. And the partition filters will be removed in the filters of + * returned logical plan. + */ private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { + + private def getPartitionKeyFiltersAndDataFilters( + sparkSession: SparkSession, + relation: LeafNode, + partitionSchema: StructType, + filters: Seq[Expression], + output: Seq[AttributeReference]): (ExpressionSet, Seq[Expression]) = { + val normalizedFilters = DataSourceStrategy.normalizeExprs( + filters.filter(f => f.deterministic && !SubqueryExpression.hasSubquery(f)), output) + val partitionColumns = + relation.resolve(partitionSchema, sparkSession.sessionState.analyzer.resolver) + val partitionSet = AttributeSet(partitionColumns) + val (partitionFilters, dataFilters) = normalizedFilters.partition(f => + f.references.subsetOf(partitionSet) + ) + + (ExpressionSet(partitionFilters), dataFilters) + } + + private def rebuildPhysicalOperation( + projects: Seq[NamedExpression], + filters: Seq[Expression], + relation: LeafNode): Project = { + val withFilter = if (filters.nonEmpty) { + val filterExpression = filters.reduceLeft(And) + Filter(filterExpression, relation) + } else { + relation + } + Project(projects, withFilter) + } + override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case op @ 
PhysicalOperation(projects, filters, logicalRelation @ @@ -39,31 +87,37 @@ private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { _, _)) if filters.nonEmpty && fsRelation.partitionSchemaOption.isDefined => - val normalizedFilters = DataSourceStrategy.normalizeFilters( - filters.filterNot(SubqueryExpression.hasSubquery), logicalRelation.output) - - val sparkSession = fsRelation.sparkSession - val partitionColumns = - logicalRelation.resolve( - partitionSchema, sparkSession.sessionState.analyzer.resolver) - val partitionSet = AttributeSet(partitionColumns) - val partitionKeyFilters = ExpressionSet(normalizedFilters.filter { f => - f.references.subsetOf(partitionSet) && f.find(_.isInstanceOf[SubqueryExpression]).isEmpty - }) - + val (partitionKeyFilters, _) = getPartitionKeyFiltersAndDataFilters( + fsRelation.sparkSession, logicalRelation, partitionSchema, filters, logicalRelation.output) if (partitionKeyFilters.nonEmpty) { val prunedFileIndex = catalogFileIndex.filterPartitions(partitionKeyFilters.toSeq) val prunedFsRelation = - fsRelation.copy(location = prunedFileIndex)(sparkSession) + fsRelation.copy(location = prunedFileIndex)(fsRelation.sparkSession) // Change table stats based on the sizeInBytes of pruned files val withStats = logicalRelation.catalogTable.map(_.copy( stats = Some(CatalogStatistics(sizeInBytes = BigInt(prunedFileIndex.sizeInBytes))))) val prunedLogicalRelation = logicalRelation.copy( relation = prunedFsRelation, catalogTable = withStats) // Keep partition-pruning predicates so that they are visible in physical planning - val filterExpression = filters.reduceLeft(And) - val filter = Filter(filterExpression, prunedLogicalRelation) - Project(projects, filter) + rebuildPhysicalOperation(projects, filters, prunedLogicalRelation) + } else { + op + } + + case op @ PhysicalOperation(projects, filters, + v2Relation @ DataSourceV2ScanRelation(_, scan: FileScan, output)) + if filters.nonEmpty && scan.readDataSchema.nonEmpty => + val 
(partitionKeyFilters, dataFilters) = + getPartitionKeyFiltersAndDataFilters(scan.sparkSession, v2Relation, + scan.readPartitionSchema, filters, output) + // The dataFilters are pushed down only once + if (partitionKeyFilters.nonEmpty || (dataFilters.nonEmpty && scan.dataFilters.isEmpty)) { + val prunedV2Relation = + v2Relation.copy(scan = scan.withFilters(partitionKeyFilters.toSeq, dataFilters)) + // The pushed down partition filters don't need to be reevaluated. + val afterScanFilters = + ExpressionSet(filters) -- partitionKeyFilters.filter(_.references.nonEmpty) + rebuildPhysicalOperation(projects, afterScanFilters.toSeq, prunedV2Relation) } else { op } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala index c2211cccb501c..61e0154a0ffe8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala @@ -17,16 +17,12 @@ package org.apache.spark.sql.execution.datasources -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LeafNode, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} -import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable -import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType} @@ -58,21 +54,6 @@ object 
SchemaPruning extends Rule[LogicalPlan] { hadoopFsRelation.copy(dataSchema = prunedDataSchema)(hadoopFsRelation.sparkSession) buildPrunedRelation(l, prunedHadoopRelation) }).getOrElse(op) - - case op @ PhysicalOperation(projects, filters, - d @ DataSourceV2Relation(table: FileTable, output, _)) if canPruneTable(table) => - - prunePhysicalColumns(output, projects, filters, table.dataSchema, - prunedDataSchema => { - val prunedFileTable = table match { - case o: OrcTable => o.copy(userSpecifiedSchema = Some(prunedDataSchema)) - case p: ParquetTable => p.copy(userSpecifiedSchema = Some(prunedDataSchema)) - case _ => - val message = s"${table.formatName} data source doesn't support schema pruning." - throw new AnalysisException(message) - } - buildPrunedRelationV2(d, prunedFileTable) - }).getOrElse(op) } /** @@ -119,12 +100,6 @@ object SchemaPruning extends Rule[LogicalPlan] { fsRelation.fileFormat.isInstanceOf[ParquetFileFormat] || fsRelation.fileFormat.isInstanceOf[OrcFileFormat] - /** - * Checks to see if the given [[FileTable]] can be pruned. Currently we support ORC v2. - */ - private def canPruneTable(table: FileTable) = - table.isInstanceOf[OrcTable] || table.isInstanceOf[ParquetTable] - /** * Normalizes the names of the attribute references in the given projects and filters to reflect * the names in the given logical relation. This makes it possible to compare attributes and @@ -191,17 +166,6 @@ object SchemaPruning extends Rule[LogicalPlan] { outputRelation.copy(relation = prunedBaseRelation, output = prunedOutput) } - /** - * Builds a pruned data source V2 relation from the output of the relation and the schema - * of the pruned [[FileTable]]. 
- */ - private def buildPrunedRelationV2( - outputRelation: DataSourceV2Relation, - prunedFileTable: FileTable) = { - val prunedOutput = getPrunedOutput(outputRelation.output, prunedFileTable.schema) - outputRelation.copy(table = prunedFileTable, output = prunedOutput) - } - // Prune the given output to make it consistent with `requiredSchema`. private def getPrunedOutput( output: Seq[AttributeReference], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index 8abc6fcacd4c5..cbf9d2bac7ceb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -17,16 +17,13 @@ package org.apache.spark.sql.execution.datasources.csv -import java.nio.charset.Charset - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce._ -import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVOptions, UnivocityGenerator, UnivocityParser} +import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVOptions, UnivocityParser} import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.util.CompressionCodecs import org.apache.spark.sql.execution.datasources._ @@ -134,7 +131,11 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { dataSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)) val actualRequiredSchema = StructType( requiredSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)) - val parser = new UnivocityParser(actualDataSchema, actualRequiredSchema, parsedOptions) + val parser = new 
UnivocityParser( + actualDataSchema, + actualRequiredSchema, + parsedOptions, + filters) val schema = if (columnPruning) actualRequiredSchema else actualDataSchema val isStartOfFile = file.start == 0 val headerChecker = new CSVHeaderChecker( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala index 21fabac472f4b..d8b52c503ad34 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVUtils.scala @@ -33,11 +33,12 @@ object CSVUtils { // with the one below, `filterCommentAndEmpty` but execution path is different. One of them // might have to be removed in the near future if possible. import lines.sqlContext.implicits._ - val nonEmptyLines = lines.filter(length(trim($"value")) > 0) + val aliased = lines.toDF("value") + val nonEmptyLines = aliased.filter(length(trim($"value")) > 0) if (options.isCommentSet) { - nonEmptyLines.filter(!$"value".startsWith(options.comment.toString)) + nonEmptyLines.filter(!$"value".startsWith(options.comment.toString)).as[String] } else { - nonEmptyLines + nonEmptyLines.as[String] } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CsvOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CsvOutputWriter.scala index 3ff36bfde3cca..2b549536ae486 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CsvOutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CsvOutputWriter.scala @@ -33,25 +33,17 @@ class CsvOutputWriter( context: TaskAttemptContext, params: CSVOptions) extends OutputWriter with Logging { - private var univocityGenerator: Option[UnivocityGenerator] = None + private val charset = Charset.forName(params.charset) + + private val writer = 
CodecStreams.createOutputStreamWriter(context, new Path(path), charset) + + private val gen = new UnivocityGenerator(dataSchema, writer, params) if (params.headerFlag) { - val gen = getGen() gen.writeHeaders() } - private def getGen(): UnivocityGenerator = univocityGenerator.getOrElse { - val charset = Charset.forName(params.charset) - val os = CodecStreams.createOutputStreamWriter(context, new Path(path), charset) - val newGen = new UnivocityGenerator(dataSchema, os, params) - univocityGenerator = Some(newGen) - newGen - } - - override def write(row: InternalRow): Unit = { - val gen = getGen() - gen.write(row) - } + override def write(row: InternalRow): Unit = gen.write(row) - override def close(): Unit = univocityGenerator.foreach(_.close()) + override def close(): Unit = gen.close() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index d184f3cb71b1a..222ef1145b922 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -147,14 +147,7 @@ class JDBCOptions( """.stripMargin ) - val fetchSize = { - val size = parameters.getOrElse(JDBC_BATCH_FETCH_SIZE, "0").toInt - require(size >= 0, - s"Invalid value `${size.toString}` for parameter " + - s"`$JDBC_BATCH_FETCH_SIZE`. The minimum value is 0. 
When the value is 0, " + - "the JDBC driver ignores the value and does the estimates.") - size - } + val fetchSize = parameters.getOrElse(JDBC_BATCH_FETCH_SIZE, "0").toInt // ------------------------------------------------------------ // Optional parameters only for writing @@ -184,6 +177,10 @@ class JDBCOptions( case "READ_COMMITTED" => Connection.TRANSACTION_READ_COMMITTED case "REPEATABLE_READ" => Connection.TRANSACTION_REPEATABLE_READ case "SERIALIZABLE" => Connection.TRANSACTION_SERIALIZABLE + case other => throw new IllegalArgumentException( + s"Invalid value `$other` for parameter `$JDBC_TXN_ISOLATION_LEVEL`. This can be " + + "`NONE`, `READ_UNCOMMITTED`, `READ_COMMITTED`, `REPEATABLE_READ` or `SERIALIZABLE`." + ) } // An option to execute custom SQL before fetching data from the remote DB val sessionInitStatement = parameters.get(JDBC_SESSION_INIT_STATEMENT) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 16b493892e3be..e25ce53941ff6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -231,7 +231,7 @@ private[jdbc] class JDBCRDD( var stmt: PreparedStatement = null var conn: Connection = null - def close() { + def close(): Unit = { if (closed) return try { if (null != rs) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index 3cd5cb1647923..f5a474ddf3904 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -186,7 +186,7 @@ private[sql] object JDBCRelation extends Logging { } 
columnType match { case _: NumericType => value.toLong - case DateType => parse(stringToDate).toLong + case DateType => parse(stringToDate(_, getZoneId(timeZoneId))).toLong case TimestampType => parse(stringToTimestamp(_, getZoneId(timeZoneId))) } } @@ -197,7 +197,9 @@ private[sql] object JDBCRelation extends Logging { timeZoneId: String): String = { def dateTimeToString(): String = { val dateTimeStr = columnType match { - case DateType => DateFormatter().format(value.toInt) + case DateType => + val dateFormatter = DateFormatter(DateTimeUtils.getZoneId(timeZoneId)) + dateFormatter.format(value.toInt) case TimestampType => val timestampFormatter = TimestampFormatter.getFractionFormatter( DateTimeUtils.getZoneId(timeZoneId)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 86a27b5afc250..c1e1aed83bae5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -445,7 +445,7 @@ object JdbcUtils extends Logging { case ByteType => (rs: ResultSet, row: InternalRow, pos: Int) => - row.update(pos, rs.getByte(pos + 1)) + row.setByte(pos, rs.getByte(pos + 1)) case StringType => (rs: ResultSet, row: InternalRow, pos: Int) => @@ -605,6 +605,13 @@ object JdbcUtils extends Logging { * implementation changes elsewhere might easily render such a closure * non-Serializable. Instead, we explicitly close over all variables that * are used. + * + * Note that this method records task output metrics. It assumes the method is + * running in a task. For now, we only records the number of rows being written + * because there's no good way to measure the total bytes being written. 
Only + * effective outputs are taken into account: for example, metric will not be updated + * if it supports transaction and transaction is rolled back, but metric will be + * updated even with error if it doesn't support transaction, as there're dirty outputs. */ def savePartition( getConnection: () => Connection, @@ -615,7 +622,9 @@ object JdbcUtils extends Logging { batchSize: Int, dialect: JdbcDialect, isolationLevel: Int, - options: JDBCOptions): Iterator[Byte] = { + options: JDBCOptions): Unit = { + val outMetrics = TaskContext.get().taskMetrics().outputMetrics + val conn = getConnection() var committed = false @@ -643,7 +652,7 @@ object JdbcUtils extends Logging { } } val supportsTransactions = finalIsolationLevel != Connection.TRANSACTION_NONE - + var totalRowCount = 0L try { if (supportsTransactions) { conn.setAutoCommit(false) // Everything in the same db transaction. @@ -672,6 +681,7 @@ object JdbcUtils extends Logging { } stmt.addBatch() rowCount += 1 + totalRowCount += 1 if (rowCount % batchSize == 0) { stmt.executeBatch() rowCount = 0 @@ -687,7 +697,6 @@ object JdbcUtils extends Logging { conn.commit() } committed = true - Iterator.empty } catch { case e: SQLException => val cause = e.getNextException @@ -715,9 +724,13 @@ object JdbcUtils extends Logging { // tell the user about another problem. if (supportsTransactions) { conn.rollback() + } else { + outMetrics.setRecordsWritten(totalRowCount) } conn.close() } else { + outMetrics.setRecordsWritten(totalRowCount) + // The stage must succeed. We cannot propagate any exception close() might throw. 
try { conn.close() @@ -840,10 +853,10 @@ object JdbcUtils extends Logging { case Some(n) if n < df.rdd.getNumPartitions => df.coalesce(n) case _ => df } - repartitionedDF.rdd.foreachPartition(iterator => savePartition( + repartitionedDF.rdd.foreachPartition { iterator => savePartition( getConnection, table, iterator, rddSchema, insertStmt, batchSize, dialect, isolationLevel, options) - ) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala index b3cd570cfb1cf..dfd84e344eb2a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala @@ -44,20 +44,18 @@ class JsonOutputWriter( " which can be read back by Spark only if multiLine is enabled.") } - private var jacksonGenerator: Option[JacksonGenerator] = None + private val writer = CodecStreams.createOutputStreamWriter(context, new Path(path), encoding) - override def write(row: InternalRow): Unit = { - val gen = jacksonGenerator.getOrElse { - val os = CodecStreams.createOutputStreamWriter(context, new Path(path), encoding) - // create the Generator without separator inserted between 2 records - val newGen = new JacksonGenerator(dataSchema, os, options) - jacksonGenerator = Some(newGen) - newGen - } + // create the Generator without separator inserted between 2 records + private[this] val gen = new JacksonGenerator(dataSchema, writer, options) + override def write(row: InternalRow): Unit = { gen.write(row) gen.writeLineEnding() } - override def close(): Unit = jacksonGenerator.foreach(_.close()) + override def close(): Unit = { + gen.close() + writer.close() + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala index e4f9e49c4dd28..4fad0a2484cde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/noop/NoopDataSource.scala @@ -22,10 +22,11 @@ import java.util import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, SupportsTruncate, WriteBuilder, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} +import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.DataSourceRegister -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.writer._ -import org.apache.spark.sql.sources.v2.writer.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -33,13 +34,13 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap * This is no-op datasource. It does not do anything besides consuming its input. * This can be useful for benchmarking or to cache data without any additional overhead. 
*/ -class NoopDataSource extends TableProvider with DataSourceRegister { +class NoopDataSource extends SimpleTableProvider with DataSourceRegister { override def shortName(): String = "noop" override def getTable(options: CaseInsensitiveStringMap): Table = NoopTable } private[noop] object NoopTable extends Table with SupportsWrite { - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = NoopWriteBuilder + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = NoopWriteBuilder override def name(): String = "noop-table" override def schema(): StructType = new StructType() override def capabilities(): util.Set[TableCapability] = { @@ -58,7 +59,8 @@ private[noop] object NoopWriteBuilder extends WriteBuilder with SupportsTruncate } private[noop] object NoopBatchWrite extends BatchWrite { - override def createBatchWriterFactory(): DataWriterFactory = NoopWriterFactory + override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = + NoopWriterFactory override def commit(messages: Array[WriterCommitMessage]): Unit = {} override def abort(messages: Array[WriterCommitMessage]): Unit = {} } @@ -71,11 +73,12 @@ private[noop] object NoopWriter extends DataWriter[InternalRow] { override def write(record: InternalRow): Unit = {} override def commit(): WriterCommitMessage = null override def abort(): Unit = {} + override def close(): Unit = {} } private[noop] object NoopStreamingWrite extends StreamingWrite { - override def createStreamingWriterFactory(): StreamingDataWriterFactory = - NoopStreamingDataWriterFactory + override def createStreamingWriterFactory( + info: PhysicalWriteInfo): StreamingDataWriterFactory = NoopStreamingDataWriterFactory override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {} override def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {} } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index f7c12598da209..fd791ce7c5e19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl -import org.apache.orc._ +import org.apache.orc.{OrcUtils => _, _} import org.apache.orc.OrcConf.{COMPRESS, MAPRED_OUTPUT_SCHEMA} import org.apache.orc.mapred.OrcStruct import org.apache.orc.mapreduce._ @@ -38,10 +38,9 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ -import org.apache.spark.util.SerializableConfiguration +import org.apache.spark.util.{SerializableConfiguration, Utils} private[sql] object OrcFileFormat { private def checkFieldName(name: String): Unit = { @@ -180,10 +179,11 @@ class OrcFileFormat val fs = filePath.getFileSystem(conf) val readerOptions = OrcFile.readerOptions(conf).filesystem(fs) - val reader = OrcFile.createReader(filePath, readerOptions) - - val requestedColIdsOrEmptyFile = OrcUtils.requestedColumnIds( - isCaseSensitive, dataSchema, requiredSchema, reader, conf) + val requestedColIdsOrEmptyFile = + Utils.tryWithResource(OrcFile.createReader(filePath, readerOptions)) { reader => + OrcUtils.requestedColumnIds( + isCaseSensitive, dataSchema, requiredSchema, reader, conf) + } if (requestedColIdsOrEmptyFile.isEmpty) { Iterator.empty diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index 12d4244e19812..eea9b2a8f9613 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.execution.datasources.SchemaMergeUtils import org.apache.spark.sql.types._ -import org.apache.spark.util.{SerializableConfiguration, ThreadUtils} +import org.apache.spark.util.{ThreadUtils, Utils} object OrcUtils extends Logging { @@ -62,8 +62,9 @@ object OrcUtils extends Logging { val fs = file.getFileSystem(conf) val readerOptions = OrcFile.readerOptions(conf).filesystem(fs) try { - val reader = OrcFile.createReader(file, readerOptions) - val schema = reader.getSchema + val schema = Utils.tryWithResource(OrcFile.createReader(file, readerOptions)) { reader => + reader.getSchema + } if (schema.getFieldNames.size == 0) { None } else { @@ -162,6 +163,7 @@ object OrcUtils extends Logging { if (matchedOrcFields.size > 1) { // Need to fail if there is ambiguity, i.e. more than one field is matched. 
val matchedOrcFieldsString = matchedOrcFields.mkString("[", ", ", "]") + reader.close() throw new RuntimeException(s"""Found duplicate field(s) "$requiredFieldName": """ + s"$matchedOrcFieldsString in case-insensitive mode") } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 815b62dfbf898..29dbd8dfbca8f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -27,7 +27,6 @@ import scala.util.{Failure, Try} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce._ -import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.parquet.filter2.compat.FilterCompat import org.apache.parquet.filter2.predicate.FilterApi @@ -296,7 +295,7 @@ class ParquetFileFormat val convertTz = if (timestampConversion && !isCreatedByParquetMr) { - Some(DateTimeUtils.getTimeZone(sharedConf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) + Some(DateTimeUtils.getZoneId(sharedConf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) } else { None } @@ -328,32 +327,28 @@ class ParquetFileFormat iter.asInstanceOf[Iterator[InternalRow]] } else { logDebug(s"Falling back to parquet-mr") - // ParquetRecordReader returns UnsafeRow + // ParquetRecordReader returns InternalRow val readSupport = new ParquetReadSupport(convertTz, enableVectorizedReader = false) val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) - new ParquetRecordReader[UnsafeRow](readSupport, parquetFilter) + new ParquetRecordReader[InternalRow](readSupport, parquetFilter) } else { - new 
ParquetRecordReader[UnsafeRow](readSupport) + new ParquetRecordReader[InternalRow](readSupport) } - val iter = new RecordReaderIterator(reader) + val iter = new RecordReaderIterator[InternalRow](reader) // SPARK-23457 Register a task completion listener before `initialization`. taskContext.foreach(_.addTaskCompletionListener[Unit](_ => iter.close())) reader.initialize(split, hadoopAttemptContext) val fullSchema = requiredSchema.toAttributes ++ partitionSchema.toAttributes - val joinedRow = new JoinedRow() - val appendPartitionColumns = GenerateUnsafeProjection.generate(fullSchema, fullSchema) + val unsafeProjection = GenerateUnsafeProjection.generate(fullSchema, fullSchema) - // This is a horrible erasure hack... if we type the iterator above, then it actually check - // the type in next() and we get a class cast exception. If we make that function return - // Object, then we can defer the cast until later! if (partitionSchema.length == 0) { // There is no partition columns - iter.asInstanceOf[Iterator[InternalRow]] + iter.map(unsafeProjection) } else { - iter.asInstanceOf[Iterator[InternalRow]] - .map(d => appendPartitionColumns(joinedRow(d, file.partitionValues))) + val joinedRow = new JoinedRow() + iter.map(d => unsafeProjection(joinedRow(d, file.partitionValues))) } } } @@ -403,7 +398,7 @@ object ParquetFileFormat extends Logging { logInfo( "Serialized Spark schema in Parquet key-value metadata is not in JSON format, " + "falling back to the deprecated DataType.fromCaseClassString parser.") - LegacyTypeStringParser.parse(serializedSchema.get) + LegacyTypeStringParser.parseString(serializedSchema.get) } .recover { case cause: Throwable => logWarning( @@ -514,7 +509,7 @@ object ParquetFileFormat extends Logging { logInfo( "Serialized Spark schema in Parquet key-value metadata is not in JSON format, " + "falling back to the deprecated DataType.fromCaseClassString parser.") - LegacyTypeStringParser.parse(schemaString).asInstanceOf[StructType] + 
LegacyTypeStringParser.parseString(schemaString).asInstanceOf[StructType] }.recoverWith { case cause: Throwable => logWarning( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala index b9b86adb438e6..948a120e0d6e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala @@ -591,7 +591,7 @@ class ParquetFilters( case sources.StringStartsWith(name, prefix) if pushDownStartWith && canMakeFilterOn(name, prefix) => Option(prefix).map { v => - FilterApi.userDefined(binaryColumn(name), + FilterApi.userDefined(binaryColumn(nameToParquetField(name).fieldName), new UserDefinedPredicate[Binary] with Serializable { private val strToBinary = Binary.fromReusedByteArray(v.getBytes) private val size = strToBinary.length diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index 2c7231d2c3e0a..c05ecf16311ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.util.{Locale, Map => JMap, TimeZone} +import java.time.ZoneId +import java.util.{Locale, Map => JMap} import scala.collection.JavaConverters._ @@ -29,13 +30,13 @@ import org.apache.parquet.schema._ import org.apache.parquet.schema.Type.Repetition import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.catalyst.InternalRow import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** * A Parquet [[ReadSupport]] implementation for reading Parquet records as Catalyst - * [[UnsafeRow]]s. + * [[InternalRow]]s. * * The API interface of [[ReadSupport]] is a little bit over complicated because of historical * reasons. In older versions of parquet-mr (say 1.6.0rc3 and prior), [[ReadSupport]] need to be @@ -49,9 +50,9 @@ import org.apache.spark.sql.types._ * Due to this reason, we no longer rely on [[ReadContext]] to pass requested schema from [[init()]] * to [[prepareForRead()]], but use a private `var` for simplicity. */ -class ParquetReadSupport(val convertTz: Option[TimeZone], +class ParquetReadSupport(val convertTz: Option[ZoneId], enableVectorizedReader: Boolean) - extends ReadSupport[UnsafeRow] with Logging { + extends ReadSupport[InternalRow] with Logging { private var catalystRequestedSchema: StructType = _ def this() { @@ -114,13 +115,13 @@ class ParquetReadSupport(val convertTz: Option[TimeZone], /** * Called on executor side after [[init()]], before instantiating actual Parquet record readers. * Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet - * records to Catalyst [[UnsafeRow]]s. + * records to Catalyst [[InternalRow]]s. 
*/ override def prepareForRead( conf: Configuration, keyValueMetaData: JMap[String, String], fileSchema: MessageType, - readContext: ReadContext): RecordMaterializer[UnsafeRow] = { + readContext: ReadContext): RecordMaterializer[InternalRow] = { val parquetRequestedSchema = readContext.getRequestedSchema new ParquetRecordMaterializer( parquetRequestedSchema, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala index b2459dd0e8bba..5622169df1281 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala @@ -17,12 +17,12 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.util.TimeZone +import java.time.ZoneId import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer} import org.apache.parquet.schema.MessageType -import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.StructType /** @@ -36,13 +36,13 @@ private[parquet] class ParquetRecordMaterializer( parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetToSparkSchemaConverter, - convertTz: Option[TimeZone]) - extends RecordMaterializer[UnsafeRow] { + convertTz: Option[ZoneId]) + extends RecordMaterializer[InternalRow] { private val rootConverter = new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, convertTz, NoopUpdater) - override def getCurrentRecord: UnsafeRow = rootConverter.currentRecord + override def getCurrentRecord: InternalRow = rootConverter.currentRecord override def getRootConverter: GroupConverter = rootConverter } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index b772b6b77d1ce..850adae8a6b95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.parquet import java.math.{BigDecimal, BigInteger} import java.nio.ByteOrder -import java.util.TimeZone +import java.time.{ZoneId, ZoneOffset} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -125,7 +125,7 @@ private[parquet] class ParquetRowConverter( schemaConverter: ParquetToSparkSchemaConverter, parquetType: GroupType, catalystType: StructType, - convertTz: Option[TimeZone], + convertTz: Option[ZoneId], updater: ParentContainerUpdater) extends ParquetGroupConverter(updater) with Logging { @@ -154,8 +154,6 @@ private[parquet] class ParquetRowConverter( |${catalystType.prettyJson} """.stripMargin) - private val UTC = DateTimeUtils.TimeZoneUTC - /** * Updater used together with field converters within a [[ParquetRowConverter]]. It propagates * converted filed values to the `ordinal`-th cell in `currentRow`. @@ -171,17 +169,15 @@ private[parquet] class ParquetRowConverter( override def setFloat(value: Float): Unit = row.setFloat(ordinal, value) } - private val currentRow = new SpecificInternalRow(catalystType.map(_.dataType)) - - private val unsafeProjection = UnsafeProjection.create(catalystType) + private[this] val currentRow = new SpecificInternalRow(catalystType.map(_.dataType)) /** - * The [[UnsafeRow]] converted from an entire Parquet record. + * The [[InternalRow]] converted from an entire Parquet record. 
*/ - def currentRecord: UnsafeRow = unsafeProjection(currentRow) + def currentRecord: InternalRow = currentRow // Converters for each field. - private val fieldConverters: Array[Converter with HasParentContainerUpdater] = { + private[this] val fieldConverters: Array[Converter with HasParentContainerUpdater] = { parquetType.getFields.asScala.map { parquetField => val fieldIndex = catalystType.fieldIndex(parquetField.getName) val catalystField = catalystType(fieldIndex) @@ -190,12 +186,15 @@ private[parquet] class ParquetRowConverter( }.toArray } + // Updaters for each field. + private[this] val fieldUpdaters: Array[ParentContainerUpdater] = fieldConverters.map(_.updater) + override def getConverter(fieldIndex: Int): Converter = fieldConverters(fieldIndex) override def end(): Unit = { var i = 0 - while (i < fieldConverters.length) { - fieldConverters(i).updater.end() + while (i < fieldUpdaters.length) { + fieldUpdaters(i).end() i += 1 } updater.set(currentRow) @@ -203,13 +202,14 @@ private[parquet] class ParquetRowConverter( override def start(): Unit = { var i = 0 - while (i < currentRow.numFields) { + val numFields = currentRow.numFields + while (i < numFields) { currentRow.setNullAt(i) i += 1 } i = 0 - while (i < fieldConverters.length) { - fieldConverters(i).updater.start() + while (i < fieldUpdaters.length) { + fieldUpdaters(i).start() i += 1 } } @@ -290,7 +290,8 @@ private[parquet] class ParquetRowConverter( val timeOfDayNanos = buf.getLong val julianDay = buf.getInt val rawTime = DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos) - val adjTime = convertTz.map(DateTimeUtils.convertTz(rawTime, _, UTC)).getOrElse(rawTime) + val adjTime = convertTz.map(DateTimeUtils.convertTz(rawTime, _, ZoneOffset.UTC)) + .getOrElse(rawTime) updater.setLong(adjTime) } } @@ -320,10 +321,34 @@ private[parquet] class ParquetRowConverter( new ParquetMapConverter(parquetType.asGroupType(), t, updater) case t: StructType => + val wrappedUpdater = { + // SPARK-30338: avoid 
unnecessary InternalRow copying for nested structs: + // There are two cases to handle here: + // + // 1. Parent container is a map or array: we must make a deep copy of the mutable row + // because this converter may be invoked multiple times per Parquet input record + // (if the map or array contains multiple elements). + // + // 2. Parent container is a struct: we don't need to copy the row here because either: + // + // (a) all ancestors are structs and therefore no copying is required because this + // converter will only be invoked once per Parquet input record, or + // (b) some ancestor is struct that is nested in a map or array and that ancestor's + // converter will perform deep-copying (which will recursively copy this row). + if (updater.isInstanceOf[RowUpdater]) { + // `updater` is a RowUpdater, implying that the parent container is a struct. + updater + } else { + // `updater` is NOT a RowUpdater, implying that the parent container a map or array. + new ParentContainerUpdater { + override def set(value: Any): Unit = { + updater.set(value.asInstanceOf[SpecificInternalRow].copy()) // deep copy + } + } + } + } new ParquetRowConverter( - schemaConverter, parquetType.asGroupType(), t, convertTz, new ParentContainerUpdater { - override def set(value: Any): Unit = updater.set(value.asInstanceOf[InternalRow].copy()) - }) + schemaConverter, parquetType.asGroupType(), t, convertTz, wrappedUpdater) case t => throw new RuntimeException( @@ -466,9 +491,9 @@ private[parquet] class ParquetRowConverter( updater: ParentContainerUpdater) extends ParquetGroupConverter(updater) { - private var currentArray: ArrayBuffer[Any] = _ + private[this] val currentArray = ArrayBuffer.empty[Any] - private val elementConverter: Converter = { + private[this] val elementConverter: Converter = { val repeatedType = parquetSchema.getType(0) val elementType = catalystSchema.elementType @@ -519,10 +544,7 @@ private[parquet] class ParquetRowConverter( override def end(): Unit = 
updater.set(new GenericArrayData(currentArray.toArray)) - // NOTE: We can't reuse the mutable `ArrayBuffer` here and must instantiate a new buffer for the - // next value. `Row.copy()` only copies row cells, it doesn't do deep copy to objects stored - // in row cells. - override def start(): Unit = currentArray = ArrayBuffer.empty[Any] + override def start(): Unit = currentArray.clear() /** Array element converter */ private final class ElementConverter(parquetType: Type, catalystType: DataType) @@ -530,9 +552,10 @@ private[parquet] class ParquetRowConverter( private var currentElement: Any = _ - private val converter = newConverter(parquetType, catalystType, new ParentContainerUpdater { - override def set(value: Any): Unit = currentElement = value - }) + private[this] val converter = + newConverter(parquetType, catalystType, new ParentContainerUpdater { + override def set(value: Any): Unit = currentElement = value + }) override def getConverter(fieldIndex: Int): Converter = converter @@ -549,10 +572,10 @@ private[parquet] class ParquetRowConverter( updater: ParentContainerUpdater) extends ParquetGroupConverter(updater) { - private var currentKeys: ArrayBuffer[Any] = _ - private var currentValues: ArrayBuffer[Any] = _ + private[this] val currentKeys = ArrayBuffer.empty[Any] + private[this] val currentValues = ArrayBuffer.empty[Any] - private val keyValueConverter = { + private[this] val keyValueConverter = { val repeatedType = parquetType.getType(0).asGroupType() new KeyValueConverter( repeatedType.getType(0), @@ -567,15 +590,15 @@ private[parquet] class ParquetRowConverter( // The parquet map may contains null or duplicated map keys. When it happens, the behavior is // undefined. // TODO (SPARK-26174): disallow it with a config. 
- updater.set(ArrayBasedMapData(currentKeys.toArray, currentValues.toArray)) + updater.set( + new ArrayBasedMapData( + new GenericArrayData(currentKeys.toArray), + new GenericArrayData(currentValues.toArray))) } - // NOTE: We can't reuse the mutable Map here and must instantiate a new `Map` for the next - // value. `Row.copy()` only copies row cells, it doesn't do deep copy to objects stored in row - // cells. override def start(): Unit = { - currentKeys = ArrayBuffer.empty[Any] - currentValues = ArrayBuffer.empty[Any] + currentKeys.clear() + currentValues.clear() } /** Parquet converter for key-value pairs within the map. */ @@ -590,7 +613,7 @@ private[parquet] class ParquetRowConverter( private var currentValue: Any = _ - private val converters = Array( + private[this] val converters = Array( // Converter for keys newConverter(parquetKeyType, catalystKeyType, new ParentContainerUpdater { override def set(value: Any): Unit = currentKey = value @@ -616,10 +639,10 @@ private[parquet] class ParquetRowConverter( } private trait RepeatedConverter { - private var currentArray: ArrayBuffer[Any] = _ + private[this] val currentArray = ArrayBuffer.empty[Any] protected def newArrayUpdater(updater: ParentContainerUpdater) = new ParentContainerUpdater { - override def start(): Unit = currentArray = ArrayBuffer.empty[Any] + override def start(): Unit = currentArray.clear() override def end(): Unit = updater.set(new GenericArrayData(currentArray.toArray)) override def set(value: Any): Unit = currentArray += value } @@ -637,7 +660,7 @@ private[parquet] class ParquetRowConverter( val updater: ParentContainerUpdater = newArrayUpdater(parentUpdater) - private val elementConverter: PrimitiveConverter = + private[this] val elementConverter: PrimitiveConverter = newConverter(parquetType, catalystType, updater).asPrimitiveConverter() override def addBoolean(value: Boolean): Unit = elementConverter.addBoolean(value) @@ -664,7 +687,7 @@ private[parquet] class ParquetRowConverter( val 
updater: ParentContainerUpdater = newArrayUpdater(parentUpdater) - private val elementConverter: GroupConverter = + private[this] val elementConverter: GroupConverter = newConverter(parquetType, catalystType, updater).asGroupConverter() override def getConverter(field: Int): Converter = elementConverter.getConverter(field) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index b507ef1c509dd..95343e2872def 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -20,18 +20,18 @@ package org.apache.spark.sql.execution.datasources import java.util.Locale import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession} -import org.apache.spark.sql.catalog.v2.expressions.{FieldReference, RewritableTransform} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast, Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, RowOrdering} +import org.apache.spark.sql.catalyst.expressions.{Expression, InputFileBlockLength, InputFileBlockStart, InputFileName, RowOrdering} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.expressions.{FieldReference, RewritableTransform} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources.InsertableRelation -import org.apache.spark.sql.types.{ArrayType, AtomicType, StructField, StructType} +import org.apache.spark.sql.types.{AtomicType, StructType} import 
org.apache.spark.sql.util.SchemaUtils /** @@ -189,14 +189,11 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi query } - // SPARK-28730: for V1 data source, we use the "LEGACY" as default store assignment policy. - // TODO: use ANSI store assignment policy by default in SPARK-28495. - val storeAssignmentPolicy = conf.storeAssignmentPolicy.getOrElse(StoreAssignmentPolicy.LEGACY) c.copy( tableDesc = existingTable, query = Some(TableOutputResolver.resolveOutputColumns( tableDesc.qualifiedName, existingTable.schema.toAttributes, newQuery, - byName = true, conf, storeAssignmentPolicy))) + byName = true, conf))) // Here we normalize partition, bucket and sort column names, w.r.t. the case sensitivity // config, and do various checks: @@ -377,19 +374,19 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi } /** - * Preprocess the [[InsertIntoTable]] plan. Throws exception if the number of columns mismatch, or - * specified partition columns are different from the existing partition columns in the target + * Preprocess the [[InsertIntoStatement]] plan. Throws exception if the number of columns mismatch, + * or specified partition columns are different from the existing partition columns in the target * table. It also does data type casting and field renaming, to make sure that the columns to be * inserted have the correct data type and fields have the correct names. 
*/ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { private def preprocess( - insert: InsertIntoTable, + insert: InsertIntoStatement, tblName: String, - partColNames: Seq[String]): InsertIntoTable = { + partColNames: Seq[String]): InsertIntoStatement = { val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec( - insert.partition, partColNames, tblName, conf.resolver) + insert.partitionSpec, partColNames, tblName, conf.resolver) val staticPartCols = normalizedPartSpec.filter(_._2.isDefined).keySet val expectedColumns = insert.table.output.filterNot(a => staticPartCols.contains(a.name)) @@ -402,11 +399,8 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { s"including ${staticPartCols.size} partition column(s) having constant value(s).") } - // SPARK-28730: for V1 data source, we use the "LEGACY" as default store assignment policy. - // TODO: use ANSI store assignment policy by default in SPARK-28495. - val storeAssignmentPolicy = conf.storeAssignmentPolicy.getOrElse(StoreAssignmentPolicy.LEGACY) val newQuery = TableOutputResolver.resolveOutputColumns( - tblName, expectedColumns, insert.query, byName = false, conf, storeAssignmentPolicy) + tblName, expectedColumns, insert.query, byName = false, conf) if (normalizedPartSpec.nonEmpty) { if (normalizedPartSpec.size != partColNames.length) { throw new AnalysisException( @@ -417,16 +411,16 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { """.stripMargin) } - insert.copy(query = newQuery, partition = normalizedPartSpec) + insert.copy(query = newQuery, partitionSpec = normalizedPartSpec) } else { // All partition columns are dynamic because the InsertIntoTable command does // not explicitly specify partitioning columns. 
- insert.copy(query = newQuery, partition = partColNames.map(_ -> None).toMap) + insert.copy(query = newQuery, partitionSpec = partColNames.map(_ -> None).toMap) } } def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoTable(table, _, query, _, _) if table.resolved && query.resolved => + case i @ InsertIntoStatement(table, _, query, _, _) if table.resolved && query.resolved => table match { case relation: HiveTableRelation => val metadata = relation.tableMeta @@ -503,7 +497,7 @@ object PreWriteCheck extends (LogicalPlan => Unit) { def apply(plan: LogicalPlan): Unit = { plan.foreach { - case InsertIntoTable(l @ LogicalRelation(relation, _, _, _), partition, query, _, _) => + case InsertIntoStatement(l @ LogicalRelation(relation, _, _, _), partition, query, _, _) => // Get all input data source relations of the query. val srcRelations = query.collect { case LogicalRelation(src, _, _, _) => src @@ -525,7 +519,7 @@ object PreWriteCheck extends (LogicalPlan => Unit) { case _ => failAnalysis(s"$relation does not allow insertion.") } - case InsertIntoTable(t, _, _, _, _) + case InsertIntoStatement(t, _, _, _, _) if !t.isInstanceOf[LeafNode] || t.isInstanceOf[Range] || t.isInstanceOf[OneRowRelation] || diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOutputWriter.scala index faf6e573105f2..2b1b81f60ceb4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOutputWriter.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.sql.execution.datasources.text -import java.io.OutputStream - import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.TaskAttemptContext @@ -32,23 +30,17 @@ class TextOutputWriter( context: TaskAttemptContext) extends OutputWriter { - private 
var outputStream: Option[OutputStream] = None + private val writer = CodecStreams.createOutputStream(context, new Path(path)) override def write(row: InternalRow): Unit = { - val os = outputStream.getOrElse { - val newStream = CodecStreams.createOutputStream(context, new Path(path)) - outputStream = Some(newStream) - newStream - } - if (!row.isNullAt(0)) { val utf8string = row.getUTF8String(0) - utf8string.writeTo(os) + utf8string.writeTo(writer) } - os.write(lineSeparator) + writer.write(lineSeparator) } override def close(): Unit = { - outputStream.foreach(_.close()) + writer.close() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterNamespaceSetPropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterNamespaceSetPropertiesExec.scala new file mode 100644 index 0000000000000..1eebe4cdb6a86 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterNamespaceSetPropertiesExec.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{NamespaceChange, SupportsNamespaces} + +/** + * Physical plan node for setting properties of namespace. + */ +case class AlterNamespaceSetPropertiesExec( + catalog: SupportsNamespaces, + namespace: Seq[String], + props: Map[String, String]) extends V2CommandExec { + override protected def run(): Seq[InternalRow] = { + val changes = props.map{ case (k, v) => + NamespaceChange.setProperty(k, v) + }.toSeq + catalog.alterNamespace(namespace.toArray, changes: _*) + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableExec.scala index a3fa82b12e938..8b2930cca841d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableExec.scala @@ -18,11 +18,9 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.SparkException -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalog.v2.{Identifier, TableCatalog, TableChange} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.execution.LeafExecNode +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog, TableChange} /** * Physical plan node for altering a table. 
@@ -30,11 +28,11 @@ import org.apache.spark.sql.execution.LeafExecNode case class AlterTableExec( catalog: TableCatalog, ident: Identifier, - changes: Seq[TableChange]) extends LeafExecNode { + changes: Seq[TableChange]) extends V2CommandExec { override def output: Seq[Attribute] = Seq.empty - override protected def doExecute(): RDD[InternalRow] = { + override protected def run(): Seq[InternalRow] = { try { catalog.alterTable(ident, changes: _*) } catch { @@ -42,6 +40,6 @@ case class AlterTableExec( throw new SparkException(s"Unsupported table change: ${e.getMessage}", e) } - sqlContext.sparkContext.parallelize(Seq.empty, 1) + Seq.empty } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala index 0f98d9486bbbf..e4e7887017a1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala @@ -21,7 +21,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.sources.v2.reader._ +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan} /** * Physical plan node for scanning a batch of data from a data source v2. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala index f54ff608a53e3..dc95d157e40fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala @@ -20,9 +20,9 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.connector.read.{InputPartition, Scan} +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReaderFactory, ContinuousStream, Offset} import org.apache.spark.sql.execution.streaming.continuous._ -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousPartitionReaderFactory, ContinuousStream, Offset} /** * Physical plan node for scanning data from a streaming data source with continuous mode. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala new file mode 100644 index 0000000000000..d5b81d13a7cc4 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.JavaConverters.mapAsJavaMapConverter + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.SupportsNamespaces +import org.apache.spark.util.Utils + +/** + * Physical plan node for creating a namespace. + */ +case class CreateNamespaceExec( + catalog: SupportsNamespaces, + namespace: Seq[String], + ifNotExists: Boolean, + private var properties: Map[String, String]) + extends V2CommandExec { + override protected def run(): Seq[InternalRow] = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ + + val ns = namespace.toArray + if (!catalog.namespaceExists(ns)) { + try { + val ownership = + Map(PROP_OWNER -> Utils.getCurrentUserName()) + catalog.createNamespace(ns, (properties ++ ownership).asJava) + } catch { + case _: NamespaceAlreadyExistsException if ifNotExists => + logWarning(s"Namespace ${namespace.quoted} was created concurrently. 
Ignoring.") + } + } else if (!ifNotExists) { + throw new NamespaceAlreadyExistsException(ns) + } + + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala index f35758bf08c67..511cd8a9a438f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala @@ -19,13 +19,11 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalog.v2.{Identifier, TableCatalog} -import org.apache.spark.sql.catalog.v2.expressions.Transform import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.execution.LeafExecNode +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType case class CreateTableExec( @@ -34,10 +32,10 @@ case class CreateTableExec( tableSchema: StructType, partitioning: Seq[Transform], tableProperties: Map[String, String], - ignoreIfExists: Boolean) extends LeafExecNode { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ + ignoreIfExists: Boolean) extends V2CommandExec { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - override protected def doExecute(): RDD[InternalRow] = { + override protected def run(): Seq[InternalRow] = { if (!catalog.tableExists(identifier)) { try { catalog.createTable(identifier, tableSchema, partitioning.toArray, tableProperties.asJava) @@ -49,7 +47,7 @@ case class CreateTableExec( throw 
new TableAlreadyExistsException(identifier) } - sqlContext.sparkContext.parallelize(Seq.empty, 1) + Seq.empty } override def output: Seq[Attribute] = Seq.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala index 33079d5912506..9211ec25525fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expression} import org.apache.spark.sql.catalyst.plans.physical -import org.apache.spark.sql.sources.v2.reader.partitioning.{ClusteredDistribution, Partitioning} +import org.apache.spark.sql.connector.read.partitioning.{ClusteredDistribution, Partitioning} /** * An adapter from public data source partitioning to catalyst internal `Partitioning`. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala index f62f7349d1da7..63403b9577237 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala @@ -17,10 +17,15 @@ package org.apache.spark.sql.execution.datasources.v2 +import scala.language.existentials + import org.apache.spark._ +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.sources.v2.reader.{InputPartition, PartitionReader, PartitionReaderFactory} +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} +import org.apache.spark.sql.vectorized.ColumnarBatch class DataSourceRDDPartition(val index: Int, val inputPartition: InputPartition) extends Partition with Serializable @@ -47,31 +52,16 @@ class DataSourceRDD( override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { val inputPartition = castPartition(split).inputPartition - val reader: PartitionReader[_] = if (columnarReads) { - partitionReaderFactory.createColumnarReader(inputPartition) + val (iter, reader) = if (columnarReads) { + val batchReader = partitionReaderFactory.createColumnarReader(inputPartition) + val iter = new MetricsBatchIterator(new PartitionIterator[ColumnarBatch](batchReader)) + (iter, batchReader) } else { - partitionReaderFactory.createReader(inputPartition) + val rowReader = partitionReaderFactory.createReader(inputPartition) + val iter = new MetricsRowIterator(new PartitionIterator[InternalRow](rowReader)) + (iter, rowReader) } - context.addTaskCompletionListener[Unit](_ => reader.close()) - val iter = new Iterator[Any] { - 
private[this] var valuePrepared = false - - override def hasNext: Boolean = { - if (!valuePrepared) { - valuePrepared = reader.next() - } - valuePrepared - } - - override def next(): Any = { - if (!hasNext) { - throw new java.util.NoSuchElementException("End of stream") - } - valuePrepared = false - reader.get() - } - } // TODO: SPARK-25083 remove the type erasure hack in data source scan new InterruptibleIterator(context, iter.asInstanceOf[Iterator[InternalRow]]) } @@ -80,3 +70,68 @@ class DataSourceRDD( castPartition(split).inputPartition.preferredLocations() } } + +private class PartitionIterator[T](reader: PartitionReader[T]) extends Iterator[T] { + private[this] var valuePrepared = false + + override def hasNext: Boolean = { + if (!valuePrepared) { + valuePrepared = reader.next() + } + valuePrepared + } + + override def next(): T = { + if (!hasNext) { + throw new java.util.NoSuchElementException("End of stream") + } + valuePrepared = false + reader.get() + } +} + +private class MetricsHandler extends Logging with Serializable { + private val inputMetrics = TaskContext.get().taskMetrics().inputMetrics + private val startingBytesRead = inputMetrics.bytesRead + private val getBytesRead = SparkHadoopUtil.get.getFSBytesReadOnThreadCallback() + + def updateMetrics(numRows: Int, force: Boolean = false): Unit = { + inputMetrics.incRecordsRead(numRows) + val shouldUpdateBytesRead = + inputMetrics.recordsRead % SparkHadoopUtil.UPDATE_INPUT_METRICS_INTERVAL_RECORDS == 0 + if (shouldUpdateBytesRead || force) { + inputMetrics.setBytesRead(startingBytesRead + getBytesRead()) + } + } +} + +private abstract class MetricsIterator[I](iter: Iterator[I]) extends Iterator[I] { + protected val metricsHandler = new MetricsHandler + + override def hasNext: Boolean = { + if (iter.hasNext) { + true + } else { + metricsHandler.updateMetrics(0, force = true) + false + } + } +} + +private class MetricsRowIterator( + iter: Iterator[InternalRow]) extends MetricsIterator[InternalRow](iter) { 
+ override def next(): InternalRow = { + val item = iter.next + metricsHandler.updateMetrics(1) + item + } +} + +private class MetricsBatchIterator( + iter: Iterator[ColumnarBatch]) extends MetricsIterator[ColumnarBatch](iter) { + override def next(): ColumnarBatch = { + val batch: ColumnarBatch = iter.next + metricsHandler.updateMetrics(batch.numRows) + batch + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala index 74fc5432ea82c..211f61279ddd5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala @@ -23,9 +23,9 @@ import org.apache.spark.sql.catalyst.expressions.AttributeMap import org.apache.spark.sql.catalyst.plans.physical import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan, SupportsReportPartitioning} import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.execution.metric.SQLMetrics -import org.apache.spark.sql.sources.v2.reader.{InputPartition, PartitionReaderFactory, Scan, SupportsReportPartitioning} import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 7cad305aefeb8..8f4e2d256c714 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ 
-17,182 +17,134 @@ package org.apache.spark.sql.execution.datasources.v2 -import java.util.UUID - import scala.collection.JavaConverters._ -import scala.collection.mutable -import org.apache.spark.sql.{AnalysisException, Strategy} -import org.apache.spark.sql.catalog.v2.StagingTableCatalog -import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression, PredicateHelper, SubqueryExpression} +import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} +import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedTable} +import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, AppendData, CreateTableAsSelect, CreateV2Table, DeleteFromTable, DescribeTable, DropTable, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, Repartition, ReplaceTable, ReplaceTableAsSelect, ShowTables} -import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} +import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{ContinuousCoalesceExec, WriteToContinuousDataSource, WriteToContinuousDataSourceExec} -import org.apache.spark.sql.sources -import org.apache.spark.sql.sources.v2.TableCapability -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, MicroBatchStream} -import 
org.apache.spark.sql.sources.v2.writer.V1WriteBuilder +import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.util.CaseInsensitiveStringMap -object DataSourceV2Strategy extends Strategy with PredicateHelper { - - /** - * Pushes down filters to the data source reader - * - * @return pushed filter and post-scan filters. - */ - private def pushFilters( - scanBuilder: ScanBuilder, - filters: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { - scanBuilder match { - case r: SupportsPushDownFilters => - // A map from translated data source leaf node filters to original catalyst filter - // expressions. For a `And`/`Or` predicate, it is possible that the predicate is partially - // pushed down. This map can be used to construct a catalyst filter expression from the - // input filter, or a superset(partial push down filter) of the input filter. - val translatedFilterToExpr = mutable.HashMap.empty[sources.Filter, Expression] - val translatedFilters = mutable.ArrayBuffer.empty[sources.Filter] - // Catalyst filter expression that can't be translated to data source filters. - val untranslatableExprs = mutable.ArrayBuffer.empty[Expression] - - for (filterExpr <- filters) { - val translated = - DataSourceStrategy.translateFilterWithMapping(filterExpr, Some(translatedFilterToExpr)) - if (translated.isEmpty) { - untranslatableExprs += filterExpr - } else { - translatedFilters += translated.get - } - } - - // Data source filters that need to be evaluated again after scanning. which means - // the data source cannot guarantee the rows returned can pass these filters. - // As a result we must return it so Spark can plan an extra filter operator. 
- val postScanFilters = r.pushFilters(translatedFilters.toArray).map { filter => - DataSourceStrategy.rebuildExpressionFromFilter(filter, translatedFilterToExpr) - } - // The filters which are marked as pushed to this data source - val pushedFilters = r.pushedFilters().map { filter => - DataSourceStrategy.rebuildExpressionFromFilter(filter, translatedFilterToExpr) - } - (pushedFilters, untranslatableExprs ++ postScanFilters) - - case _ => (Nil, filters) - } - } - - /** - * Applies column pruning to the data source, w.r.t. the references of the given expressions. - * - * @return the created `ScanConfig`(since column pruning is the last step of operator pushdown), - * and new output attributes after column pruning. - */ - // TODO: nested column pruning. - private def pruneColumns( - scanBuilder: ScanBuilder, - relation: DataSourceV2Relation, - exprs: Seq[Expression]): (Scan, Seq[AttributeReference]) = { - scanBuilder match { - case r: SupportsPushDownRequiredColumns => - val requiredColumns = AttributeSet(exprs.flatMap(_.references)) - val neededOutput = relation.output.filter(requiredColumns.contains) - if (neededOutput != relation.output) { - r.pruneColumns(neededOutput.toStructType) - val scan = r.build() - val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap - scan -> scan.readSchema().toAttributes.map { - // We have to keep the attribute id during transformation. 
- a => a.withExprId(nameToAttr(a.name).exprId) - } - } else { - r.build() -> relation.output - } +class DataSourceV2Strategy(session: SparkSession) extends Strategy with PredicateHelper { - case _ => scanBuilder.build() -> relation.output + import DataSourceV2Implicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + private def withProjectAndFilter( + project: Seq[NamedExpression], + filters: Seq[Expression], + scan: LeafExecNode, + needsUnsafeConversion: Boolean): SparkPlan = { + val filterCondition = filters.reduceLeftOption(And) + val withFilter = filterCondition.map(FilterExec(_, scan)).getOrElse(scan) + + if (withFilter.output != project || needsUnsafeConversion) { + ProjectExec(project, withFilter) + } else { + withFilter } } - import DataSourceV2Implicits._ - override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case PhysicalOperation(project, filters, relation: DataSourceV2Relation) => - val scanBuilder = relation.newScanBuilder() - - val (withSubquery, withoutSubquery) = filters.partition(SubqueryExpression.hasSubquery) - val normalizedFilters = DataSourceStrategy.normalizeFilters( - withoutSubquery, relation.output) - - // `pushedFilters` will be pushed down and evaluated in the underlying data sources. - // `postScanFilters` need to be evaluated after the scan. - // `postScanFilters` and `pushedFilters` can overlap, e.g. the parquet row group filter. 
- val (pushedFilters, postScanFiltersWithoutSubquery) = - pushFilters(scanBuilder, normalizedFilters) - val postScanFilters = postScanFiltersWithoutSubquery ++ withSubquery - val (scan, output) = pruneColumns(scanBuilder, relation, project ++ postScanFilters) - logInfo( - s""" - |Pushing operators to ${relation.name} - |Pushed Filters: ${pushedFilters.mkString(", ")} - |Post-Scan Filters: ${postScanFilters.mkString(",")} - |Output: ${output.mkString(", ")} - """.stripMargin) - - val batchExec = BatchScanExec(output, scan) - - val filterCondition = postScanFilters.reduceLeftOption(And) - val withFilter = filterCondition.map(FilterExec(_, batchExec)).getOrElse(batchExec) - - val withProjection = if (withFilter.output != project || !batchExec.supportsColumnar) { - ProjectExec(project, withFilter) - } else { - withFilter + case PhysicalOperation(project, filters, + relation @ DataSourceV2ScanRelation(_, V1ScanWrapper(scan, translated, pushed), output)) => + val v1Relation = scan.toV1TableScan[BaseRelation with TableScan](session.sqlContext) + if (v1Relation.schema != scan.readSchema()) { + throw new IllegalArgumentException( + "The fallback v1 relation reports inconsistent schema:\n" + + "Schema of v2 scan: " + scan.readSchema() + "\n" + + "Schema of v1 relation: " + v1Relation.schema) } - - withProjection :: Nil + val rdd = v1Relation.buildScan() + val unsafeRowRDD = DataSourceStrategy.toCatalystRDD(v1Relation, output, rdd) + val originalOutputNames = relation.table.schema().map(_.name) + val requiredColumnsIndex = output.map(_.name).map(originalOutputNames.indexOf) + val dsScan = RowDataSourceScanExec( + output, + requiredColumnsIndex, + translated.toSet, + pushed.toSet, + unsafeRowRDD, + v1Relation, + tableIdentifier = None) + withProjectAndFilter(project, filters, dsScan, needsUnsafeConversion = false) :: Nil + + case PhysicalOperation(project, filters, relation: DataSourceV2ScanRelation) => + // projection and filters were already pushed down in the optimizer. 
+ // this uses PhysicalOperation to get the projection and ensure that if the batch scan does + // not support columnar, a projection is added to convert the rows to UnsafeRow. + val batchExec = BatchScanExec(relation.output, relation.scan) + withProjectAndFilter(project, filters, batchExec, !batchExec.supportsColumnar) :: Nil case r: StreamingDataSourceV2Relation if r.startOffset.isDefined && r.endOffset.isDefined => val microBatchStream = r.stream.asInstanceOf[MicroBatchStream] - // ensure there is a projection, which will produce unsafe rows required by some operators - ProjectExec(r.output, - MicroBatchScanExec( - r.output, r.scan, microBatchStream, r.startOffset.get, r.endOffset.get)) :: Nil + val scanExec = MicroBatchScanExec( + r.output, r.scan, microBatchStream, r.startOffset.get, r.endOffset.get) + + val withProjection = if (scanExec.supportsColumnar) { + scanExec + } else { + // Add a Project here to make sure we produce unsafe rows. + ProjectExec(r.output, scanExec) + } + + withProjection :: Nil case r: StreamingDataSourceV2Relation if r.startOffset.isDefined && r.endOffset.isEmpty => val continuousStream = r.stream.asInstanceOf[ContinuousStream] - // ensure there is a projection, which will produce unsafe rows required by some operators - ProjectExec(r.output, - ContinuousScanExec( - r.output, r.scan, continuousStream, r.startOffset.get)) :: Nil + val scanExec = ContinuousScanExec(r.output, r.scan, continuousStream, r.startOffset.get) + + val withProjection = if (scanExec.supportsColumnar) { + scanExec + } else { + // Add a Project here to make sure we produce unsafe rows. 
+ ProjectExec(r.output, scanExec) + } + + withProjection :: Nil case WriteToDataSourceV2(writer, query) => WriteToDataSourceV2Exec(writer, planLater(query)) :: Nil case CreateV2Table(catalog, ident, schema, parts, props, ifNotExists) => - CreateTableExec(catalog, ident, schema, parts, props, ifNotExists) :: Nil + val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) + CreateTableExec(catalog, ident, schema, parts, propsWithOwner, ifNotExists) :: Nil case CreateTableAsSelect(catalog, ident, parts, query, props, options, ifNotExists) => + val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) val writeOptions = new CaseInsensitiveStringMap(options.asJava) catalog match { case staging: StagingTableCatalog => - AtomicCreateTableAsSelectExec( - staging, ident, parts, query, planLater(query), props, writeOptions, ifNotExists) :: Nil + AtomicCreateTableAsSelectExec(staging, ident, parts, query, planLater(query), + propsWithOwner, writeOptions, ifNotExists) :: Nil case _ => - CreateTableAsSelectExec( - catalog, ident, parts, query, planLater(query), props, writeOptions, ifNotExists) :: Nil + CreateTableAsSelectExec(catalog, ident, parts, query, planLater(query), + propsWithOwner, writeOptions, ifNotExists) :: Nil } + case RefreshTable(catalog, ident) => + RefreshTableExec(catalog, ident) :: Nil + case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => + val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) catalog match { case staging: StagingTableCatalog => - AtomicReplaceTableExec(staging, ident, schema, parts, props, orCreate = orCreate) :: Nil + AtomicReplaceTableExec( + staging, ident, schema, parts, propsWithOwner, orCreate = orCreate) :: Nil case _ => - ReplaceTableExec(catalog, ident, schema, parts, props, orCreate = orCreate) :: Nil + ReplaceTableExec( + catalog, ident, schema, parts, propsWithOwner, orCreate = orCreate) :: Nil } case ReplaceTableAsSelect(catalog, ident, parts, query, props, options, orCreate) => + val 
propsWithOwner = CatalogV2Util.withDefaultOwnership(props) val writeOptions = new CaseInsensitiveStringMap(options.asJava) catalog match { case staging: StagingTableCatalog => @@ -202,7 +154,7 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper { parts, query, planLater(query), - props, + propsWithOwner, writeOptions, orCreate = orCreate) :: Nil case _ => @@ -212,20 +164,20 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper { parts, query, planLater(query), - props, + propsWithOwner, writeOptions, orCreate = orCreate) :: Nil } - case AppendData(r: DataSourceV2Relation, query, _) => + case AppendData(r: DataSourceV2Relation, query, writeOptions, _) => r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - AppendDataExecV1(v1, r.options, query) :: Nil + AppendDataExecV1(v1, writeOptions.asOptions, query) :: Nil case v2 => - AppendDataExec(v2, r.options, planLater(query)) :: Nil + AppendDataExec(v2, writeOptions.asOptions, planLater(query)) :: Nil } - case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, _) => + case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, writeOptions, _) => // fail if any filter cannot be converted. correctness depends on removing all matching data. 
val filters = splitConjunctivePredicates(deleteExpr).map { filter => DataSourceStrategy.translateFilter(deleteExpr).getOrElse( @@ -233,26 +185,34 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper { }.toArray r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - OverwriteByExpressionExecV1(v1, filters, r.options, query) :: Nil + OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, query) :: Nil case v2 => - OverwriteByExpressionExec(v2, filters, r.options, planLater(query)) :: Nil + OverwriteByExpressionExec(v2, filters, writeOptions.asOptions, planLater(query)) :: Nil } - case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, _) => - OverwritePartitionsDynamicExec(r.table.asWritable, r.options, planLater(query)) :: Nil + case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _) => + OverwritePartitionsDynamicExec( + r.table.asWritable, writeOptions.asOptions, planLater(query)) :: Nil - case DeleteFromTable(r: DataSourceV2Relation, condition) => - if (SubqueryExpression.hasSubquery(condition)) { - throw new AnalysisException( - s"Delete by condition with subquery is not supported: $condition") + case DeleteFromTable(relation, condition) => + relation match { + case DataSourceV2ScanRelation(table, _, output) => + if (condition.exists(SubqueryExpression.hasSubquery)) { + throw new AnalysisException( + s"Delete by condition with subquery is not supported: $condition") + } + // fail if any filter cannot be converted. + // correctness depends on removing all matching data. 
+ val filters = DataSourceStrategy.normalizeExprs(condition.toSeq, output) + .flatMap(splitConjunctivePredicates(_).map { + f => DataSourceStrategy.translateFilter(f).getOrElse( + throw new AnalysisException(s"Exec update failed:" + + s" cannot translate expression to source filter: $f")) + }).toArray + DeleteFromTableExec(table.asDeletable, filters) :: Nil + case _ => + throw new AnalysisException("DELETE is only supported with v2 tables.") } - // fail if any filter cannot be converted. correctness depends on removing all matching data. - val filters = splitConjunctivePredicates(condition).map { - f => DataSourceStrategy.translateFilter(f).getOrElse( - throw new AnalysisException(s"Exec delete failed:" + - s" cannot translate expression to source filter: $f")) - }.toArray - DeleteFromTableExec(r.table.asDeletable, filters) :: Nil case WriteToContinuousDataSource(writer, query) => WriteToContinuousDataSourceExec(writer, planLater(query)) :: Nil @@ -269,7 +229,13 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper { Nil } - case desc @ DescribeTable(r: DataSourceV2Relation, isExtended) => + case desc @ DescribeNamespace(ResolvedNamespace(catalog, ns), extended) => + DescribeNamespaceExec(desc.output, catalog.asNamespaceCatalog, ns, extended) :: Nil + + case desc @ DescribeRelation(r: ResolvedTable, partitionSpec, isExtended) => + if (partitionSpec.nonEmpty) { + throw new AnalysisException("DESCRIBE does not support partition for v2 tables.") + } DescribeTableExec(desc.output, r.table, isExtended) :: Nil case DropTable(catalog, ident, ifExists) => @@ -278,8 +244,48 @@ object DataSourceV2Strategy extends Strategy with PredicateHelper { case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil - case r : ShowTables => - ShowTablesExec(r.output, r.catalog, r.namespace, r.pattern) :: Nil + case RenameTable(catalog, oldIdent, newIdent) => + RenameTableExec(catalog, oldIdent, newIdent) :: Nil + + case 
AlterNamespaceSetProperties(ResolvedNamespace(catalog, ns), properties) => + AlterNamespaceSetPropertiesExec(catalog.asNamespaceCatalog, ns, properties) :: Nil + + case AlterNamespaceSetLocation(ResolvedNamespace(catalog, ns), location) => + AlterNamespaceSetPropertiesExec( + catalog.asNamespaceCatalog, + ns, + Map(SupportsNamespaces.PROP_LOCATION -> location)) :: Nil + + case CommentOnNamespace(ResolvedNamespace(catalog, ns), comment) => + AlterNamespaceSetPropertiesExec( + catalog.asNamespaceCatalog, + ns, + Map(SupportsNamespaces.PROP_COMMENT -> comment)) :: Nil + + case CommentOnTable(ResolvedTable(catalog, identifier, _), comment) => + val changes = TableChange.setProperty(TableCatalog.PROP_COMMENT, comment) + AlterTableExec(catalog, identifier, Seq(changes)) :: Nil + + case CreateNamespace(catalog, namespace, ifNotExists, properties) => + CreateNamespaceExec(catalog, namespace, ifNotExists, properties) :: Nil + + case DropNamespace(ResolvedNamespace(catalog, ns), ifExists, cascade) => + DropNamespaceExec(catalog, ns, ifExists, cascade) :: Nil + + case r @ ShowNamespaces(ResolvedNamespace(catalog, ns), pattern) => + ShowNamespacesExec(r.output, catalog.asNamespaceCatalog, ns, pattern) :: Nil + + case r @ ShowTables(ResolvedNamespace(catalog, ns), pattern) => + ShowTablesExec(r.output, catalog.asTableCatalog, ns, pattern) :: Nil + + case SetCatalogAndNamespace(catalogManager, catalogName, ns) => + SetCatalogAndNamespaceExec(catalogManager, catalogName, ns) :: Nil + + case r: ShowCurrentNamespace => + ShowCurrentNamespaceExec(r.output, r.catalogManager) :: Nil + + case r @ ShowTableProperties(rt: ResolvedTable, propertyKey) => + ShowTablePropertiesExec(r.output, rt.table, propertyKey) :: Nil case _ => Nil } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala index 30897d86f8179..b50b8295463eb 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala @@ -20,8 +20,10 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.regex.Pattern import org.apache.spark.internal.Logging +import org.apache.spark.sql.connector.catalog.{SessionConfigSupport, Table, TableProvider} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.{SessionConfigSupport, TableProvider} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap private[sql] object DataSourceV2Utils extends Logging { @@ -57,4 +59,28 @@ private[sql] object DataSourceV2Utils extends Logging { case _ => Map.empty } } + + def getTableFromProvider( + provider: TableProvider, + options: CaseInsensitiveStringMap, + userSpecifiedSchema: Option[StructType]): Table = { + userSpecifiedSchema match { + case Some(schema) => + if (provider.supportsExternalMetadata()) { + provider.getTable( + schema, + provider.inferPartitioning(options), + options.asCaseSensitiveMap()) + } else { + throw new UnsupportedOperationException( + s"${provider.getClass.getSimpleName} source does not support user-specified schema.") + } + + case None => + provider.getTable( + provider.inferSchema(options), + provider.inferPartitioning(options), + options.asCaseSensitiveMap()) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala index a5840571fff23..afebbfd01db22 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala @@ -17,21 +17,18 @@ package org.apache.spark.sql.execution.datasources.v2 -import 
org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.execution.LeafExecNode +import org.apache.spark.sql.connector.catalog.SupportsDelete import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.sources.v2.SupportsDelete -import org.apache.spark.sql.util.CaseInsensitiveStringMap case class DeleteFromTableExec( table: SupportsDelete, - condition: Array[Filter]) extends LeafExecNode { + condition: Array[Filter]) extends V2CommandExec { - override protected def doExecute(): RDD[InternalRow] = { + override protected def run(): Seq[InternalRow] = { table.deleteWhere(condition) - sparkContext.emptyRDD + Seq.empty } override def output: Seq[Attribute] = Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala new file mode 100644 index 0000000000000..64b98fb83b8fa --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces} +import org.apache.spark.sql.types.StructType + +/** + * Physical plan node for describing a namespace. + */ +case class DescribeNamespaceExec( + output: Seq[Attribute], + catalog: SupportsNamespaces, + namespace: Seq[String], + isExtended: Boolean) extends V2CommandExec { + private val encoder = RowEncoder(StructType.fromAttributes(output)).resolveAndBind() + + override protected def run(): Seq[InternalRow] = { + val rows = new ArrayBuffer[InternalRow]() + val ns = namespace.toArray + val metadata = catalog.loadNamespaceMetadata(ns) + + rows += toCatalystRow("Namespace Name", ns.last) + + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.foreach { p => + rows ++= Option(metadata.get(p)).map(toCatalystRow(p.capitalize, _)) + } + + if (isExtended) { + val properties = metadata.asScala -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES + if (properties.nonEmpty) { + rows += toCatalystRow("Properties", properties.toSeq.mkString("(", ",", ")")) + } + } + rows + } + + private def toCatalystRow(strs: String*): InternalRow = { + encoder.toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 640bdfb8cba54..9c280206c548e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -20,30 +20,47 @@ 
package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} -import org.apache.spark.sql.execution.LeafExecNode -import org.apache.spark.sql.sources.v2.Table +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog} import org.apache.spark.sql.types.StructType case class DescribeTableExec( output: Seq[Attribute], table: Table, - isExtended: Boolean) extends LeafExecNode { + isExtended: Boolean) extends V2CommandExec { private val encoder = RowEncoder(StructType.fromAttributes(output)).resolveAndBind() - override protected def doExecute(): RDD[InternalRow] = { + override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() addSchema(rows) + addPartitioning(rows) if (isExtended) { - addPartitioning(rows) - addProperties(rows) + addTableDetails(rows) } - sparkContext.parallelize(rows) + rows + } + + private def addTableDetails(rows: ArrayBuffer[InternalRow]): Unit = { + rows += emptyRow() + rows += toCatalystRow("# Detailed Table Information", "", "") + rows += toCatalystRow("Name", table.name(), "") + + CatalogV2Util.TABLE_RESERVED_PROPERTIES.foreach(propKey => { + if (table.properties.containsKey(propKey)) { + rows += toCatalystRow(propKey.capitalize, table.properties.get(propKey), "") + } + }) + val properties = + table.properties.asScala.toList + .filter(kv => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(kv._1)) + .sortBy(_._1).map { + case (key, value) => key + "=" + value + }.mkString("[", ",", "]") + rows += toCatalystRow("Table Properties", properties, "") } private def addSchema(rows: ArrayBuffer[InternalRow]): Unit = { @@ -55,8 +72,7 @@ case class DescribeTableExec( private def addPartitioning(rows: 
ArrayBuffer[InternalRow]): Unit = { rows += emptyRow() - rows += toCatalystRow(" Partitioning", "", "") - rows += toCatalystRow("--------------", "", "") + rows += toCatalystRow("# Partitioning", "", "") if (table.partitioning.isEmpty) { rows += toCatalystRow("Not partitioned", "", "") } else { @@ -66,15 +82,6 @@ case class DescribeTableExec( } } - private def addProperties(rows: ArrayBuffer[InternalRow]): Unit = { - rows += emptyRow() - rows += toCatalystRow(" Table Property", " Value", "") - rows += toCatalystRow("----------------", "-------", "") - rows ++= table.properties.asScala.toList.sortBy(_._1).map { - case (key, value) => toCatalystRow(key, value, "") - } - } - private def emptyRow(): InternalRow = toCatalystRow("", "", "") private def toCatalystRow(strs: String*): InternalRow = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala new file mode 100644 index 0000000000000..f7b4317ad65e2 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, SupportsNamespaces} + +/** + * Physical plan node for dropping a namespace. + */ +case class DropNamespaceExec( + catalog: CatalogPlugin, + namespace: Seq[String], + ifExists: Boolean, + cascade: Boolean) + extends V2CommandExec { + override protected def run(): Seq[InternalRow] = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + val nsCatalog = catalog.asNamespaceCatalog + val ns = namespace.toArray + if (nsCatalog.namespaceExists(ns)) { + // The default behavior of `SupportsNamespace.dropNamespace()` is cascading, + // so make sure the namespace to drop is empty. + if (!cascade) { + if (catalog.asTableCatalog.listTables(ns).nonEmpty + || nsCatalog.listNamespaces(ns).nonEmpty) { + throw new SparkException( + s"Cannot drop a non-empty namespace: ${namespace.quoted}. 
" + + "Use CASCADE option to drop a non-empty namespace.") + } + } + + if (!nsCatalog.dropNamespace(ns)) { + throw new SparkException(s"Failed to drop a namespace: ${namespace.quoted}.") + } + } else if (!ifExists) { + throw new NoSuchNamespaceException(ns) + } + + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index d325e0205f9d8..967613f77577c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -17,27 +17,25 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalog.v2.{Identifier, TableCatalog} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.execution.LeafExecNode +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} /** * Physical plan node for dropping a table. 
*/ case class DropTableExec(catalog: TableCatalog, ident: Identifier, ifExists: Boolean) - extends LeafExecNode { + extends V2CommandExec { - override def doExecute(): RDD[InternalRow] = { + override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { catalog.dropTable(ident) } else if (!ifExists) { throw new NoSuchTableException(ident) } - sqlContext.sparkContext.parallelize(Seq.empty, 1) + Seq.empty } override def output: Seq[Attribute] = Seq.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/EmptyPartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/EmptyPartitionReader.scala index b177d15e1fe32..711bd41e1db24 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/EmptyPartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/EmptyPartitionReader.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import java.io.IOException -import org.apache.spark.sql.sources.v2.reader.PartitionReader +import org.apache.spark.sql.connector.read.PartitionReader /** * A [[PartitionReader]] with empty output. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala index db31927fa73bb..266c834909363 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala @@ -20,10 +20,9 @@ import org.apache.hadoop.mapreduce.Job import org.apache.spark.internal.Logging import org.apache.spark.internal.io.FileCommitProtocol +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, PhysicalWriteInfo, WriterCommitMessage} import org.apache.spark.sql.execution.datasources.{WriteJobDescription, WriteTaskResult} import org.apache.spark.sql.execution.datasources.FileFormatWriter.processStats -import org.apache.spark.sql.sources.v2.writer._ -import org.apache.spark.util.SerializableConfiguration class FileBatchWrite( job: Job, @@ -45,7 +44,7 @@ class FileBatchWrite( committer.abortJob(job) } - override def createBatchWriterFactory(): DataWriterFactory = { + override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = { FileWriterFactory(description, committer) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala index ac786bbaac6d7..30a964d7e643f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala @@ -16,13 +16,17 @@ */ package org.apache.spark.sql.execution.datasources.v2 +import java.util + import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.catalog.{Table, 
TableProvider} +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.sources.DataSourceRegister -import org.apache.spark.sql.sources.v2.TableProvider +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -59,4 +63,40 @@ trait FileDataSourceV2 extends TableProvider with DataSourceRegister { val fs = hdfsPath.getFileSystem(sparkSession.sessionState.newHadoopConf()) hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory).toString } + + // TODO: To reduce code diff of SPARK-29665, we create stub implementations for file source v2, so + // that we don't need to touch all the file source v2 classes. We should remove the stub + // implementation and directly implement the TableProvider APIs. + protected def getTable(options: CaseInsensitiveStringMap): Table + protected def getTable(options: CaseInsensitiveStringMap, schema: StructType): Table = { + throw new UnsupportedOperationException("user-specified schema") + } + + override def supportsExternalMetadata(): Boolean = true + + private var t: Table = null + + override def inferSchema(options: CaseInsensitiveStringMap): StructType = { + if (t == null) t = getTable(options) + t.schema() + } + + // TODO: implement a light-weight partition inference which only looks at the path of one leaf + // file and return partition column names. For now the partition inference happens in + // `getTable`, because we don't know the user-specified schema here. + override def inferPartitioning(options: CaseInsensitiveStringMap): Array[Transform] = { + Array.empty + } + + override def getTable( + schema: StructType, + partitioning: Array[Transform], + properties: util.Map[String, String]): Table = { + // If the table is already loaded during schema inference, return it directly. 
+ if (t != null) { + t + } else { + getTable(new CaseInsensitiveStringMap(properties), schema) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala index 836eae88e4da7..8f51d454b1434 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala @@ -22,10 +22,10 @@ import org.apache.parquet.io.ParquetDecodingException import org.apache.spark.internal.Logging import org.apache.spark.rdd.InputFileBlockHolder +import org.apache.spark.sql.connector.read.PartitionReader import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.PartitionReader class FilePartitionReader[T](readers: Iterator[PartitionedFileReader[T]]) extends PartitionReader[T] with Logging { @@ -42,9 +42,8 @@ class FilePartitionReader[T](readers: Iterator[PartitionedFileReader[T]]) currentReader = getNextReader() } catch { case e: FileNotFoundException if ignoreMissingFiles => - logWarning(s"Skipped missing file: $currentReader", e) + logWarning(s"Skipped missing file.", e) currentReader = null - return false // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw new FileNotFoundException( @@ -54,10 +53,8 @@ class FilePartitionReader[T](readers: Iterator[PartitionedFileReader[T]]) "recreating the Dataset/DataFrame involved.") case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( - s"Skipped the rest of the content in the corrupted file: $currentReader", e) + s"Skipped the rest of the content in the corrupted 
file.", e) currentReader = null - InputFileBlockHolder.unset() - return false } } else { return false @@ -67,7 +64,7 @@ class FilePartitionReader[T](readers: Iterator[PartitionedFileReader[T]]) // In PartitionReader.next(), the current reader proceeds to next record. // It might throw RuntimeException/IOException and Spark should handle these exceptions. val hasNext = try { - currentReader.next() + currentReader != null && currentReader.next() } catch { case e: SchemaColumnConvertNotSupportedException => val message = "Parquet column cannot be converted in " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala index 5a19412c90334..c1d91736a8b8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReaderFactory.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, PartitioningUtils} -import org.apache.spark.sql.sources.v2.reader.{InputPartition, PartitionReader, PartitionReaderFactory} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} +import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile} import org.apache.spark.sql.vectorized.ColumnarBatch abstract class FilePartitionReaderFactory extends PartitionReaderFactory { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala index 0438bd0430da1..6e05aa56f4f72 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala @@ -24,21 +24,16 @@ import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging import org.apache.spark.internal.config.IO_WARNING_LARGEFILETHRESHOLD import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionSet} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.connector.read.{Batch, InputPartition, Scan, Statistics, SupportsReportStatistics} import org.apache.spark.sql.execution.PartitionedFileUtil import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.sources.v2.reader._ import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils -abstract class FileScan( - sparkSession: SparkSession, - fileIndex: PartitioningAwareFileIndex, - readDataSchema: StructType, - readPartitionSchema: StructType) - extends Scan - with Batch with SupportsReportStatistics with Logging { +trait FileScan extends Scan with Batch with SupportsReportStatistics with Logging { /** * Returns whether a file with `path` could be split or not. 
*/ @@ -46,6 +41,36 @@ abstract class FileScan( false } + def sparkSession: SparkSession + + def fileIndex: PartitioningAwareFileIndex + + /** + * Returns the required data schema + */ + def readDataSchema: StructType + + /** + * Returns the required partition schema + */ + def readPartitionSchema: StructType + + /** + * Returns the filters that can be use for partition pruning + */ + def partitionFilters: Seq[Expression] + + /** + * Returns the data filters that can be use for file listing + */ + def dataFilters: Seq[Expression] + + /** + * Create a new `FileScan` instance from the current one + * with different `partitionFilters` and `dataFilters` + */ + def withFilters(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): FileScan + /** * If a file with `path` is unsplittable, return the unsplittable reason, * otherwise return `None`. @@ -55,11 +80,26 @@ abstract class FileScan( "undefined" } + protected def seqToString(seq: Seq[Any]): String = seq.mkString("[", ", ", "]") + + override def equals(obj: Any): Boolean = obj match { + case f: FileScan => + fileIndex == f.fileIndex && readSchema == f.readSchema + ExpressionSet(partitionFilters) == ExpressionSet(f.partitionFilters) && + ExpressionSet(dataFilters) == ExpressionSet(f.dataFilters) + + case _ => false + } + + override def hashCode(): Int = getClass.hashCode() + override def description(): String = { val locationDesc = fileIndex.getClass.getSimpleName + fileIndex.rootPaths.mkString("[", ", ", "]") val metadata: Map[String, String] = Map( "ReadSchema" -> readDataSchema.catalogString, + "PartitionFilters" -> seqToString(partitionFilters), + "DataFilters" -> seqToString(dataFilters), "Location" -> locationDesc) val metadataStr = metadata.toSeq.sorted.map { case (key, value) => @@ -71,7 +111,7 @@ abstract class FileScan( } protected def partitions: Seq[FilePartition] = { - val selectedPartitions = fileIndex.listFiles(Seq.empty, Seq.empty) + val selectedPartitions = 
fileIndex.listFiles(partitionFilters, dataFilters) val maxSplitBytes = FilePartition.maxSplitBytes(sparkSession, selectedPartitions) val partitionAttributes = fileIndex.partitionSchema.toAttributes val attributeMap = partitionAttributes.map(a => normalizeName(a.name) -> a).toMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala index 3b236be90e6ff..97874e8f4932e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownRequiredColumns} import org.apache.spark.sql.execution.datasources.{PartitioningAwareFileIndex, PartitioningUtils} -import org.apache.spark.sql.sources.v2.reader.{ScanBuilder, SupportsPushDownRequiredColumns} import org.apache.spark.sql.types.StructType abstract class FileScanBuilder( @@ -27,15 +27,21 @@ abstract class FileScanBuilder( dataSchema: StructType) extends ScanBuilder with SupportsPushDownRequiredColumns { private val partitionSchema = fileIndex.partitionSchema private val isCaseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis + protected val supportsNestedSchemaPruning = false protected var requiredSchema = StructType(dataSchema.fields ++ partitionSchema.fields) override def pruneColumns(requiredSchema: StructType): Unit = { + // [SPARK-30107] While `requiredSchema` might have pruned nested columns, + // the actual data schema of this scan is determined in `readDataSchema`. + // File formats that don't support nested schema pruning, + // use `requiredSchema` as a reference and prune only top-level columns. 
this.requiredSchema = requiredSchema } protected def readDataSchema(): StructType = { val requiredNameSet = createRequiredNameSet() - val fields = dataSchema.fields.filter { field => + val schema = if (supportsNestedSchemaPruning) requiredSchema else dataSchema + val fields = schema.fields.filter { field => val colName = PartitioningUtils.getColName(field, isCaseSensitive) requiredNameSet.contains(colName) && !partitionNameSet.contains(colName) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala index 4483f5b1dd30c..59dc3ae56bf25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala @@ -23,11 +23,11 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.sql.{AnalysisException, SparkSession} -import org.apache.spark.sql.catalog.v2.expressions.Transform +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} -import org.apache.spark.sql.sources.v2.{SupportsRead, SupportsWrite, Table, TableCapability} -import org.apache.spark.sql.sources.v2.TableCapability._ import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.sql.util.SchemaUtils @@ -39,7 +39,7 @@ abstract class FileTable( userSpecifiedSchema: Option[StructType]) extends Table with SupportsRead with SupportsWrite { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ + import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ lazy val fileIndex: PartitioningAwareFileIndex = { val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap @@ -102,7 +102,7 @@ abstract class FileTable( StructType(fields) } - override def partitioning: Array[Transform] = fileIndex.partitionSchema.asTransforms + override def partitioning: Array[Transform] = fileIndex.partitionSchema.names.toSeq.asTransforms override def properties: util.Map[String, String] = options.asCaseSensitiveMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala index eacc4cb3ac4a9..d519832c57501 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala @@ -30,34 +30,24 @@ import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.connector.write.{BatchWrite, LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, DataSource, OutputWriterFactory, WriteJobDescription} import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.writer.{BatchWrite, WriteBuilder} import org.apache.spark.sql.types.{DataType, StructType} -import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.util.SerializableConfiguration abstract class FileWriteBuilder( - options: CaseInsensitiveStringMap, paths: Seq[String], formatName: String, - supportsDataType: DataType => Boolean) extends 
WriteBuilder { - private var schema: StructType = _ - private var queryId: String = _ + supportsDataType: DataType => Boolean, + info: LogicalWriteInfo) extends WriteBuilder { + private val schema = info.schema() + private val queryId = info.queryId() + private val options = info.options() private var mode: SaveMode = _ - override def withInputDataSchema(schema: StructType): WriteBuilder = { - this.schema = schema - this - } - - override def withQueryId(queryId: String): WriteBuilder = { - this.queryId = queryId - this - } - def mode(mode: SaveMode): WriteBuilder = { this.mode = mode this diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala index eb573b317142a..1f25fed3000b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala @@ -23,9 +23,8 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory} import org.apache.spark.sql.execution.datasources.{DynamicPartitionDataWriter, SingleDirectoryDataWriter, WriteJobDescription} -import org.apache.spark.sql.sources.v2.writer.{DataWriter, DataWriterFactory} -import org.apache.spark.util.SerializableConfiguration case class FileWriterFactory ( description: WriteJobDescription, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala index a9b0f5bce1b09..bca28e3cacb62 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.sources.v2.reader.{InputPartition, PartitionReaderFactory, Scan} -import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchStream, Offset} +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan} +import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset} /** * Physical plan node for scanning a micro-batch of data from a data source. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderFromIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderFromIterator.scala index f9dfcf448a3ea..0d9aa5b42a6ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderFromIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderFromIterator.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.sql.sources.v2.reader.PartitionReader +import org.apache.spark.sql.connector.read.PartitionReader class PartitionReaderFromIterator[InternalRow]( iter: Iterator[InternalRow]) extends PartitionReader[InternalRow] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderWithPartitionValues.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderWithPartitionValues.scala index 072465b56857d..7bca98e54efa7 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderWithPartitionValues.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionReaderWithPartitionValues.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.JoinedRow import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection -import org.apache.spark.sql.sources.v2.reader.PartitionReader +import org.apache.spark.sql.connector.read.PartitionReader import org.apache.spark.sql.types.StructType /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionRecordReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionRecordReader.scala index baa8cb6b24659..8e524a986aa06 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionRecordReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PartitionRecordReader.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.hadoop.mapreduce.RecordReader -import org.apache.spark.sql.sources.v2.reader.PartitionReader +import org.apache.spark.sql.connector.read.PartitionReader class PartitionRecordReader[T]( private[this] var rowReader: RecordReader[_, T]) extends PartitionReader[T] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala new file mode 100644 index 0000000000000..33338b06565c9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, NamedExpression, PredicateHelper, SchemaPruning} +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns} +import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources +import org.apache.spark.sql.types.StructType + +object PushDownUtils extends PredicateHelper { + /** + * Pushes down filters to the data source reader + * + * @return pushed filter and post-scan filters. + */ + def pushFilters( + scanBuilder: ScanBuilder, + filters: Seq[Expression]): (Seq[sources.Filter], Seq[Expression]) = { + scanBuilder match { + case r: SupportsPushDownFilters => + // A map from translated data source leaf node filters to original catalyst filter + // expressions. For a `And`/`Or` predicate, it is possible that the predicate is partially + // pushed down. This map can be used to construct a catalyst filter expression from the + // input filter, or a superset(partial push down filter) of the input filter. 
+ val translatedFilterToExpr = mutable.HashMap.empty[sources.Filter, Expression] + val translatedFilters = mutable.ArrayBuffer.empty[sources.Filter] + // Catalyst filter expression that can't be translated to data source filters. + val untranslatableExprs = mutable.ArrayBuffer.empty[Expression] + + for (filterExpr <- filters) { + val translated = + DataSourceStrategy.translateFilterWithMapping(filterExpr, Some(translatedFilterToExpr)) + if (translated.isEmpty) { + untranslatableExprs += filterExpr + } else { + translatedFilters += translated.get + } + } + + // Data source filters that need to be evaluated again after scanning. which means + // the data source cannot guarantee the rows returned can pass these filters. + // As a result we must return it so Spark can plan an extra filter operator. + val postScanFilters = r.pushFilters(translatedFilters.toArray).map { filter => + DataSourceStrategy.rebuildExpressionFromFilter(filter, translatedFilterToExpr) + } + (r.pushedFilters(), untranslatableExprs ++ postScanFilters) + + case _ => (Nil, filters) + } + } + + /** + * Applies column pruning to the data source, w.r.t. the references of the given expressions. + * + * @return the `Scan` instance (since column pruning is the last step of operator pushdown), + * and new output attributes after column pruning. 
+ */ + def pruneColumns( + scanBuilder: ScanBuilder, + relation: DataSourceV2Relation, + projects: Seq[NamedExpression], + filters: Seq[Expression]): (Scan, Seq[AttributeReference]) = { + scanBuilder match { + case r: SupportsPushDownRequiredColumns if SQLConf.get.nestedSchemaPruningEnabled => + val rootFields = SchemaPruning.identifyRootFields(projects, filters) + val prunedSchema = if (rootFields.nonEmpty) { + SchemaPruning.pruneDataSchema(relation.schema, rootFields) + } else { + new StructType() + } + r.pruneColumns(prunedSchema) + val scan = r.build() + scan -> toOutputAttrs(scan.readSchema(), relation) + + case r: SupportsPushDownRequiredColumns => + val exprs = projects ++ filters + val requiredColumns = AttributeSet(exprs.flatMap(_.references)) + val neededOutput = relation.output.filter(requiredColumns.contains) + if (neededOutput != relation.output) { + r.pruneColumns(neededOutput.toStructType) + val scan = r.build() + scan -> toOutputAttrs(scan.readSchema(), relation) + } else { + r.build() -> relation.output + } + + case _ => scanBuilder.build() -> relation.output + } + } + + private def toOutputAttrs( + schema: StructType, + relation: DataSourceV2Relation): Seq[AttributeReference] = { + val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap + schema.toAttributes.map { + // we have to keep the attribute id during transformation + a => a.withExprId(nameToAttr(a.name).exprId) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DropTableStatement.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala similarity index 70% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DropTableStatement.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala index d41e8a5010257..2a19ff304a9e0 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DropTableStatement.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala @@ -15,20 +15,19 @@ * limitations under the License. */ -package org.apache.spark.sql.catalyst.plans.logical.sql +package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} -/** - * A DROP TABLE statement, as parsed from SQL. - */ -case class DropTableStatement( - tableName: Seq[String], - ifExists: Boolean, - purge: Boolean) extends ParsedStatement { +case class RefreshTableExec( + catalog: TableCatalog, + ident: Identifier) extends V2CommandExec { + override protected def run(): Seq[InternalRow] = { + catalog.invalidateTable(ident) + Seq.empty + } override def output: Seq[Attribute] = Seq.empty - - override def children: Seq[LogicalPlan] = Seq.empty } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DropViewStatement.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala similarity index 64% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DropViewStatement.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala index 523158788e834..a650607d5f129 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DropViewStatement.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala @@ -15,19 +15,26 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.catalyst.plans.logical.sql +package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} /** - * A DROP VIEW statement, as parsed from SQL. + * Physical plan node for renaming a table. */ -case class DropViewStatement( - viewName: Seq[String], - ifExists: Boolean) extends ParsedStatement { +case class RenameTableExec( + catalog: TableCatalog, + oldIdent: Identifier, + newIdent: Identifier) extends V2CommandExec { override def output: Seq[Attribute] = Seq.empty - override def children: Seq[LogicalPlan] = Seq.empty + override protected def run(): Seq[InternalRow] = { + catalog.invalidateTable(oldIdent) + catalog.renameTable(oldIdent, newIdent) + + Seq.empty + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala index 35d86ee2abbbb..1f3bcf2e3fe57 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala @@ -19,14 +19,11 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalog.v2.{Identifier, StagingTableCatalog, TableCatalog} -import org.apache.spark.sql.catalog.v2.expressions.Transform import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.execution.LeafExecNode -import org.apache.spark.sql.sources.v2.StagedTable 
+import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, TableCatalog} +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -36,16 +33,16 @@ case class ReplaceTableExec( tableSchema: StructType, partitioning: Seq[Transform], tableProperties: Map[String, String], - orCreate: Boolean) extends LeafExecNode { + orCreate: Boolean) extends V2CommandExec { - override protected def doExecute(): RDD[InternalRow] = { + override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { catalog.dropTable(ident) } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) } catalog.createTable(ident, tableSchema, partitioning.toArray, tableProperties.asJava) - sqlContext.sparkContext.parallelize(Seq.empty, 1) + Seq.empty } override def output: Seq[Attribute] = Seq.empty @@ -57,9 +54,9 @@ case class AtomicReplaceTableExec( tableSchema: StructType, partitioning: Seq[Transform], tableProperties: Map[String, String], - orCreate: Boolean) extends LeafExecNode { + orCreate: Boolean) extends V2CommandExec { - override protected def doExecute(): RDD[InternalRow] = { + override protected def run(): Seq[InternalRow] = { val staged = if (orCreate) { catalog.stageCreateOrReplace( identifier, tableSchema, partitioning.toArray, tableProperties.asJava) @@ -75,8 +72,7 @@ case class AtomicReplaceTableExec( throw new CannotReplaceMissingTableException(identifier) } commitOrAbortStagedChanges(staged) - - sqlContext.sparkContext.parallelize(Seq.empty, 1) + Seq.empty } override def output: Seq[Attribute] = Seq.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetCatalogAndNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetCatalogAndNamespaceExec.scala new file mode 100644 index 0000000000000..9e6f00e0923ea --- /dev/null +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/SetCatalogAndNamespaceExec.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.CatalogManager + +/** + * Physical plan node for setting the current catalog and/or namespace. + */ +case class SetCatalogAndNamespaceExec( + catalogManager: CatalogManager, + catalogName: Option[String], + namespace: Option[Seq[String]]) + extends V2CommandExec { + override protected def run(): Seq[InternalRow] = { + // The catalog is updated first because CatalogManager resets the current namespace + // when the current catalog is set. 
+ catalogName.map(catalogManager.setCurrentCatalog) + namespace.map(ns => catalogManager.setCurrentNamespace(ns.toArray)) + + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala new file mode 100644 index 0000000000000..42b80a15080a6 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper + +/** + * Physical plan node for showing current catalog/namespace. 
+ */ +case class ShowCurrentNamespaceExec( + output: Seq[Attribute], + catalogManager: CatalogManager) + extends V2CommandExec { + override protected def run(): Seq[InternalRow] = { + val encoder = RowEncoder(schema).resolveAndBind() + Seq(encoder + .toRow(new GenericRowWithSchema( + Array(catalogManager.currentCatalog.name, catalogManager.currentNamespace.quoted), schema)) + .copy()) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala new file mode 100644 index 0000000000000..fe3ab8023db6f --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper +import org.apache.spark.sql.connector.catalog.SupportsNamespaces + +/** + * Physical plan node for showing namespaces. + */ +case class ShowNamespacesExec( + output: Seq[Attribute], + catalog: SupportsNamespaces, + namespace: Seq[String], + pattern: Option[String]) + extends V2CommandExec { + + override protected def run(): Seq[InternalRow] = { + val namespaces = if (namespace.nonEmpty) { + catalog.listNamespaces(namespace.toArray) + } else { + catalog.listNamespaces() + } + + val rows = new ArrayBuffer[InternalRow]() + val encoder = RowEncoder(schema).resolveAndBind() + + namespaces.map(_.quoted).map { ns => + if (pattern.map(StringUtils.filterPattern(Seq(ns), _).nonEmpty).getOrElse(true)) { + rows += encoder + .toRow(new GenericRowWithSchema(Array(ns), schema)) + .copy() + } + } + + rows + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala new file mode 100644 index 0000000000000..7905c35f55de0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.connector.catalog.Table + +/** + * Physical plan node for showing table properties. + */ +case class ShowTablePropertiesExec( + output: Seq[Attribute], + catalogTable: Table, + propertyKey: Option[String]) extends V2CommandExec { + + override protected def run(): Seq[InternalRow] = { + import scala.collection.JavaConverters._ + val encoder = RowEncoder(schema).resolveAndBind() + + val properties = catalogTable.properties.asScala + propertyKey match { + case Some(p) => + val propValue = properties + .getOrElse(p, s"Table ${catalogTable.name} does not have property: $p") + Seq(encoder.toRow(new GenericRowWithSchema(Array(p, propValue), schema)).copy()) + case None => + properties.keys.map(k => + encoder.toRow(new GenericRowWithSchema(Array(k, properties(k)), schema)).copy()).toSeq + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala index c652f28a5e760..995b00871fc2a 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala @@ -19,14 +19,12 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable.ArrayBuffer -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalog.v2.CatalogV2Implicits.NamespaceHelper -import org.apache.spark.sql.catalog.v2.TableCatalog import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} import org.apache.spark.sql.catalyst.util.StringUtils -import org.apache.spark.sql.execution.LeafExecNode +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper +import org.apache.spark.sql.connector.catalog.TableCatalog /** * Physical plan node for showing tables. @@ -36,8 +34,8 @@ case class ShowTablesExec( catalog: TableCatalog, namespace: Seq[String], pattern: Option[String]) - extends LeafExecNode { - override protected def doExecute(): RDD[InternalRow] = { + extends V2CommandExec { + override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() val encoder = RowEncoder(schema).resolveAndBind() @@ -53,6 +51,6 @@ case class ShowTablesExec( } } - sparkContext.parallelize(rows, 1) + rows } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala index 660b6e763e056..509a5f7139cca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala @@ -20,8 +20,9 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.AnalysisException 
import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} +import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.streaming.{StreamingRelation, StreamingRelationV2} -import org.apache.spark.sql.sources.v2.TableCapability._ import org.apache.spark.sql.types.BooleanType /** @@ -32,6 +33,10 @@ object TableCapabilityCheck extends (LogicalPlan => Unit) { private def failAnalysis(msg: String): Unit = throw new AnalysisException(msg) + private def supportsBatchWrite(table: Table): Boolean = { + table.supportsAny(BATCH_WRITE, V1_BATCH_WRITE) + } + override def apply(plan: LogicalPlan): Unit = { plan foreach { case r: DataSourceV2Relation if !r.table.supports(BATCH_READ) => @@ -43,24 +48,23 @@ object TableCapabilityCheck extends (LogicalPlan => Unit) { // TODO: check STREAMING_WRITE capability. It's not doable now because we don't have a // a logical plan for streaming write. 
- - case AppendData(r: DataSourceV2Relation, _, _) if !r.table.supports(BATCH_WRITE) => + case AppendData(r: DataSourceV2Relation, _, _, _) if !supportsBatchWrite(r.table) => failAnalysis(s"Table ${r.table.name()} does not support append in batch mode.") - case OverwritePartitionsDynamic(r: DataSourceV2Relation, _, _) + case OverwritePartitionsDynamic(r: DataSourceV2Relation, _, _, _) if !r.table.supports(BATCH_WRITE) || !r.table.supports(OVERWRITE_DYNAMIC) => failAnalysis(s"Table ${r.table.name()} does not support dynamic overwrite in batch mode.") - case OverwriteByExpression(r: DataSourceV2Relation, expr, _, _) => + case OverwriteByExpression(r: DataSourceV2Relation, expr, _, _, _) => expr match { case Literal(true, BooleanType) => - if (!r.table.supports(BATCH_WRITE) || - !r.table.supportsAny(TRUNCATE, OVERWRITE_BY_FILTER)) { + if (!supportsBatchWrite(r.table) || + !r.table.supportsAny(TRUNCATE, OVERWRITE_BY_FILTER)) { failAnalysis( s"Table ${r.table.name()} does not support truncate in batch mode.") } case _ => - if (!r.table.supports(BATCH_WRITE) || !r.table.supports(OVERWRITE_BY_FILTER)) { + if (!supportsBatchWrite(r.table) || !r.table.supports(OVERWRITE_BY_FILTER)) { failAnalysis(s"Table ${r.table.name()} does not support " + "overwrite by filter in batch mode.") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala index 7ddd99a0293b1..1ca3fd42c0597 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala @@ -29,11 +29,7 @@ import org.apache.spark.util.Utils abstract class TextBasedFileScan( sparkSession: SparkSession, - fileIndex: PartitioningAwareFileIndex, - readDataSchema: StructType, - readPartitionSchema: StructType, - options: CaseInsensitiveStringMap) - 
extends FileScan(sparkSession, fileIndex, readDataSchema, readPartitionSchema) { + options: CaseInsensitiveStringMap) extends FileScan { @transient private lazy val codecFactory: CompressionCodecFactory = new CompressionCodecFactory( sparkSession.sessionState.newHadoopConfWithOptions(options.asScala.toMap)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index 2f05ff3a7c2e1..f97300025400d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -19,18 +19,16 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.UUID -import scala.collection.JavaConverters._ - import org.apache.spark.SparkException import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, SaveMode} +import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.SupportsWrite +import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.sources.{AlwaysTrue, CreatableRelationProvider, Filter, InsertableRelation} -import org.apache.spark.sql.sources.v2.{SupportsWrite, Table} -import org.apache.spark.sql.sources.v2.writer._ +import org.apache.spark.sql.sources.{AlwaysTrue, Filter, InsertableRelation} import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -100,9 +98,12 @@ sealed trait V1FallbackWriters extends SupportsV1Write { } protected def newWriteBuilder(): V1WriteBuilder = { - val writeBuilder = 
table.newWriteBuilder(writeOptions) - .withInputDataSchema(plan.schema) - .withQueryId(UUID.randomUUID().toString) + val info = LogicalWriteInfoImpl( + queryId = UUID.randomUUID().toString, + schema = plan.schema, + options = writeOptions) + val writeBuilder = table.newWriteBuilder(info) + writeBuilder.asV1Builder } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala new file mode 100644 index 0000000000000..a1f685d47a346 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.LeafExecNode + +/** + * A physical operator that executes run() and saves the result to prevent multiple executions. + * Any V2 commands that do not require triggering a spark job should extend this class. 
+ */ +abstract class V2CommandExec extends LeafExecNode { + + /** + * Abstract method that each concrete command needs to implement to compute the result. + */ + protected def run(): Seq[InternalRow] + + /** + * The value of this field can be used as the contents of the corresponding RDD generated from + * the physical plan of this command. + */ + private lazy val result: Seq[InternalRow] = run() + + /** + * The `execute()` method of all the physical command classes should reference `result` + * so that the command can be executed eagerly right after the command query is created. + */ + override def executeCollect(): Array[InternalRow] = result.toArray + + override def executeToIterator: Iterator[InternalRow] = result.toIterator + + override def executeTake(limit: Int): Array[InternalRow] = result.take(limit).toArray + + override def executeTail(limit: Int): Array[InternalRow] = result.takeRight(limit).toArray + + protected override def doExecute(): RDD[InternalRow] = { + sqlContext.sparkContext.parallelize(result, 1) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala new file mode 100644 index 0000000000000..59089fa6b77e9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * Pushes projections and filters from a logical plan into a v2 scan, producing a
 * [[DataSourceV2ScanRelation]] with any residual Filter/Project nodes rebuilt on top.
 *
 * NOTE(review): the ordering below matters — filters must be pushed before columns are
 * pruned, because filter pushdown can affect which columns the scan needs.
 */
object V2ScanRelationPushDown extends Rule[LogicalPlan] {
  import DataSourceV2Implicits._

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
    case ScanOperation(project, filters, relation: DataSourceV2Relation) =>
      val scanBuilder = relation.table.asReadable.newScanBuilder(relation.options)

      val normalizedFilters = DataSourceStrategy.normalizeExprs(filters, relation.output)
      // Filters containing subqueries are never offered to the source; they always stay
      // in the post-scan Filter node.
      val (normalizedFiltersWithSubquery, normalizedFiltersWithoutSubquery) =
        normalizedFilters.partition(SubqueryExpression.hasSubquery)

      // `pushedFilters` will be pushed down and evaluated in the underlying data sources.
      // `postScanFilters` need to be evaluated after the scan.
      // `postScanFilters` and `pushedFilters` can overlap, e.g. the parquet row group filter.
      val (pushedFilters, postScanFiltersWithoutSubquery) = PushDownUtils.pushFilters(
        scanBuilder, normalizedFiltersWithoutSubquery)
      val postScanFilters = postScanFiltersWithoutSubquery ++ normalizedFiltersWithSubquery

      val normalizedProjects = DataSourceStrategy
        .normalizeExprs(project, relation.output)
        .asInstanceOf[Seq[NamedExpression]]
      // Column pruning: the scan's output may shrink to just what the projection and
      // post-scan filters reference.
      val (scan, output) = PushDownUtils.pruneColumns(
        scanBuilder, relation, normalizedProjects, postScanFilters)
      logInfo(
        s"""
           |Pushing operators to ${relation.name}
           |Pushed Filters: ${pushedFilters.mkString(", ")}
           |Post-Scan Filters: ${postScanFilters.mkString(",")}
           |Output: ${output.mkString(", ")}
         """.stripMargin)

      // A V1 scan is wrapped so the physical v1 scan node can see both the full set of
      // translated filters and the subset the source reported as handled.
      val wrappedScan = scan match {
        case v1: V1Scan =>
          val translated = filters.flatMap(DataSourceStrategy.translateFilter)
          V1ScanWrapper(v1, translated, pushedFilters)
        case _ => scan
      }

      val scanRelation = DataSourceV2ScanRelation(relation.table, wrappedScan, output)

      // Pruning may have changed attribute references; rewrite the remaining filters and
      // projections against the pruned scan output.
      val projectionOverSchema = ProjectionOverSchema(output.toStructType)
      val projectionFunc = (expr: Expression) => expr transformDown {
        case projectionOverSchema(newExpr) => newExpr
      }

      val filterCondition = postScanFilters.reduceLeftOption(And)
      val newFilterCondition = filterCondition.map(projectionFunc)
      val withFilter = newFilterCondition.map(Filter(_, scanRelation)).getOrElse(scanRelation)

      // Only add a Project node when the (possibly filtered) output does not already
      // match the requested projection.
      val withProjection = if (withFilter.output != project) {
        val newProjects = normalizedProjects
          .map(projectionFunc)
          .asInstanceOf[Seq[NamedExpression]]
        Project(newProjects, withFilter)
      } else {
        withFilter
      }

      withProjection
  }
}

// A wrapper for v1 scan to carry the translated filters and the handled ones. This is required by
// the physical v1 scan node.
+case class V1ScanWrapper( + v1Scan: V1Scan, + translatedFilters: Seq[sources.Filter], + handledFilters: Seq[sources.Filter]) extends Scan { + override def readSchema(): StructType = v1Scan.readSchema() +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index ebfd7384930fe..cef9b5f675889 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -23,48 +23,39 @@ import java.util import scala.collection.JavaConverters._ import scala.collection.mutable -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalog.v2.{Identifier, NamespaceChange, SupportsNamespaces, TableCatalog, TableChange} -import org.apache.spark.sql.catalog.v2.NamespaceChange.{RemoveProperty, SetProperty} -import org.apache.spark.sql.catalog.v2.expressions.{BucketTransform, FieldReference, IdentityTransform, Transform} -import org.apache.spark.sql.catalog.v2.utils.CatalogV2Util import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogDatabase, CatalogTable, CatalogTableType, CatalogUtils, SessionCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, Table, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.NamespaceChange.RemoveProperty +import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, IdentityTransform, Transform} import org.apache.spark.sql.execution.datasources.DataSource -import 
org.apache.spark.sql.internal.SessionState -import org.apache.spark.sql.sources.v2.Table -import org.apache.spark.sql.sources.v2.internal.V1Table +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * A [[TableCatalog]] that translates calls to the v1 SessionCatalog. */ -class V2SessionCatalog(sessionState: SessionState) extends TableCatalog with SupportsNamespaces { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ +class V2SessionCatalog(catalog: SessionCatalog, conf: SQLConf) + extends TableCatalog with SupportsNamespaces { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import V2SessionCatalog._ - def this() = { - this(SparkSession.active.sessionState) - } - override val defaultNamespace: Array[String] = Array("default") - private lazy val catalog: SessionCatalog = sessionState.catalog - - private var _name: String = _ + override def name: String = CatalogManager.SESSION_CATALOG_NAME - override def name: String = _name - - override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { - this._name = name - } + // This class is instantiated by Spark, so `initialize` method will not be called. 
+ override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {} override def listTables(namespace: Array[String]): Array[Identifier] = { namespace match { case Array(db) => - catalog.listTables(db).map(ident => Identifier.of(Array(db), ident.table)).toArray + catalog + .listTables(db) + .map(ident => Identifier.of(Array(ident.database.getOrElse("")), ident.table)) + .toArray case _ => throw new NoSuchNamespaceException(namespace) } @@ -92,9 +83,9 @@ class V2SessionCatalog(sessionState: SessionState) extends TableCatalog with Sup properties: util.Map[String, String]): Table = { val (partitionColumns, maybeBucketSpec) = V2SessionCatalog.convertTransforms(partitions) - val provider = properties.getOrDefault("provider", sessionState.conf.defaultDataSourceName) + val provider = properties.getOrDefault(TableCatalog.PROP_PROVIDER, conf.defaultDataSourceName) val tableProperties = properties.asScala - val location = Option(properties.get(LOCATION_TABLE_PROP)) + val location = Option(properties.get(TableCatalog.PROP_LOCATION)) val storage = DataSource.buildStorageFormatFromOptions(tableProperties.toMap) .copy(locationUri = location.map(CatalogUtils.stringToURI)) val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED @@ -108,8 +99,8 @@ class V2SessionCatalog(sessionState: SessionState) extends TableCatalog with Sup partitionColumnNames = partitionColumns, bucketSpec = maybeBucketSpec, properties = tableProperties.toMap, - tracksPartitionsInCatalog = sessionState.conf.manageFilesourcePartitions, - comment = Option(properties.get(COMMENT_TABLE_PROP))) + tracksPartitionsInCatalog = conf.manageFilesourcePartitions, + comment = Option(properties.get(TableCatalog.PROP_COMMENT))) try { catalog.createTable(tableDesc, ignoreIfExists = false) @@ -133,9 +124,13 @@ class V2SessionCatalog(sessionState: SessionState) extends TableCatalog with Sup val properties = CatalogV2Util.applyPropertiesChanges(catalogTable.properties, 
changes) val schema = CatalogV2Util.applySchemaChanges(catalogTable.schema, changes) + val comment = properties.get(TableCatalog.PROP_COMMENT) + val owner = properties.getOrElse(TableCatalog.PROP_OWNER, catalogTable.owner) try { - catalog.alterTable(catalogTable.copy(properties = properties, schema = schema)) + catalog.alterTable( + catalogTable + .copy(properties = properties, schema = schema, owner = owner, comment = comment)) } catch { case _: NoSuchTableException => throw new NoSuchTableException(ident) @@ -236,7 +231,8 @@ class V2SessionCatalog(sessionState: SessionState) extends TableCatalog with Sup case Array(db) => // validate that this catalog's reserved properties are not removed changes.foreach { - case remove: RemoveProperty if RESERVED_PROPERTIES.contains(remove.property) => + case remove: RemoveProperty + if CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.contains(remove.property) => throw new UnsupportedOperationException( s"Cannot remove reserved property: ${remove.property}") case _ => @@ -271,9 +267,6 @@ class V2SessionCatalog(sessionState: SessionState) extends TableCatalog with Sup } private[sql] object V2SessionCatalog { - val COMMENT_TABLE_PROP: String = "comment" - val LOCATION_TABLE_PROP: String = "location" - val RESERVED_PROPERTIES: Set[String] = Set(COMMENT_TABLE_PROP, LOCATION_TABLE_PROP) /** * Convert v2 Transforms to v1 partition columns and an optional bucket spec. 
@@ -303,12 +296,13 @@ private[sql] object V2SessionCatalog { defaultLocation: Option[URI] = None): CatalogDatabase = { CatalogDatabase( name = db, - description = metadata.getOrDefault(COMMENT_TABLE_PROP, ""), - locationUri = Option(metadata.get(LOCATION_TABLE_PROP)) + description = metadata.getOrDefault(SupportsNamespaces.PROP_COMMENT, ""), + locationUri = Option(metadata.get(SupportsNamespaces.PROP_LOCATION)) .map(CatalogUtils.stringToURI) .orElse(defaultLocation) .getOrElse(throw new IllegalArgumentException("Missing database location")), - properties = metadata.asScala.toMap -- Seq("comment", "location")) + properties = metadata.asScala.toMap -- + Seq(SupportsNamespaces.PROP_COMMENT, SupportsNamespaces.PROP_LOCATION)) } private implicit class CatalogDatabaseHelper(catalogDatabase: CatalogDatabase) { @@ -318,8 +312,8 @@ private[sql] object V2SessionCatalog { catalogDatabase.properties.foreach { case (key, value) => metadata.put(key, value) } - metadata.put(LOCATION_TABLE_PROP, catalogDatabase.locationUri.toString) - metadata.put(COMMENT_TABLE_PROP, catalogDatabase.description) + metadata.put(SupportsNamespaces.PROP_LOCATION, catalogDatabase.locationUri.toString) + metadata.put(SupportsNamespaces.PROP_COMMENT, catalogDatabase.description) metadata.asJava } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 0131d72ebc97a..e360a9e656a16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -26,16 +26,15 @@ import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import 
org.apache.spark.sql.catalog.v2.{Identifier, StagingTableCatalog, TableCatalog} -import org.apache.spark.sql.catalog.v2.expressions.Transform import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, TableCatalog} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.sources.{AlwaysTrue, Filter} -import org.apache.spark.sql.sources.v2.{StagedTable, SupportsWrite} -import org.apache.spark.sql.sources.v2.writer.{BatchWrite, DataWriterFactory, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.{LongAccumulator, Utils} @@ -69,7 +68,7 @@ case class CreateTableAsSelectExec( writeOptions: CaseInsensitiveStringMap, ifNotExists: Boolean) extends V2TableWriteExec with SupportsV1Write { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits.IdentifierHelper + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper override protected def doExecute(): RDD[InternalRow] = { if (catalog.tableExists(ident)) { @@ -85,9 +84,11 @@ case class CreateTableAsSelectExec( catalog.createTable( ident, schema, partitioning.toArray, properties.asJava) match { case table: SupportsWrite => - val writeBuilder = 
table.newWriteBuilder(writeOptions) - .withInputDataSchema(schema) - .withQueryId(UUID.randomUUID().toString) + val info = LogicalWriteInfoImpl( + queryId = UUID.randomUUID().toString, + schema, + writeOptions) + val writeBuilder = table.newWriteBuilder(info) writeBuilder match { case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) @@ -158,7 +159,7 @@ case class ReplaceTableAsSelectExec( writeOptions: CaseInsensitiveStringMap, orCreate: Boolean) extends V2TableWriteExec with SupportsV1Write { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits.IdentifierHelper + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper override protected def doExecute(): RDD[InternalRow] = { // Note that this operation is potentially unsafe, but these are the strict semantics of @@ -180,9 +181,11 @@ case class ReplaceTableAsSelectExec( Utils.tryWithSafeFinallyAndFailureCallbacks({ createdTable match { case table: SupportsWrite => - val writeBuilder = table.newWriteBuilder(writeOptions) - .withInputDataSchema(schema) - .withQueryId(UUID.randomUUID().toString) + val info = LogicalWriteInfoImpl( + queryId = UUID.randomUUID().toString, + schema, + writeOptions) + val writeBuilder = table.newWriteBuilder(info) writeBuilder match { case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) @@ -336,9 +339,11 @@ trait BatchWriteHelper { def writeOptions: CaseInsensitiveStringMap def newWriteBuilder(): WriteBuilder = { - table.newWriteBuilder(writeOptions) - .withInputDataSchema(query.schema) - .withQueryId(UUID.randomUUID().toString) + val info = LogicalWriteInfoImpl( + queryId = UUID.randomUUID().toString, + query.schema, + writeOptions) + table.newWriteBuilder(info) } } @@ -354,17 +359,20 @@ trait V2TableWriteExec extends UnaryExecNode { override def output: Seq[Attribute] = Nil protected def writeWithV2(batchWrite: BatchWrite): RDD[InternalRow] = { - val writerFactory = batchWrite.createBatchWriterFactory() - val useCommitCoordinator = 
batchWrite.useCommitCoordinator - val rdd = query.execute() - // SPARK-23271 If we are attempting to write a zero partition rdd, create a dummy single - // partition rdd to make sure we at least set up one write task to write the metadata. - val rddWithNonEmptyPartitions = if (rdd.partitions.length == 0) { - sparkContext.parallelize(Array.empty[InternalRow], 1) - } else { - rdd + val rdd: RDD[InternalRow] = { + val tempRdd = query.execute() + // SPARK-23271 If we are attempting to write a zero partition rdd, create a dummy single + // partition rdd to make sure we at least set up one write task to write the metadata. + if (tempRdd.partitions.length == 0) { + sparkContext.parallelize(Array.empty[InternalRow], 1) + } else { + tempRdd + } } - val messages = new Array[WriterCommitMessage](rddWithNonEmptyPartitions.partitions.length) + val writerFactory = batchWrite.createBatchWriterFactory( + PhysicalWriteInfoImpl(rdd.getNumPartitions)) + val useCommitCoordinator = batchWrite.useCommitCoordinator + val messages = new Array[WriterCommitMessage](rdd.partitions.length) val totalNumRowsAccumulator = new LongAccumulator() logInfo(s"Start processing data source write support: $batchWrite. 
" + @@ -372,10 +380,10 @@ trait V2TableWriteExec extends UnaryExecNode { try { sparkContext.runJob( - rddWithNonEmptyPartitions, + rdd, (context: TaskContext, iter: Iterator[InternalRow]) => DataWritingSparkTask.run(writerFactory, context, iter, useCommitCoordinator), - rddWithNonEmptyPartitions.partitions.indices, + rdd.partitions.indices, (index, result: DataWritingSparkTaskResult) => { val commitMessage = result.writerCommitMessage messages(index) = commitMessage @@ -465,12 +473,14 @@ object DataWritingSparkTask extends Logging { dataWriter.abort() logError(s"Aborted commit for partition $partId (task $taskId, attempt $attemptId, " + s"stage $stageId.$stageAttempt)") + }, finallyBlock = { + dataWriter.close() }) } } private[v2] trait AtomicTableWriteExec extends V2TableWriteExec with SupportsV1Write { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits.IdentifierHelper + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper protected def writeToStagedTable( stagedTable: StagedTable, @@ -479,9 +489,11 @@ private[v2] trait AtomicTableWriteExec extends V2TableWriteExec with SupportsV1W Utils.tryWithSafeFinallyAndFailureCallbacks({ stagedTable match { case table: SupportsWrite => - val writeBuilder = table.newWriteBuilder(writeOptions) - .withInputDataSchema(query.schema) - .withQueryId(UUID.randomUUID().toString) + val info = LogicalWriteInfoImpl( + queryId = UUID.randomUUID().toString, + query.schema, + writeOptions) + val writeBuilder = table.newWriteBuilder(info) val writtenRows = writeBuilder match { case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVDataSourceV2.scala index 045f41e670ad3..1f99d4282f6da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVDataSourceV2.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVDataSourceV2.scala @@ -16,10 +16,10 @@ */ package org.apache.spark.sql.execution.datasources.v2.csv +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.sources.v2.Table import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala index 828594ffb10af..31d31bd43f453 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala @@ -19,11 +19,12 @@ package org.apache.spark.sql.execution.datasources.v2.csv import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVOptions, UnivocityParser} +import org.apache.spark.sql.connector.read.PartitionReader import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.csv.CSVDataSource import org.apache.spark.sql.execution.datasources.v2._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.PartitionReader +import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration @@ -43,7 +44,8 @@ case class CSVPartitionReaderFactory( dataSchema: StructType, readDataSchema: StructType, partitionSchema: StructType, - parsedOptions: CSVOptions) 
extends FilePartitionReaderFactory { + parsedOptions: CSVOptions, + filters: Seq[Filter]) extends FilePartitionReaderFactory { private val columnPruning = sqlConf.csvColumnPruning override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = { @@ -55,7 +57,8 @@ case class CSVPartitionReaderFactory( val parser = new UnivocityParser( actualDataSchema, actualReadDataSchema, - parsedOptions) + parsedOptions, + filters) val schema = if (columnPruning) actualReadDataSchema else actualDataSchema val isStartOfFile = file.start == 0 val headerChecker = new CSVHeaderChecker( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala index 3cbcfca01a9c3..4f510322815ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala @@ -22,12 +22,13 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.csv.CSVOptions -import org.apache.spark.sql.catalyst.expressions.ExprUtils +import org.apache.spark.sql.catalyst.expressions.{Expression, ExprUtils} +import org.apache.spark.sql.connector.read.PartitionReaderFactory import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex -import org.apache.spark.sql.execution.datasources.csv.{CSVDataSource, MultiLineCSVDataSource} -import org.apache.spark.sql.execution.datasources.v2.TextBasedFileScan -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory -import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.execution.datasources.csv.CSVDataSource +import org.apache.spark.sql.execution.datasources.v2.{FileScan, TextBasedFileScan} +import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.types.StructType 
import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.SerializableConfiguration @@ -37,8 +38,11 @@ case class CSVScan( dataSchema: StructType, readDataSchema: StructType, readPartitionSchema: StructType, - options: CaseInsensitiveStringMap) - extends TextBasedFileScan(sparkSession, fileIndex, readDataSchema, readPartitionSchema, options) { + options: CaseInsensitiveStringMap, + pushedFilters: Array[Filter], + partitionFilters: Seq[Expression] = Seq.empty, + dataFilters: Seq[Expression] = Seq.empty) + extends TextBasedFileScan(sparkSession, options) { private lazy val parsedOptions: CSVOptions = new CSVOptions( options.asScala.toMap, @@ -85,6 +89,22 @@ case class CSVScan( // The partition values are already truncated in `FileScan.partitions`. // We should use `readPartitionSchema` as the partition schema here. CSVPartitionReaderFactory(sparkSession.sessionState.conf, broadcastedConf, - dataSchema, readDataSchema, readPartitionSchema, parsedOptions) + dataSchema, readDataSchema, readPartitionSchema, parsedOptions, pushedFilters) + } + + override def withFilters( + partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): FileScan = + this.copy(partitionFilters = partitionFilters, dataFilters = dataFilters) + + override def equals(obj: Any): Boolean = obj match { + case c: CSVScan => super.equals(c) && dataSchema == c.dataSchema && options == c.options && + equivalentFilters(pushedFilters, c.pushedFilters) + case _ => false + } + + override def hashCode(): Int = super.hashCode() + + override def description(): String = { + super.description() + ", PushedFilters: " + pushedFilters.mkString("[", ", ", "]") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala index 28c5b3d81a3d5..81a234e254000 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScanBuilder.scala @@ -18,9 +18,11 @@ package org.apache.spark.sql.execution.datasources.v2.csv import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.csv.CSVFilters +import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters} import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder -import org.apache.spark.sql.sources.v2.reader.Scan +import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -30,9 +32,27 @@ case class CSVScanBuilder( schema: StructType, dataSchema: StructType, options: CaseInsensitiveStringMap) - extends FileScanBuilder(sparkSession, fileIndex, dataSchema) { + extends FileScanBuilder(sparkSession, fileIndex, dataSchema) with SupportsPushDownFilters { override def build(): Scan = { - CSVScan(sparkSession, fileIndex, dataSchema, readDataSchema(), readPartitionSchema(), options) + CSVScan( + sparkSession, + fileIndex, + dataSchema, + readDataSchema(), + readPartitionSchema(), + options, + pushedFilters()) } + + private var _pushedFilters: Array[Filter] = Array.empty + + override def pushFilters(filters: Array[Filter]): Array[Filter] = { + if (sparkSession.sessionState.conf.csvFilterPushDown) { + _pushedFilters = CSVFilters.pushedFilters(filters, dataSchema) + } + filters + } + + override def pushedFilters(): Array[Filter] = _pushedFilters } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala index 8170661a70172..3cafe37b743f3 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala @@ -22,10 +22,10 @@ import org.apache.hadoop.fs.FileStatus import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.csv.CSVOptions +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.csv.CSVDataSource import org.apache.spark.sql.execution.datasources.v2.FileTable -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.types.{AtomicType, DataType, StructType, UserDefinedType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -49,8 +49,8 @@ case class CSVTable( CSVDataSource(parsedOptions).inferSchema(sparkSession, files, parsedOptions) } - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = - new CSVWriteBuilder(options, paths, formatName, supportsDataType) + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + new CSVWriteBuilder(paths, formatName, supportsDataType, info) override def supportsDataType(dataType: DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala index 92b47e4354807..bfbb1831aa63d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWriteBuilder.scala @@ -20,19 +20,19 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.sql.catalyst.csv.CSVOptions import org.apache.spark.sql.catalyst.util.CompressionCodecs +import 
org.apache.spark.sql.connector.write.LogicalWriteInfo import org.apache.spark.sql.execution.datasources.{CodecStreams, OutputWriter, OutputWriterFactory} import org.apache.spark.sql.execution.datasources.csv.CsvOutputWriter import org.apache.spark.sql.execution.datasources.v2.FileWriteBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} -import org.apache.spark.sql.util.CaseInsensitiveStringMap class CSVWriteBuilder( - options: CaseInsensitiveStringMap, paths: Seq[String], formatName: String, - supportsDataType: DataType => Boolean) - extends FileWriteBuilder(options, paths, formatName, supportsDataType) { + supportsDataType: DataType => Boolean, + info: LogicalWriteInfo) + extends FileWriteBuilder(paths, formatName, supportsDataType, info) { override def prepareWrite( sqlConf: SQLConf, job: Job, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonDataSourceV2.scala index 610bd4c1b9d85..7a0949e586cd8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonDataSourceV2.scala @@ -16,10 +16,10 @@ */ package org.apache.spark.sql.execution.datasources.v2.json +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.json.JsonFileFormat import org.apache.spark.sql.execution.datasources.v2._ -import org.apache.spark.sql.sources.v2.Table import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonPartitionReaderFactory.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonPartitionReaderFactory.scala index e5b7ae0bd228a..698423948f916 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonPartitionReaderFactory.scala @@ -19,11 +19,11 @@ package org.apache.spark.sql.execution.datasources.v2.json import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.json.{JacksonParser, JSONOptionsInRead} +import org.apache.spark.sql.connector.read.PartitionReader import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.json.JsonDataSource import org.apache.spark.sql.execution.datasources.v2._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.PartitionReader import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala index 5c41bbd931982..75231625676ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScan.scala @@ -21,13 +21,13 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.spark.sql.{AnalysisException, SparkSession} -import org.apache.spark.sql.catalyst.expressions.ExprUtils +import org.apache.spark.sql.catalyst.expressions.{Expression, ExprUtils} import org.apache.spark.sql.catalyst.json.JSONOptionsInRead import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.read.PartitionReaderFactory 
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.json.JsonDataSource import org.apache.spark.sql.execution.datasources.v2.{FileScan, TextBasedFileScan} -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.SerializableConfiguration @@ -38,8 +38,10 @@ case class JsonScan( dataSchema: StructType, readDataSchema: StructType, readPartitionSchema: StructType, - options: CaseInsensitiveStringMap) - extends TextBasedFileScan(sparkSession, fileIndex, readDataSchema, readPartitionSchema, options) { + options: CaseInsensitiveStringMap, + partitionFilters: Seq[Expression] = Seq.empty, + dataFilters: Seq[Expression] = Seq.empty) + extends TextBasedFileScan(sparkSession, options) { private val parsedOptions = new JSONOptionsInRead( CaseInsensitiveMap(options.asScala.toMap), @@ -86,4 +88,16 @@ case class JsonScan( JsonPartitionReaderFactory(sparkSession.sessionState.conf, broadcastedConf, dataSchema, readDataSchema, readPartitionSchema, parsedOptions) } + + override def withFilters( + partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): FileScan = + this.copy(partitionFilters = partitionFilters, dataFilters = dataFilters) + + override def equals(obj: Any): Boolean = obj match { + case j: JsonScan => super.equals(j) && dataSchema == j.dataSchema && options == j.options + + case _ => false + } + + override def hashCode(): Int = super.hashCode() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala index bb3c0366bdc2f..be53b1b1676f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonScanBuilder.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.execution.datasources.v2.json import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder -import org.apache.spark.sql.sources.v2.reader.Scan import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala index bbdd3ae69222a..4b66aec6acbed 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala @@ -22,10 +22,10 @@ import org.apache.hadoop.fs.FileStatus import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.json.JSONOptionsInRead +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.json.JsonDataSource import org.apache.spark.sql.execution.datasources.v2.FileTable -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -49,8 +49,8 @@ case class JsonTable( sparkSession, files, parsedOptions) } - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = - new JsonWriteBuilder(options, paths, formatName, supportsDataType) + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + new JsonWriteBuilder(paths, formatName, supportsDataType, info) override def supportsDataType(dataType: 
DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWriteBuilder.scala index 3c99e07489a77..19f472057ea7d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonWriteBuilder.scala @@ -20,19 +20,19 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.sql.catalyst.json.JSONOptions import org.apache.spark.sql.catalyst.util.CompressionCodecs +import org.apache.spark.sql.connector.write.LogicalWriteInfo import org.apache.spark.sql.execution.datasources.{CodecStreams, OutputWriter, OutputWriterFactory} import org.apache.spark.sql.execution.datasources.json.JsonOutputWriter import org.apache.spark.sql.execution.datasources.v2.FileWriteBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.CaseInsensitiveStringMap class JsonWriteBuilder( - options: CaseInsensitiveStringMap, paths: Seq[String], formatName: String, - supportsDataType: DataType => Boolean) - extends FileWriteBuilder(options, paths, formatName, supportsDataType) { + supportsDataType: DataType => Boolean, + info: LogicalWriteInfo) + extends FileWriteBuilder(paths, formatName, supportsDataType, info) { override def prepareWrite( sqlConf: SQLConf, job: Job, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcDataSourceV2.scala index 1ea80d2ba5fbc..8665af33b976a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcDataSourceV2.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcDataSourceV2.scala @@ -16,10 +16,10 @@ */ package org.apache.spark.sql.execution.datasources.v2.orc +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.v2._ -import org.apache.spark.sql.sources.v2.Table import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index ec923797e2691..03d58fdcb7f67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -29,16 +29,14 @@ import org.apache.orc.mapreduce.OrcInputFormat import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.JoinedRow -import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection -import org.apache.spark.sql.execution.datasources.{PartitionedFile, PartitioningUtils} +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader} +import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.orc.{OrcColumnarBatchReader, OrcDeserializer, OrcUtils} import org.apache.spark.sql.execution.datasources.v2._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.{InputPartition, PartitionReader} import org.apache.spark.sql.types.{AtomicType, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch -import 
org.apache.spark.util.SerializableConfiguration +import org.apache.spark.util.{SerializableConfiguration, Utils} /** * A factory used to create Orc readers. @@ -76,10 +74,11 @@ case class OrcPartitionReaderFactory( val fs = filePath.getFileSystem(conf) val readerOptions = OrcFile.readerOptions(conf).filesystem(fs) - val reader = OrcFile.createReader(filePath, readerOptions) - - val requestedColIdsOrEmptyFile = OrcUtils.requestedColumnIds( - isCaseSensitive, dataSchema, readDataSchema, reader, conf) + val requestedColIdsOrEmptyFile = + Utils.tryWithResource(OrcFile.createReader(filePath, readerOptions)) { reader => + OrcUtils.requestedColumnIds( + isCaseSensitive, dataSchema, readDataSchema, reader, conf) + } if (requestedColIdsOrEmptyFile.isEmpty) { new EmptyPartitionReader[InternalRow] @@ -121,10 +120,11 @@ case class OrcPartitionReaderFactory( val fs = filePath.getFileSystem(conf) val readerOptions = OrcFile.readerOptions(conf).filesystem(fs) - val reader = OrcFile.createReader(filePath, readerOptions) - - val requestedColIdsOrEmptyFile = OrcUtils.requestedColumnIds( - isCaseSensitive, dataSchema, readDataSchema, reader, conf) + val requestedColIdsOrEmptyFile = + Utils.tryWithResource(OrcFile.createReader(filePath, readerOptions)) { reader => + OrcUtils.requestedColumnIds( + isCaseSensitive, dataSchema, readDataSchema, reader, conf) + } if (requestedColIdsOrEmptyFile.isEmpty) { new EmptyPartitionReader diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala index a4fb03405d162..62894fa7a2538 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala @@ -20,10 +20,11 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession +import 
org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.connector.read.PartitionReaderFactory import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.v2.FileScan import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.SerializableConfiguration @@ -36,8 +37,9 @@ case class OrcScan( readDataSchema: StructType, readPartitionSchema: StructType, options: CaseInsensitiveStringMap, - pushedFilters: Array[Filter]) - extends FileScan(sparkSession, fileIndex, readDataSchema, readPartitionSchema) { + pushedFilters: Array[Filter], + partitionFilters: Seq[Expression] = Seq.empty, + dataFilters: Seq[Expression] = Seq.empty) extends FileScan { override def isSplitable(path: Path): Boolean = true override def createReaderFactory(): PartitionReaderFactory = { @@ -51,15 +53,19 @@ case class OrcScan( override def equals(obj: Any): Boolean = obj match { case o: OrcScan => - fileIndex == o.fileIndex && dataSchema == o.dataSchema && - readDataSchema == o.readDataSchema && readPartitionSchema == o.readPartitionSchema && - options == o.options && equivalentFilters(pushedFilters, o.pushedFilters) + super.equals(o) && dataSchema == o.dataSchema && options == o.options && + equivalentFilters(pushedFilters, o.pushedFilters) + case _ => false } override def hashCode(): Int = getClass.hashCode() override def description(): String = { - super.description() + ", PushedFilters: " + pushedFilters.mkString("[", ", ", "]") + super.description() + ", PushedFilters: " + seqToString(pushedFilters) } + + override def withFilters( + partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): FileScan = + this.copy(partitionFilters = partitionFilters, dataFilters = dataFilters) } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala index 458b98c627be4..1421ffd8b6de4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala @@ -22,11 +22,11 @@ import scala.collection.JavaConverters._ import org.apache.orc.mapreduce.OrcInputFormat import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters} import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.orc.OrcFilters import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.sources.v2.reader.{Scan, SupportsPushDownFilters} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -43,6 +43,8 @@ case class OrcScanBuilder( sparkSession.sessionState.newHadoopConfWithOptions(caseSensitiveMap) } + override protected val supportsNestedSchemaPruning: Boolean = true + override def build(): Scan = { OrcScan(sparkSession, hadoopConf, fileIndex, dataSchema, readDataSchema(), readPartitionSchema(), options, pushedFilters()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala index 3fe433861a3c4..3ef41210de181 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala @@ -21,10 +21,10 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.FileStatus import 
org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.orc.OrcUtils import org.apache.spark.sql.execution.datasources.v2.FileTable -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -43,8 +43,8 @@ case class OrcTable( override def inferSchema(files: Seq[FileStatus]): Option[StructType] = OrcUtils.inferSchema(sparkSession, files, options.asScala.toMap) - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = - new OrcWriteBuilder(options, paths, formatName, supportsDataType) + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + new OrcWriteBuilder(paths, formatName, supportsDataType, info) override def supportsDataType(dataType: DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWriteBuilder.scala index f5b06e11c8bd7..48044748708d9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWriteBuilder.scala @@ -21,19 +21,19 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.orc.OrcConf.{COMPRESS, MAPRED_OUTPUT_SCHEMA} import org.apache.orc.mapred.OrcStruct +import org.apache.spark.sql.connector.write.LogicalWriteInfo import org.apache.spark.sql.execution.datasources.{OutputWriter, OutputWriterFactory} import org.apache.spark.sql.execution.datasources.orc.{OrcFileFormat, OrcOptions, OrcOutputWriter, OrcUtils} import org.apache.spark.sql.execution.datasources.v2.FileWriteBuilder import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.CaseInsensitiveStringMap class OrcWriteBuilder( - options: CaseInsensitiveStringMap, paths: Seq[String], formatName: String, - supportsDataType: DataType => Boolean) - extends FileWriteBuilder(options, paths, formatName, supportsDataType) { + supportsDataType: DataType => Boolean, + info: LogicalWriteInfo) + extends FileWriteBuilder(paths, formatName, supportsDataType, info) { override def prepareWrite( sqlConf: SQLConf, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetDataSourceV2.scala index 0b6d5a960374b..8cb6186c12ff3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetDataSourceV2.scala @@ -16,10 +16,10 @@ */ package org.apache.spark.sql.execution.datasources.v2.parquet +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.datasources.v2._ -import org.apache.spark.sql.sources.v2.Table import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala index a0f19c3dd2eb4..047bc74a8d81e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.v2.parquet import java.net.URI -import java.util.TimeZone +import java.time.ZoneId import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ @@ -31,14 +31,13 @@ import org.apache.spark.TaskContext import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader} import org.apache.spark.sql.execution.datasources.{PartitionedFile, RecordReaderIterator} import org.apache.spark.sql.execution.datasources.parquet._ import org.apache.spark.sql.execution.datasources.v2._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.sources.v2.reader.{InputPartition, PartitionReader} import org.apache.spark.sql.types.{AtomicType, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.SerializableConfiguration @@ -118,7 +117,7 @@ case class ParquetPartitionReaderFactory( file: PartitionedFile, buildReaderFunc: ( ParquetInputSplit, InternalRow, TaskAttemptContextImpl, Option[FilterPredicate], - Option[TimeZone]) => RecordReader[Void, T]): RecordReader[Void, T] = { + Option[ZoneId]) => RecordReader[Void, T]): RecordReader[Void, T] = { val conf = broadcastedConf.value.value val filePath = new Path(new URI(file.filePath)) @@ -157,7 +156,7 @@ case class ParquetPartitionReaderFactory( val convertTz = if (timestampConversion && !isCreatedByParquetMr) { - Some(DateTimeUtils.getTimeZone(conf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) + Some(DateTimeUtils.getZoneId(conf.get(SQLConf.SESSION_LOCAL_TIMEZONE.key))) } else { None } @@ -176,7 +175,7 
@@ case class ParquetPartitionReaderFactory( reader } - private def createRowBaseReader(file: PartitionedFile): RecordReader[Void, UnsafeRow] = { + private def createRowBaseReader(file: PartitionedFile): RecordReader[Void, InternalRow] = { buildReaderBase(file, createRowBaseParquetReader) } @@ -185,16 +184,16 @@ case class ParquetPartitionReaderFactory( partitionValues: InternalRow, hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], - convertTz: Option[TimeZone]): RecordReader[Void, UnsafeRow] = { + convertTz: Option[ZoneId]): RecordReader[Void, InternalRow] = { logDebug(s"Falling back to parquet-mr") val taskContext = Option(TaskContext.get()) - // ParquetRecordReader returns UnsafeRow + // ParquetRecordReader returns InternalRow val readSupport = new ParquetReadSupport(convertTz, enableVectorizedReader = false) val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) - new ParquetRecordReader[UnsafeRow](readSupport, parquetFilter) + new ParquetRecordReader[InternalRow](readSupport, parquetFilter) } else { - new ParquetRecordReader[UnsafeRow](readSupport) + new ParquetRecordReader[InternalRow](readSupport) } val iter = new RecordReaderIterator(reader) // SPARK-23457 Register a task completion listener before `initialization`. 
@@ -214,7 +213,7 @@ case class ParquetPartitionReaderFactory( partitionValues: InternalRow, hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], - convertTz: Option[TimeZone]): VectorizedParquetRecordReader = { + convertTz: Option[ZoneId]): VectorizedParquetRecordReader = { val taskContext = Option(TaskContext.get()) val vectorizedReader = new VectorizedParquetRecordReader( convertTz.orNull, enableOffHeapColumnVector && taskContext.isDefined, capacity) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala index a67aa3b92ce82..bb315262a8211 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScan.scala @@ -21,13 +21,13 @@ import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetInputFormat import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.connector.read.PartitionReaderFactory import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.parquet.{ParquetReadSupport, ParquetWriteSupport} import org.apache.spark.sql.execution.datasources.v2.FileScan -import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.SerializableConfiguration @@ -40,8 +40,9 @@ case class ParquetScan( readDataSchema: StructType, readPartitionSchema: StructType, pushedFilters: Array[Filter], - options: 
CaseInsensitiveStringMap) - extends FileScan(sparkSession, fileIndex, readDataSchema, readPartitionSchema) { + options: CaseInsensitiveStringMap, + partitionFilters: Seq[Expression] = Seq.empty, + dataFilters: Seq[Expression] = Seq.empty) extends FileScan { override def isSplitable(path: Path): Boolean = true override def createReaderFactory(): PartitionReaderFactory = { @@ -81,11 +82,18 @@ case class ParquetScan( override def equals(obj: Any): Boolean = obj match { case p: ParquetScan => - fileIndex == p.fileIndex && dataSchema == p.dataSchema && - readDataSchema == p.readDataSchema && readPartitionSchema == p.readPartitionSchema && - options == p.options && equivalentFilters(pushedFilters, p.pushedFilters) + super.equals(p) && dataSchema == p.dataSchema && options == p.options && + equivalentFilters(pushedFilters, p.pushedFilters) case _ => false } override def hashCode(): Int = getClass.hashCode() + + override def description(): String = { + super.description() + ", PushedFilters: " + seqToString(pushedFilters) + } + + override def withFilters( + partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): FileScan = + this.copy(partitionFilters = partitionFilters, dataFilters = dataFilters) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala index 4b8b434af88e6..2f861356e9499 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala @@ -20,11 +20,11 @@ package org.apache.spark.sql.execution.datasources.v2.parquet import scala.collection.JavaConverters._ import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters} import 
org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.parquet.{ParquetFilters, SparkToParquetSchemaConverter} import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.sources.v2.reader.{Scan, SupportsPushDownFilters} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -56,6 +56,8 @@ case class ParquetScanBuilder( parquetFilters.convertibleFilters(this.filters).toArray } + override protected val supportsNestedSchemaPruning: Boolean = true + private var filters: Array[Filter] = Array.empty override def pushFilters(filters: Array[Filter]): Array[Filter] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala index dce851dbcd336..e9f9bf8df35e6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala @@ -21,10 +21,10 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.FileStatus import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils import org.apache.spark.sql.execution.datasources.v2.FileTable -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -43,8 +43,8 @@ case class ParquetTable( override def inferSchema(files: Seq[FileStatus]): Option[StructType] = ParquetUtils.inferSchema(sparkSession, options.asScala.toMap, files) - override def 
newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = - new ParquetWriteBuilder(options, paths, formatName, supportsDataType) + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + new ParquetWriteBuilder(paths, formatName, supportsDataType, info) override def supportsDataType(dataType: DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala index bfe2084299df3..a4e22c21a11f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetWriteBuilder.scala @@ -16,7 +16,6 @@ */ package org.apache.spark.sql.execution.datasources.v2.parquet -import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.{Job, OutputCommitter, TaskAttemptContext} import org.apache.parquet.hadoop.{ParquetOutputCommitter, ParquetOutputFormat} import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel @@ -25,19 +24,19 @@ import org.apache.parquet.hadoop.util.ContextUtil import org.apache.spark.internal.Logging import org.apache.spark.sql.Row +import org.apache.spark.sql.connector.write.LogicalWriteInfo import org.apache.spark.sql.execution.datasources.{OutputWriter, OutputWriterFactory} import org.apache.spark.sql.execution.datasources.parquet._ import org.apache.spark.sql.execution.datasources.v2.FileWriteBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.CaseInsensitiveStringMap class ParquetWriteBuilder( - options: CaseInsensitiveStringMap, paths: Seq[String], formatName: String, - supportsDataType: DataType => Boolean) - extends FileWriteBuilder(options, paths, formatName, supportsDataType) 
with Logging { + supportsDataType: DataType => Boolean, + info: LogicalWriteInfo) + extends FileWriteBuilder(paths, formatName, supportsDataType, info) with Logging { override def prepareWrite( sqlConf: SQLConf, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextDataSourceV2.scala index f6aa1e9c898b9..049c717effa26 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextDataSourceV2.scala @@ -16,10 +16,10 @@ */ package org.apache.spark.sql.execution.datasources.v2.text +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.text.TextFileFormat import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.sources.v2.Table import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextPartitionReaderFactory.scala index 8788887111880..0cd184da6ef8f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextPartitionReaderFactory.scala @@ -21,11 +21,11 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter +import org.apache.spark.sql.connector.read.PartitionReader import 
org.apache.spark.sql.execution.datasources.{HadoopFileLinesReader, HadoopFileWholeTextReader, PartitionedFile} import org.apache.spark.sql.execution.datasources.text.TextOptions import org.apache.spark.sql.execution.datasources.v2._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.PartitionReader import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala index 89b0511442d4a..e75de2c4a4079 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala @@ -21,10 +21,11 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.connector.read.PartitionReaderFactory import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.text.TextOptions -import org.apache.spark.sql.execution.datasources.v2.TextBasedFileScan -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory +import org.apache.spark.sql.execution.datasources.v2.{FileScan, TextBasedFileScan} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.SerializableConfiguration @@ -34,8 +35,10 @@ case class TextScan( fileIndex: PartitioningAwareFileIndex, readDataSchema: StructType, readPartitionSchema: StructType, - options: CaseInsensitiveStringMap) - extends TextBasedFileScan(sparkSession, fileIndex, readDataSchema, readPartitionSchema, options) { + options: CaseInsensitiveStringMap, + partitionFilters: 
Seq[Expression] = Seq.empty, + dataFilters: Seq[Expression] = Seq.empty) + extends TextBasedFileScan(sparkSession, options) { private val optionsAsScala = options.asScala.toMap private lazy val textOptions: TextOptions = new TextOptions(optionsAsScala) @@ -67,4 +70,16 @@ case class TextScan( TextPartitionReaderFactory(sparkSession.sessionState.conf, broadcastedConf, readDataSchema, readPartitionSchema, textOptions) } + + override def withFilters( + partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): FileScan = + this.copy(partitionFilters = partitionFilters, dataFilters = dataFilters) + + override def equals(obj: Any): Boolean = obj match { + case t: TextScan => super.equals(t) && options == t.options + + case _ => false + } + + override def hashCode(): Int = super.hashCode() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala index fbe5e1688b836..b2b518c12b01a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScanBuilder.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.execution.datasources.v2.text import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder -import org.apache.spark.sql.sources.v2.reader.Scan import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala index b8cb61a6c646e..36304a9b17a1e 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution.datasources.v2.text import org.apache.hadoop.fs.FileStatus import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.v2.FileTable -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.types.{DataType, StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -39,8 +39,8 @@ case class TextTable( override def inferSchema(files: Seq[FileStatus]): Option[StructType] = Some(StructType(Seq(StructField("value", StringType)))) - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = - new TextWriteBuilder(options, paths, formatName, supportsDataType) + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + new TextWriteBuilder(paths, formatName, supportsDataType, info) override def supportsDataType(dataType: DataType): Boolean = dataType == StringType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWriteBuilder.scala index c00dbc20be64a..a3bf4dcae3f33 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextWriteBuilder.scala @@ -20,19 +20,19 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.util.CompressionCodecs +import org.apache.spark.sql.connector.write.LogicalWriteInfo 
import org.apache.spark.sql.execution.datasources.{CodecStreams, OutputWriter, OutputWriterFactory} import org.apache.spark.sql.execution.datasources.text.{TextOptions, TextOutputWriter} import org.apache.spark.sql.execution.datasources.v2.FileWriteBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.CaseInsensitiveStringMap class TextWriteBuilder( - options: CaseInsensitiveStringMap, paths: Seq[String], formatName: String, - supportsDataType: DataType => Boolean) - extends FileWriteBuilder(options, paths, formatName, supportsDataType) { + supportsDataType: DataType => Boolean, + info: LogicalWriteInfo) + extends FileWriteBuilder(paths, formatName, supportsDataType, info) { private def verifySchema(schema: StructType): Unit = { if (schema.size != 1) { throw new AnalysisException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index 03adeaaa66569..6a57ef2cafe23 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution import java.util.Collections import scala.collection.JavaConverters._ +import scala.util.control.NonFatal import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.Logging @@ -27,7 +28,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import 
org.apache.spark.sql.catalyst.trees.TreeNodeRef import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat @@ -81,11 +82,20 @@ package object debug { def writeCodegen(append: String => Unit, plan: SparkPlan): Unit = { val codegenSeq = codegenStringSeq(plan) append(s"Found ${codegenSeq.size} WholeStageCodegen subtrees.\n") - for (((subtree, code), i) <- codegenSeq.zipWithIndex) { - append(s"== Subtree ${i + 1} / ${codegenSeq.size} ==\n") + for (((subtree, code, codeStats), i) <- codegenSeq.zipWithIndex) { + val usedConstPoolRatio = if (codeStats.maxConstPoolSize > 0) { + val rt = 100.0 * codeStats.maxConstPoolSize / CodeGenerator.MAX_JVM_CONSTANT_POOL_SIZE + "(%.2f%% used)".format(rt) + } else { + "" + } + val codeStatsStr = s"maxMethodCodeSize:${codeStats.maxMethodCodeSize}; " + + s"maxConstantPoolSize:${codeStats.maxConstPoolSize}$usedConstPoolRatio; " + + s"numInnerClasses:${codeStats.numInnerClasses}" + append(s"== Subtree ${i + 1} / ${codegenSeq.size} ($codeStatsStr) ==\n") append(subtree) append("\nGenerated code:\n") - append(s"${code}\n") + append(s"$code\n") } } @@ -95,7 +105,7 @@ package object debug { * @param plan the query plan for codegen * @return Sequence of WholeStageCodegen subtrees and corresponding codegen */ - def codegenStringSeq(plan: SparkPlan): Seq[(String, String)] = { + def codegenStringSeq(plan: SparkPlan): Seq[(String, String, ByteCodeStats)] = { val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() plan transform { case s: WholeStageCodegenExec => @@ -105,7 +115,13 @@ package object debug { } codegenSubtrees.toSeq.map { subtree => val (_, source) = subtree.doCodeGen() - (subtree.toString, CodeFormatter.format(source)) + val codeStats = try { + CodeGenerator.compile(source)._2 + } catch { + case NonFatal(_) => + ByteCodeStats.UNAVAILABLE + } + (subtree.toString, CodeFormatter.format(source), codeStats) } } @@ -130,7 +146,7 @@ package object debug { * @param query the streaming query for codegen * @return 
Sequence of WholeStageCodegen subtrees and corresponding codegen */ - def codegenStringSeq(query: StreamingQuery): Seq[(String, String)] = { + def codegenStringSeq(query: StreamingQuery): Seq[(String, String, ByteCodeStats)] = { val w = asStreamExecution(query) if (w.lastExecution != null) { codegenStringSeq(w.lastExecution.executedPlan) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index c56a5c015f32d..ab4176cada527 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf */ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { private def defaultNumPreShufflePartitions: Int = - if (conf.adaptiveExecutionEnabled) { + if (conf.adaptiveExecutionEnabled && conf.reducePostShufflePartitionsEnabled) { conf.maxNumPostShufflePartitions } else { conf.numShufflePartitions @@ -83,7 +83,24 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { numPartitionsSet.headOption } - val targetNumPartitions = requiredNumPartitions.getOrElse(childrenNumPartitions.max) + // If there are non-shuffle children that satisfy the required distribution, we have + // some tradeoffs when picking the expected number of shuffle partitions: + // 1. We should avoid shuffling these children. + // 2. We should have a reasonable parallelism. + val nonShuffleChildrenNumPartitions = + childrenIndexes.map(children).filterNot(_.isInstanceOf[ShuffleExchangeExec]) + .map(_.outputPartitioning.numPartitions) + val expectedChildrenNumPartitions = if (nonShuffleChildrenNumPartitions.nonEmpty) { + // Here we pick the max number of partitions among these non-shuffle children as the + // expected number of shuffle partitions. 
However, if it's smaller than + // `conf.numShufflePartitions`, we pick `conf.numShufflePartitions` as the + // expected number of shuffle partitions. + math.max(nonShuffleChildrenNumPartitions.max, conf.numShufflePartitions) + } else { + childrenNumPartitions.max + } + + val targetNumPartitions = requiredNumPartitions.getOrElse(expectedChildrenNumPartitions) children = children.zip(requiredChildDistributions).zipWithIndex.map { case ((child, distribution), index) if childrenIndexes.contains(index) => @@ -188,10 +205,11 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { ShuffledHashJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, buildSide, condition, left, right) - case SortMergeJoinExec(leftKeys, rightKeys, joinType, condition, left, right) => + case SortMergeJoinExec(leftKeys, rightKeys, joinType, condition, left, right, isPartial) => val (reorderedLeftKeys, reorderedRightKeys) = reorderJoinKeys(leftKeys, rightKeys, left.outputPartitioning, right.outputPartitioning) - SortMergeJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, condition, left, right) + SortMergeJoinExec(reorderedLeftKeys, reorderedRightKeys, joinType, condition, + left, right, isPartial) case other => other } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index 3315ae7dabef1..849ff384c130a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expression, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.{ExplainUtils, LeafExecNode, SparkPlan, UnaryExecNode} 
+import org.apache.spark.sql.execution._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch @@ -71,7 +71,7 @@ case class ReusedExchangeExec(override val output: Seq[Attribute], child: Exchan // `ReusedExchangeExec` can have distinct set of output attribute ids from its child, we need // to update the attribute ids in `outputPartitioning` and `outputOrdering`. - private lazy val updateAttr: Expression => Expression = { + private[sql] lazy val updateAttr: Expression => Expression = { val originalAttrToNewAttr = AttributeMap(child.output.zip(output)) e => e.transform { case attr: Attribute => originalAttrToNewAttr.getOrElse(attr, attr) @@ -109,9 +109,10 @@ case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() - plan.transformUp { + + // Replace an Exchange duplicate with a ReusedExchange + def reuse: PartialFunction[Exchange, SparkPlan] = { case exchange: Exchange => - // the exchanges that have same results usually also have same schemas (same column names).
val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) @@ -125,5 +126,16 @@ case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { exchange } } + + plan transformUp { + case exchange: Exchange => reuse(exchange) + } transformAllExpressions { + // Lookup inside subqueries for duplicate exchanges + case in: InSubqueryExec => + val newIn = in.plan.transformUp { + case exchange: Exchange => reuse(exchange) + } + in.copy(plan = newIn.asInstanceOf[BaseSubqueryExec]) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 2f4c5734469f8..4b08da043b83e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.execution.exchange import java.util.Random import java.util.function.Supplier +import scala.concurrent.Future + import org.apache.spark._ import org.apache.spark.internal.config import org.apache.spark.rdd.RDD @@ -28,10 +30,11 @@ import org.apache.spark.shuffle.{ShuffleWriteMetricsReporter, ShuffleWriteProces import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.errors._ -import org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.catalyst.expressions.{Attribute, BoundReference, Divide, Literal, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.adaptive.LocalShuffledRowRDD import 
org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics, SQLShuffleReadMetricsReporter, SQLShuffleWriteMetricsReporter} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType @@ -46,11 +49,9 @@ case class ShuffleExchangeExec( child: SparkPlan, canChangeNumPartitions: Boolean = true) extends Exchange { - // NOTE: coordinator can be null after serialization/deserialization, - // e.g. it can be null on the Executor side private lazy val writeMetrics = SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) - private lazy val readMetrics = + private[sql] lazy val readMetrics = SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext) override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size") @@ -63,6 +64,15 @@ case class ShuffleExchangeExec( @transient lazy val inputRDD: RDD[InternalRow] = child.execute() + // 'mapOutputStatisticsFuture' is only needed when enable AQE. + @transient lazy val mapOutputStatisticsFuture: Future[MapOutputStatistics] = { + if (inputRDD.getNumPartitions == 0) { + Future.successful(null) + } else { + sparkContext.submitMapStage(shuffleDependency) + } + } + /** * A [[ShuffleDependency]] that will partition rows of its child based on * the partitioning scheme defined in `newPartitioning`. Those partitions of @@ -82,6 +92,11 @@ case class ShuffleExchangeExec( new ShuffledRowRDD(shuffleDependency, readMetrics, partitionStartIndices) } + def createLocalShuffleRDD( + partitionStartIndicesPerMapper: Array[Array[Int]]): LocalShuffledRowRDD = { + new LocalShuffledRowRDD(shuffleDependency, readMetrics, partitionStartIndicesPerMapper) + } + /** * Caches the created ShuffleRowRDD so we can reuse that. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilder.scala new file mode 100644 index 0000000000000..e1f42d7abe0fe --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilder.scala @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.history + +import scala.collection.mutable + +import org.apache.spark.deploy.history.{EventFilter, EventFilterBuilder, JobEventFilter} +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler._ +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.ui._ +import org.apache.spark.sql.streaming.StreamingQueryListener + +/** + * This class tracks live SQL executions, and pass the list to the [[SQLLiveEntitiesEventFilter]] + * to help SQLLiveEntitiesEventFilter to accept live SQL executions as well as relevant + * jobs (+ stages/tasks/RDDs). 
+ * + * Note that this class only tracks the jobs which are relevant to SQL executions - cannot classify + * between finished job and live job without relation of SQL execution. + */ +private[spark] class SQLEventFilterBuilder extends SparkListener with EventFilterBuilder { + private val liveExecutionToJobs = new mutable.HashMap[Long, mutable.Set[Int]] + private val jobToStages = new mutable.HashMap[Int, Set[Int]] + private val stageToTasks = new mutable.HashMap[Int, mutable.Set[Long]] + private val stageToRDDs = new mutable.HashMap[Int, Set[Int]] + private val stages = new mutable.HashSet[Int] + + private[history] def liveSQLExecutions: Set[Long] = liveExecutionToJobs.keySet.toSet + private[history] def liveJobs: Set[Int] = liveExecutionToJobs.values.flatten.toSet + private[history] def liveStages: Set[Int] = stageToRDDs.keySet.toSet + private[history] def liveTasks: Set[Long] = stageToTasks.values.flatten.toSet + private[history] def liveRDDs: Set[Int] = stageToRDDs.values.flatten.toSet + + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + val executionIdString = jobStart.properties.getProperty(SQLExecution.EXECUTION_ID_KEY) + if (executionIdString == null) { + // This is not a job created by SQL + return + } + + val executionId = executionIdString.toLong + val jobId = jobStart.jobId + + val jobsForExecution = liveExecutionToJobs.getOrElseUpdate(executionId, + mutable.HashSet[Int]()) + jobsForExecution += jobId + + jobToStages += jobStart.jobId -> jobStart.stageIds.toSet + stages ++= jobStart.stageIds + } + + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = { + val stageId = stageSubmitted.stageInfo.stageId + if (stages.contains(stageId)) { + stageToRDDs.put(stageId, stageSubmitted.stageInfo.rddInfos.map(_.id).toSet) + stageToTasks.getOrElseUpdate(stageId, new mutable.HashSet[Long]()) + } + } + + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + stageToTasks.get(taskStart.stageId).foreach 
{ tasks => + tasks += taskStart.taskInfo.taskId + } + } + + override def onOtherEvent(event: SparkListenerEvent): Unit = event match { + case e: SparkListenerSQLExecutionStart => onExecutionStart(e) + case e: SparkListenerSQLExecutionEnd => onExecutionEnd(e) + case _ => // Ignore + } + + private def onExecutionStart(event: SparkListenerSQLExecutionStart): Unit = { + liveExecutionToJobs += event.executionId -> mutable.HashSet[Int]() + } + + private def onExecutionEnd(event: SparkListenerSQLExecutionEnd): Unit = { + liveExecutionToJobs.remove(event.executionId).foreach { jobs => + val stagesToDrop = jobToStages.filter(kv => jobs.contains(kv._1)).values.flatten + jobToStages --= jobs + stages --= stagesToDrop + stageToTasks --= stagesToDrop + stageToRDDs --= stagesToDrop + } + } + + override def createFilter(): EventFilter = { + new SQLLiveEntitiesEventFilter(liveSQLExecutions, liveJobs, liveStages, liveTasks, liveRDDs) + } +} + +/** + * This class accepts events which are related to the live SQL executions based on the given + * information. + * + * Note that acceptFn will not match the event ("Don't mind") instead of returning false on + * job related events, because it cannot determine whether the job is related to the finished + * SQL executions, or job is NOT related to the SQL executions. For this case, it just gives up + * the decision and let other filters decide it. 
+ */ +private[spark] class SQLLiveEntitiesEventFilter( + liveSQLExecutions: Set[Long], + liveJobs: Set[Int], + liveStages: Set[Int], + liveTasks: Set[Long], + liveRDDs: Set[Int]) + extends JobEventFilter(None, liveJobs, liveStages, liveTasks, liveRDDs) with Logging { + + logDebug(s"live SQL executions : $liveSQLExecutions") + + private val _acceptFn: PartialFunction[SparkListenerEvent, Boolean] = { + case e: SparkListenerSQLExecutionStart => + liveSQLExecutions.contains(e.executionId) + case e: SparkListenerSQLAdaptiveExecutionUpdate => + liveSQLExecutions.contains(e.executionId) + case e: SparkListenerSQLExecutionEnd => + liveSQLExecutions.contains(e.executionId) + case e: SparkListenerDriverAccumUpdates => + liveSQLExecutions.contains(e.executionId) + + case e if acceptFnForJobEvents.lift(e).contains(true) => + // NOTE: if acceptFnForJobEvents(e) returns false, we should leave it to "unmatched" + // because we don't know whether the job has relevant SQL execution which is finished, + // or the job is not related to the SQL execution. 
+ true + + // these events are for finished batches so safer to ignore + case _: StreamingQueryListener.QueryProgressEvent => false + } + + override def acceptFn(): PartialFunction[SparkListenerEvent, Boolean] = _acceptFn +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala index f526a19876670..5517c0dcdb188 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoinExec.scala @@ -19,14 +19,12 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetrics -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.collection.{BitSet, CompactBuffer} case class BroadcastNestedLoopJoinExec( @@ -84,7 +82,7 @@ case class BroadcastNestedLoopJoinExec( @transient private lazy val boundCondition = { if (condition.isDefined) { - newPredicate(condition.get, streamed.output ++ broadcast.output).eval _ + Predicate.create(condition.get, streamed.output ++ broadcast.output).eval _ } else { (r: InternalRow) => true } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala index 88d98530991c9..29645a736548c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProductExec.scala @@ -20,9 +20,8 @@ package org.apache.spark.sql.execution.joins import org.apache.spark._ import org.apache.spark.rdd.{CartesianPartition, CartesianRDD, RDD} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, JoinedRow, UnsafeRow} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, JoinedRow, Predicate, UnsafeRow} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeRowJoiner -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.execution.{BinaryExecNode, ExplainUtils, ExternalAppendOnlyUnsafeRowArray, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.util.CompletionIterator @@ -93,7 +92,7 @@ case class CartesianProductExec( pair.mapPartitionsWithIndexInternal { (index, iter) => val joiner = GenerateUnsafeRowJoiner.create(left.schema, right.schema) val filtered = if (condition.isDefined) { - val boundCondition = newPredicate(condition.get, left.output ++ right.output) + val boundCondition = Predicate.create(condition.get, left.output ++ right.output) boundCondition.initialize(index) val joined = new JoinedRow diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index e8938cb22e890..137f0b87a2f3d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -99,7 +99,7 @@ trait HashJoin { UnsafeProjection.create(streamedKeys) @transient private[this] lazy val boundCondition = if (condition.isDefined) { - newPredicate(condition.get, streamedPlan.output ++ buildPlan.output).eval _ + Predicate.create(condition.get, streamedPlan.output ++ buildPlan.output).eval _ } else { (r: 
InternalRow) => true } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index efe03e0f9ab46..4001338662d53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -335,7 +335,7 @@ private[joins] object UnsafeHashedRelation { if (!success) { binaryMap.free() // scalastyle:off throwerror - throw new SparkOutOfMemoryError("There is no enough memory to build hash map") + throw new SparkOutOfMemoryError("There is not enough memory to build hash map") // scalastyle:on throwerror } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 189727a9bc88d..62eea611556ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -40,11 +40,18 @@ case class SortMergeJoinExec( joinType: JoinType, condition: Option[Expression], left: SparkPlan, - right: SparkPlan) extends BinaryExecNode with CodegenSupport { + right: SparkPlan, + isSkewJoin: Boolean = false) extends BinaryExecNode with CodegenSupport { override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) + override def nodeName: String = { + if (isSkewJoin) super.nodeName + "(skew=true)" else super.nodeName + } + + override def stringArgs: Iterator[Any] = super.stringArgs.toSeq.dropRight(1).iterator + override def simpleStringWithNodeId(): String = { val opId = ExplainUtils.getOpId(this) s"$nodeName $joinType ($opId)".trim @@ -95,8 +102,15 @@ case class SortMergeJoinExec( s"${getClass.getSimpleName} should not take $x as the JoinType") } - override 
def requiredChildDistribution: Seq[Distribution] = - HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil + override def requiredChildDistribution: Seq[Distribution] = { + if (isSkewJoin) { + // We re-arrange the shuffle partitions to deal with skew join, and the new children + // partitioning doesn't satisfy `HashClusteredDistribution`. + UnspecifiedDistribution :: UnspecifiedDistribution :: Nil + } else { + HashClusteredDistribution(leftKeys) :: HashClusteredDistribution(rightKeys) :: Nil + } + } override def outputOrdering: Seq[SortOrder] = joinType match { // For inner join, orders of both sides keys should be kept. @@ -168,14 +182,14 @@ case class SortMergeJoinExec( left.execute().zipPartitions(right.execute()) { (leftIter, rightIter) => val boundCondition: (InternalRow) => Boolean = { condition.map { cond => - newPredicate(cond, left.output ++ right.output).eval _ + Predicate.create(cond, left.output ++ right.output).eval _ }.getOrElse { (r: InternalRow) => true } } // An ordering that can be used to compare keys from both sides. 
- val keyOrdering = newNaturalAscendingOrdering(leftKeys.map(_.dataType)) + val keyOrdering = RowOrdering.createNaturalAscendingOrdering(leftKeys.map(_.dataType)) val resultProj: InternalRow => InternalRow = UnsafeProjection.create(output, output) joinType match { @@ -191,7 +205,8 @@ case class SortMergeJoinExec( RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), inMemoryThreshold, - spillThreshold + spillThreshold, + cleanupResources ) private[this] val joinRow = new JoinedRow @@ -235,7 +250,8 @@ case class SortMergeJoinExec( streamedIter = RowIterator.fromScala(leftIter), bufferedIter = RowIterator.fromScala(rightIter), inMemoryThreshold, - spillThreshold + spillThreshold, + cleanupResources ) val rightNullRow = new GenericInternalRow(right.output.length) new LeftOuterIterator( @@ -249,7 +265,8 @@ case class SortMergeJoinExec( streamedIter = RowIterator.fromScala(rightIter), bufferedIter = RowIterator.fromScala(leftIter), inMemoryThreshold, - spillThreshold + spillThreshold, + cleanupResources ) val leftNullRow = new GenericInternalRow(left.output.length) new RightOuterIterator( @@ -283,7 +300,8 @@ case class SortMergeJoinExec( RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), inMemoryThreshold, - spillThreshold + spillThreshold, + cleanupResources ) private[this] val joinRow = new JoinedRow @@ -318,7 +336,8 @@ case class SortMergeJoinExec( RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), inMemoryThreshold, - spillThreshold + spillThreshold, + cleanupResources ) private[this] val joinRow = new JoinedRow @@ -360,7 +379,8 @@ case class SortMergeJoinExec( RowIterator.fromScala(leftIter), RowIterator.fromScala(rightIter), inMemoryThreshold, - spillThreshold + spillThreshold, + cleanupResources ) private[this] val joinRow = new JoinedRow @@ -640,6 +660,9 @@ case class SortMergeJoinExec( (evaluateVariables(leftVars), "") } + val thisPlan = ctx.addReferenceObj("plan", this) + val eagerCleanup = 
s"$thisPlan.cleanupResources();" + s""" |while (findNextInnerJoinRows($leftInput, $rightInput)) { | ${leftVarDecl.mkString("\n")} @@ -653,6 +676,7 @@ case class SortMergeJoinExec( | } | if (shouldStop()) return; |} + |$eagerCleanup """.stripMargin } } @@ -678,6 +702,7 @@ case class SortMergeJoinExec( * @param inMemoryThreshold Threshold for number of rows guaranteed to be held in memory by * internal buffer * @param spillThreshold Threshold for number of rows to be spilled by internal buffer + * @param eagerCleanupResources the eager cleanup function to be invoked when no join row found */ private[joins] class SortMergeJoinScanner( streamedKeyGenerator: Projection, @@ -686,7 +711,8 @@ private[joins] class SortMergeJoinScanner( streamedIter: RowIterator, bufferedIter: RowIterator, inMemoryThreshold: Int, - spillThreshold: Int) { + spillThreshold: Int, + eagerCleanupResources: () => Unit) { private[this] var streamedRow: InternalRow = _ private[this] var streamedRowKey: InternalRow = _ private[this] var bufferedRow: InternalRow = _ @@ -710,7 +736,8 @@ private[joins] class SortMergeJoinScanner( def getBufferedMatches: ExternalAppendOnlyUnsafeRowArray = bufferedMatches /** - * Advances both input iterators, stopping when we have found rows with matching join keys. + * Advances both input iterators, stopping when we have found rows with matching join keys. If no + * join rows found, try to do the eager resources cleanup. * @return true if matching rows have been found and false otherwise. If this returns true, then * [[getStreamedRow]] and [[getBufferedMatches]] can be called to construct the join * results. @@ -720,7 +747,7 @@ private[joins] class SortMergeJoinScanner( // Advance the streamed side of the join until we find the next row whose join key contains // no nulls or we hit the end of the streamed iterator. } - if (streamedRow == null) { + val found = if (streamedRow == null) { // We have consumed the entire streamed iterator, so there can be no more matches. 
matchJoinKey = null bufferedMatches.clear() @@ -760,17 +787,19 @@ private[joins] class SortMergeJoinScanner( true } } + if (!found) eagerCleanupResources() + found } /** * Advances the streamed input iterator and buffers all rows from the buffered input that - * have matching keys. + * have matching keys. If no join rows found, try to do the eager resources cleanup. * @return true if the streamed iterator returned a row, false otherwise. If this returns true, * then [[getStreamedRow]] and [[getBufferedMatches]] can be called to produce the outer * join results. */ final def findNextOuterJoinRows(): Boolean = { - if (!advancedStreamed()) { + val found = if (!advancedStreamed()) { // We have consumed the entire streamed iterator, so there can be no more matches. matchJoinKey = null bufferedMatches.clear() @@ -800,6 +829,8 @@ private[joins] class SortMergeJoinScanner( // If there is a streamed input then we always return true true } + if (!found) eagerCleanupResources() + found } // --- Private methods -------------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 2ff08883d5cab..ddbd0a343ffcf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -65,6 +65,28 @@ case class CollectLimitExec(limit: Int, child: SparkPlan) extends LimitExec { } } +/** + * Take the last `limit` elements and collect them to a single partition. + * + * This operator will be used when a logical `Tail` operation is the final operator in an + * logical plan, which happens when the user is collecting results back to the driver. 
+ */ +case class CollectTailExec(limit: Int, child: SparkPlan) extends LimitExec { + override def output: Seq[Attribute] = child.output + override def outputPartitioning: Partitioning = SinglePartition + override def executeCollect(): Array[InternalRow] = child.executeTail(limit) + protected override def doExecute(): RDD[InternalRow] = { + // This is a bit hacky way to avoid a shuffle and scanning all data when it performs + // at `Dataset.tail`. + // Since this execution plan and `execute` are currently called only when + // `Dataset.tail` is invoked, the jobs are always executed when they are supposed to be. + + // If we use this execution plan separately like `Dataset.limit` without an actual + // job launch, we might just have to mimic the implementation of `CollectLimitExec`. + sparkContext.parallelize(executeCollect(), numSlices = 1) + } +} + object BaseLimitExec { private val curId = new java.util.concurrent.atomic.AtomicInteger() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala index 19809b07508d9..65aabe004d75b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.metric import java.text.NumberFormat -import java.util.Locale +import java.util.{Arrays, Locale} import scala.concurrent.duration._ @@ -50,14 +50,19 @@ class SQLMetric(val metricType: String, initValue: Long = 0L) extends Accumulato override def reset(): Unit = _value = _zeroValue override def merge(other: AccumulatorV2[Long, Long]): Unit = other match { - case o: SQLMetric => _value += o.value + case o: SQLMetric => + if (_value < 0) _value = 0 + if (o.value > 0) _value += o.value case _ => throw new UnsupportedOperationException( s"Cannot merge ${this.getClass.getName} with 
${other.getClass.getName}") } override def isZero(): Boolean = _value == _zeroValue - override def add(v: Long): Unit = _value += v + override def add(v: Long): Unit = { + if (_value < 0) _value = 0 + _value += v + } // We can set a double value to `SQLMetric` which stores only long value, if it is // average metrics. @@ -65,7 +70,7 @@ class SQLMetric(val metricType: String, initValue: Long = 0L) extends Accumulato def set(v: Long): Unit = _value = v - def +=(v: Long): Unit = _value += v + def +=(v: Long): Unit = add(v) override def value: Long = _value @@ -111,7 +116,8 @@ object SQLMetrics { // data size total (min, med, max): // 100GB (100MB, 1GB, 10GB) val acc = new SQLMetric(SIZE_METRIC, -1) - acc.register(sc, name = Some(s"$name total (min, med, max)"), countFailedValues = false) + acc.register(sc, name = Some(s"$name total (min, med, max (stageId (attemptId): taskId))"), + countFailedValues = false) acc } @@ -120,14 +126,16 @@ object SQLMetrics { // duration(min, med, max): // 5s (800ms, 1s, 2s) val acc = new SQLMetric(TIMING_METRIC, -1) - acc.register(sc, name = Some(s"$name total (min, med, max)"), countFailedValues = false) + acc.register(sc, name = Some(s"$name total (min, med, max (stageId (attemptId): taskId))"), + countFailedValues = false) acc } def createNanoTimingMetric(sc: SparkContext, name: String): SQLMetric = { // Same with createTimingMetric, just normalize the unit of time to millisecond. 
val acc = new SQLMetric(NS_TIMING_METRIC, -1) - acc.register(sc, name = Some(s"$name total (min, med, max)"), countFailedValues = false) + acc.register(sc, name = Some(s"$name total (min, med, max (stageId (attemptId): taskId))"), + countFailedValues = false) acc } @@ -142,30 +150,46 @@ object SQLMetrics { // probe avg (min, med, max): // (1.2, 2.2, 6.3) val acc = new SQLMetric(AVERAGE_METRIC) - acc.register(sc, name = Some(s"$name (min, med, max)"), countFailedValues = false) + acc.register(sc, name = Some(s"$name (min, med, max (stageId (attemptId): taskId))"), + countFailedValues = false) acc } + private def toNumberFormat(value: Long): String = { + val numberFormat = NumberFormat.getNumberInstance(Locale.US) + numberFormat.format(value.toDouble / baseForAvgMetric) + } + + def metricNeedsMax(metricsType: String): Boolean = { + metricsType != SUM_METRIC + } + /** * A function that defines how we aggregate the final accumulator results among all tasks, * and represent it in string for a SQL physical operator. 
- */ - def stringValue(metricsType: String, values: Seq[Long]): String = { + */ + def stringValue(metricsType: String, values: Array[Long], maxMetrics: Array[Long]): String = { + // stringMetric = "(driver)" OR (stage $stageId (attempt $attemptId): task $taskId)) + val stringMetric = if (maxMetrics.isEmpty) { + "(driver)" + } else { + s"(stage ${maxMetrics(1)} (attempt ${maxMetrics(2)}): task ${maxMetrics(3)})" + } if (metricsType == SUM_METRIC) { val numberFormat = NumberFormat.getIntegerInstance(Locale.US) numberFormat.format(values.sum) } else if (metricsType == AVERAGE_METRIC) { - val numberFormat = NumberFormat.getNumberInstance(Locale.US) - val validValues = values.filter(_ > 0) val Seq(min, med, max) = { val metric = if (validValues.isEmpty) { - Seq.fill(3)(0L) + val zeros = Seq.fill(3)(0L) + zeros.map(v => toNumberFormat(v)) } else { - val sorted = validValues.sorted - Seq(sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1)) + Arrays.sort(validValues) + Seq(toNumberFormat(validValues(0)), toNumberFormat(validValues(validValues.length / 2)), + s"${toNumberFormat(validValues(validValues.length - 1))} $stringMetric") } - metric.map(v => numberFormat.format(v.toDouble / baseForAvgMetric)) + metric } s"\n($min, $med, $max)" } else { @@ -182,12 +206,15 @@ object SQLMetrics { val validValues = values.filter(_ >= 0) val Seq(sum, min, med, max) = { val metric = if (validValues.isEmpty) { - Seq.fill(4)(0L) + val zeros = Seq.fill(4)(0L) + zeros.map(v => strFormat(v)) } else { - val sorted = validValues.sorted - Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1)) + Arrays.sort(validValues) + Seq(strFormat(validValues.sum), strFormat(validValues(0)), + strFormat(validValues(validValues.length / 2)), + s"${strFormat(validValues(validValues.length - 1))} $stringMetric") } - metric.map(strFormat) + metric } s"\n$sum ($min, $med, $max)" } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala index 5101f7e871af2..b44b13c8de0da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala @@ -19,12 +19,9 @@ package org.apache.spark.sql.execution.python import java.io._ import java.net._ -import java.util.concurrent.atomic.AtomicBoolean - -import scala.collection.JavaConverters._ import org.apache.arrow.vector.VectorSchemaRoot -import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter} +import org.apache.arrow.vector.ipc.ArrowStreamWriter import org.apache.spark._ import org.apache.spark.api.python._ @@ -33,7 +30,7 @@ import org.apache.spark.sql.execution.arrow.ArrowWriter import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.ArrowUtils -import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} +import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.Utils /** @@ -46,8 +43,8 @@ class ArrowPythonRunner( schema: StructType, timeZoneId: String, conf: Map[String, String]) - extends BasePythonRunner[Iterator[InternalRow], ColumnarBatch]( - funcs, evalType, argOffsets) { + extends BasePythonRunner[Iterator[InternalRow], ColumnarBatch](funcs, evalType, argOffsets) + with PythonArrowOutput { override val bufferSize: Int = SQLConf.get.pandasUDFBufferSize require( @@ -119,72 +116,4 @@ class ArrowPythonRunner( } } - protected override def newReaderIterator( - stream: DataInputStream, - writerThread: WriterThread, - startTime: Long, - env: SparkEnv, - worker: Socket, - releasedOrClosed: AtomicBoolean, - context: TaskContext): Iterator[ColumnarBatch] = { - new ReaderIterator(stream, writerThread, startTime, env, worker, releasedOrClosed, 
context) { - - private val allocator = ArrowUtils.rootAllocator.newChildAllocator( - s"stdin reader for $pythonExec", 0, Long.MaxValue) - - private var reader: ArrowStreamReader = _ - private var root: VectorSchemaRoot = _ - private var schema: StructType = _ - private var vectors: Array[ColumnVector] = _ - - context.addTaskCompletionListener[Unit] { _ => - if (reader != null) { - reader.close(false) - } - allocator.close() - } - - private var batchLoaded = true - - protected override def read(): ColumnarBatch = { - if (writerThread.exception.isDefined) { - throw writerThread.exception.get - } - try { - if (reader != null && batchLoaded) { - batchLoaded = reader.loadNextBatch() - if (batchLoaded) { - val batch = new ColumnarBatch(vectors) - batch.setNumRows(root.getRowCount) - batch - } else { - reader.close(false) - allocator.close() - // Reach end of stream. Call `read()` again to read control data. - read() - } - } else { - stream.readInt() match { - case SpecialLengths.START_ARROW_STREAM => - reader = new ArrowStreamReader(stream, allocator) - root = reader.getVectorSchemaRoot() - schema = ArrowUtils.fromArrowSchema(root.getSchema()) - vectors = root.getFieldVectors().asScala.map { vector => - new ArrowColumnVector(vector) - }.toArray[ColumnVector] - read() - case SpecialLengths.TIMING_DATA => - handleTimingData() - read() - case SpecialLengths.PYTHON_EXCEPTION_THROWN => - throw handlePythonException() - case SpecialLengths.END_OF_DATA_SECTION => - handleEndOfDataSection() - null - } - } - } catch handleException - } - } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala new file mode 100644 index 0000000000000..25ce16db264ac --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.python + +import java.io.DataOutputStream +import java.net.Socket + +import org.apache.arrow.vector.VectorSchemaRoot +import org.apache.arrow.vector.ipc.ArrowStreamWriter + +import org.apache.spark.{SparkEnv, TaskContext} +import org.apache.spark.api.python.{BasePythonRunner, ChainedPythonFunctions, PythonRDD} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.arrow.ArrowWriter +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.ArrowUtils +import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.util.Utils + + +/** + * Python UDF Runner for cogrouped udfs. It sends Arrow batches from two different DataFrames, + * groups them in Python, and receives them back in the JVM as batches of a single DataFrame.
+ */ +class CoGroupedArrowPythonRunner( + funcs: Seq[ChainedPythonFunctions], + evalType: Int, + argOffsets: Array[Array[Int]], + leftSchema: StructType, + rightSchema: StructType, + timeZoneId: String, + conf: Map[String, String]) + extends BasePythonRunner[ + (Iterator[InternalRow], Iterator[InternalRow]), ColumnarBatch](funcs, evalType, argOffsets) + with PythonArrowOutput { + + protected def newWriterThread( + env: SparkEnv, + worker: Socket, + inputIterator: Iterator[(Iterator[InternalRow], Iterator[InternalRow])], + partitionIndex: Int, + context: TaskContext): WriterThread = { + + new WriterThread(env, worker, inputIterator, partitionIndex, context) { + + protected override def writeCommand(dataOut: DataOutputStream): Unit = { + + // Write config for the worker as a number of key -> value pairs of strings + dataOut.writeInt(conf.size) + for ((k, v) <- conf) { + PythonRDD.writeUTF(k, dataOut) + PythonRDD.writeUTF(v, dataOut) + } + + PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets) + } + + protected override def writeIteratorToStream(dataOut: DataOutputStream): Unit = { + // For each we first send the number of dataframes in each group then send + // first df, then send second df. End of data is marked by sending 0. 
+ while (inputIterator.hasNext) { + dataOut.writeInt(2) + val (nextLeft, nextRight) = inputIterator.next() + writeGroup(nextLeft, leftSchema, dataOut, "left") + writeGroup(nextRight, rightSchema, dataOut, "right") + } + dataOut.writeInt(0) + } + + private def writeGroup( + group: Iterator[InternalRow], + schema: StructType, + dataOut: DataOutputStream, + name: String): Unit = { + val arrowSchema = ArrowUtils.toArrowSchema(schema, timeZoneId) + val allocator = ArrowUtils.rootAllocator.newChildAllocator( + s"stdout writer for $pythonExec ($name)", 0, Long.MaxValue) + val root = VectorSchemaRoot.create(arrowSchema, allocator) + + Utils.tryWithSafeFinally { + val writer = new ArrowStreamWriter(root, null, dataOut) + val arrowWriter = ArrowWriter.create(root) + writer.start() + + while (group.hasNext) { + arrowWriter.write(group.next()) + } + arrowWriter.finish() + writer.writeBatch() + writer.end() + }{ + root.close() + allocator.close() + } + } + } + } +} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 3554bdb5c9e0c..a0f23e925d237 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -113,7 +113,7 @@ abstract class EvalPythonExec(udfs: Seq[PythonUDF], resultAttrs: Seq[Attribute], } }.toArray }.toArray - val projection = newMutableProjection(allInputs, child.output) + val projection = MutableProjection.create(allInputs, child.output) val schema = StructType(dataTypes.zipWithIndex.map { case (dt, i) => StructField(s"_$i", dt) }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index d49d790d7888b..7bc8b95cfb03b 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -205,7 +205,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan match { // SPARK-26293: A subquery will be rewritten into join later, and will go through this rule // eventually. Here we skip subquery, as Python UDF only needs to be extracted once. - case _: Subquery => plan + case s: Subquery if s.correlated => plan case _ => plan transformUp { // A safe guard. `ExtractPythonUDFs` only runs once, so we will not hit `BatchEvalPython` and diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapCoGroupsInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapCoGroupsInPandasExec.scala new file mode 100644 index 0000000000000..b079405bdc2f8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapCoGroupsInPandasExec.scala @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.python + +import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} +import org.apache.spark.sql.execution.{BinaryExecNode, CoGroupedIterator, SparkPlan} +import org.apache.spark.sql.execution.python.PandasGroupUtils._ +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.ArrowUtils + + +/** + * Physical node for [[org.apache.spark.sql.catalyst.plans.logical.FlatMapCoGroupsInPandas]] + * + * The input dataframes are first Cogrouped. Rows from each side of the cogroup are passed to the + * Python worker via Arrow. As each side of the cogroup may have a different schema we send every + * group in its own Arrow stream. + * The Python worker turns the resulting record batches to `pandas.DataFrame`s, invokes the + * user-defined function, and passes the resulting `pandas.DataFrame` + * as an Arrow record batch. Finally, each record batch is turned to + * Iterator[InternalRow] using ColumnarBatch. + * + * Note on memory usage: + * Both the Python worker and the Java executor need to have enough memory to + * hold the largest cogroup. The memory on the Java side is used to construct the + * record batches (off heap memory). The memory on the Python side is used for + * holding the `pandas.DataFrame`. It's possible to further split one group into + * multiple record batches to reduce the memory footprint on the Java side, this + * is left as future work. 
+ */ +case class FlatMapCoGroupsInPandasExec( + leftGroup: Seq[Attribute], + rightGroup: Seq[Attribute], + func: Expression, + output: Seq[Attribute], + left: SparkPlan, + right: SparkPlan) + extends SparkPlan with BinaryExecNode { + + private val sessionLocalTimeZone = conf.sessionLocalTimeZone + private val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) + private val pandasFunction = func.asInstanceOf[PythonUDF].func + private val chainedFunc = Seq(ChainedPythonFunctions(Seq(pandasFunction))) + + override def producedAttributes: AttributeSet = AttributeSet(output) + + override def outputPartitioning: Partitioning = left.outputPartitioning + + override def requiredChildDistribution: Seq[Distribution] = { + val leftDist = if (leftGroup.isEmpty) AllTuples else ClusteredDistribution(leftGroup) + val rightDist = if (rightGroup.isEmpty) AllTuples else ClusteredDistribution(rightGroup) + leftDist :: rightDist :: Nil + } + + override def requiredChildOrdering: Seq[Seq[SortOrder]] = { + leftGroup + .map(SortOrder(_, Ascending)) :: rightGroup.map(SortOrder(_, Ascending)) :: Nil + } + + override protected def doExecute(): RDD[InternalRow] = { + + val (leftDedup, leftArgOffsets) = resolveArgOffsets(left, leftGroup) + val (rightDedup, rightArgOffsets) = resolveArgOffsets(right, rightGroup) + + // Map cogrouped rows to ArrowPythonRunner results, Only execute if partition is not empty + left.execute().zipPartitions(right.execute()) { (leftData, rightData) => + if (leftData.isEmpty && rightData.isEmpty) Iterator.empty else { + + val leftGrouped = groupAndProject(leftData, leftGroup, left.output, leftDedup) + val rightGrouped = groupAndProject(rightData, rightGroup, right.output, rightDedup) + val data = new CoGroupedIterator(leftGrouped, rightGrouped, leftGroup) + .map { case (_, l, r) => (l, r) } + + val runner = new CoGroupedArrowPythonRunner( + chainedFunc, + PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF, + Array(leftArgOffsets ++ rightArgOffsets), + 
StructType.fromAttributes(leftDedup), + StructType.fromAttributes(rightDedup), + sessionLocalTimeZone, + pythonRunnerConf) + + executePython(data, output, runner) + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala index 267698d1bca50..5032bc81327b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/FlatMapGroupsInPandasExec.scala @@ -17,19 +17,16 @@ package org.apache.spark.sql.execution.python -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.TaskContext import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} -import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.python.PandasGroupUtils._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.ArrowUtils -import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch} + /** * Physical node for [[org.apache.spark.sql.catalyst.plans.logical.FlatMapGroupsInPandas]] @@ -53,14 +50,17 @@ case class FlatMapGroupsInPandasExec( func: Expression, output: Seq[Attribute], child: SparkPlan) - extends UnaryExecNode { + extends SparkPlan with UnaryExecNode { + private val sessionLocalTimeZone = conf.sessionLocalTimeZone + private val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) private val pandasFunction = func.asInstanceOf[PythonUDF].func - - 
override def outputPartitioning: Partitioning = child.outputPartitioning + private val chainedFunc = Seq(ChainedPythonFunctions(Seq(pandasFunction))) override def producedAttributes: AttributeSet = AttributeSet(output) + override def outputPartitioning: Partitioning = child.outputPartitioning + override def requiredChildDistribution: Seq[Distribution] = { if (groupingAttributes.isEmpty) { AllTuples :: Nil @@ -75,88 +75,23 @@ case class FlatMapGroupsInPandasExec( override protected def doExecute(): RDD[InternalRow] = { val inputRDD = child.execute() - val chainedFunc = Seq(ChainedPythonFunctions(Seq(pandasFunction))) - val sessionLocalTimeZone = conf.sessionLocalTimeZone - val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) - - // Deduplicate the grouping attributes. - // If a grouping attribute also appears in data attributes, then we don't need to send the - // grouping attribute to Python worker. If a grouping attribute is not in data attributes, - // then we need to send this grouping attribute to python worker. - // - // We use argOffsets to distinguish grouping attributes and data attributes as following: - // - // argOffsets[0] is the length of grouping attributes - // argOffsets[1 .. argOffsets[0]+1] is the arg offsets for grouping attributes - // argOffsets[argOffsets[0]+1 .. ] is the arg offsets for data attributes - - val dataAttributes = child.output.drop(groupingAttributes.length) - val groupingIndicesInData = groupingAttributes.map { attribute => - dataAttributes.indexWhere(attribute.semanticEquals) - } - - val groupingArgOffsets = new ArrayBuffer[Int] - val nonDupGroupingAttributes = new ArrayBuffer[Attribute] - val nonDupGroupingSize = groupingIndicesInData.count(_ == -1) - - // Non duplicate grouping attributes are added to nonDupGroupingAttributes and - // their offsets are 0, 1, 2 ... 
- // Duplicate grouping attributes are NOT added to nonDupGroupingAttributes and - // their offsets are n + index, where n is the total number of non duplicate grouping - // attributes and index is the index in the data attributes that the grouping attribute - // is a duplicate of. - - groupingAttributes.zip(groupingIndicesInData).foreach { - case (attribute, index) => - if (index == -1) { - groupingArgOffsets += nonDupGroupingAttributes.length - nonDupGroupingAttributes += attribute - } else { - groupingArgOffsets += index + nonDupGroupingSize - } - } - - val dataArgOffsets = nonDupGroupingAttributes.length until - (nonDupGroupingAttributes.length + dataAttributes.length) - - val argOffsets = Array(Array(groupingAttributes.length) ++ groupingArgOffsets ++ dataArgOffsets) - - // Attributes after deduplication - val dedupAttributes = nonDupGroupingAttributes ++ dataAttributes - val dedupSchema = StructType.fromAttributes(dedupAttributes) + val (dedupAttributes, argOffsets) = resolveArgOffsets(child, groupingAttributes) // Map grouped rows to ArrowPythonRunner results, Only execute if partition is not empty inputRDD.mapPartitionsInternal { iter => if (iter.isEmpty) iter else { - val grouped = if (groupingAttributes.isEmpty) { - Iterator(iter) - } else { - val groupedIter = GroupedIterator(iter, groupingAttributes, child.output) - val dedupProj = UnsafeProjection.create(dedupAttributes, child.output) - groupedIter.map { - case (_, groupedRowIter) => groupedRowIter.map(dedupProj) - } - } - val context = TaskContext.get() + val data = groupAndProject(iter, groupingAttributes, child.output, dedupAttributes) + .map { case (_, x) => x } - val columnarBatchIter = new ArrowPythonRunner( + val runner = new ArrowPythonRunner( chainedFunc, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF, - argOffsets, - dedupSchema, + Array(argOffsets), + StructType.fromAttributes(dedupAttributes), sessionLocalTimeZone, - pythonRunnerConf).compute(grouped, context.partitionId(), context) - - val 
unsafeProj = UnsafeProjection.create(output, output) + pythonRunnerConf) - columnarBatchIter.flatMap { batch => - // Grouped Map UDF returns a StructType column in ColumnarBatch, select the children here - val structVector = batch.column(0).asInstanceOf[ArrowColumnVector] - val outputVectors = output.indices.map(structVector.getChild) - val flattenedBatch = new ColumnarBatch(outputVectors.toArray) - flattenedBatch.setNumRows(batch.numRows()) - flattenedBatch.rowIterator.asScala - }.map(unsafeProj) + executePython(data, output, runner) }} } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PandasGroupUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PandasGroupUtils.scala new file mode 100644 index 0000000000000..68ce991a8ae7f --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PandasGroupUtils.scala @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.python + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.TaskContext +import org.apache.spark.api.python.BasePythonRunner +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection} +import org.apache.spark.sql.execution.{GroupedIterator, SparkPlan} +import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch} + +/** + * Base functionality for plans which execute grouped python udfs. + */ +private[python] object PandasGroupUtils { + /** + * passes the data to the python runner and converts the resulting + * ColumnarBatch into internal rows. + */ + def executePython[T]( + data: Iterator[T], + output: Seq[Attribute], + runner: BasePythonRunner[T, ColumnarBatch]): Iterator[InternalRow] = { + + val context = TaskContext.get() + val columnarBatchIter = runner.compute(data, context.partitionId(), context) + val unsafeProj = UnsafeProjection.create(output, output) + + columnarBatchIter.flatMap { batch => + // UDF returns a StructType column in ColumnarBatch, select the children here + val structVector = batch.column(0).asInstanceOf[ArrowColumnVector] + val outputVectors = output.indices.map(structVector.getChild) + val flattenedBatch = new ColumnarBatch(outputVectors.toArray) + flattenedBatch.setNumRows(batch.numRows()) + flattenedBatch.rowIterator.asScala + }.map(unsafeProj) + } + + /** + * groups according to grouping attributes and then projects into the deduplicated schema + */ + def groupAndProject( + input: Iterator[InternalRow], + groupingAttributes: Seq[Attribute], + inputSchema: Seq[Attribute], + dedupSchema: Seq[Attribute]): Iterator[(InternalRow, Iterator[InternalRow])] = { + val groupedIter = GroupedIterator(input, groupingAttributes, inputSchema) + val dedupProj = UnsafeProjection.create(dedupSchema, inputSchema) + groupedIter.map { + case (k,
groupedRowIter.map(dedupProj)) + } + } + + /** + * Returns the deduplicated attributes of the spark plan and the arg offsets of the + * keys and values. + * + * The deduplicated attributes are needed because the spark plan may contain an attribute + * twice; once in the key and once in the value. For any such attribute we need to + * deduplicate. + * + * The arg offsets are used to distinguish grouping attributes and data attributes + * as following: + * + * argOffsets[0] is the length of the argOffsets array + * + * argOffsets[1] is the length of grouping attributes + * argOffsets[2 .. argOffsets[0]+2] is the arg offsets for grouping attributes + * + * argOffsets[argOffsets[0]+2 .. ] is the arg offsets for data attributes + */ + def resolveArgOffsets( + child: SparkPlan, groupingAttributes: Seq[Attribute]): (Seq[Attribute], Array[Int]) = { + + val dataAttributes = child.output.drop(groupingAttributes.length) + val groupingIndicesInData = groupingAttributes.map { attribute => + dataAttributes.indexWhere(attribute.semanticEquals) + } + + val groupingArgOffsets = new ArrayBuffer[Int] + val nonDupGroupingAttributes = new ArrayBuffer[Attribute] + val nonDupGroupingSize = groupingIndicesInData.count(_ == -1) + + groupingAttributes.zip(groupingIndicesInData).foreach { + case (attribute, index) => + if (index == -1) { + groupingArgOffsets += nonDupGroupingAttributes.length + nonDupGroupingAttributes += attribute + } else { + groupingArgOffsets += index + nonDupGroupingSize + } + } + + val dataArgOffsets = nonDupGroupingAttributes.length until + (nonDupGroupingAttributes.length + dataAttributes.length) + + val argOffsetsLength = groupingAttributes.length + dataArgOffsets.length + 1 + val argOffsets = Array(argOffsetsLength, + groupingAttributes.length) ++ groupingArgOffsets ++ dataArgOffsets + + // Attributes after deduplication + val dedupAttributes = nonDupGroupingAttributes ++ dataAttributes + (dedupAttributes, argOffsets) + } +} diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowOutput.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowOutput.scala new file mode 100644 index 0000000000000..bb353062384a0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowOutput.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.python + +import java.io.DataInputStream +import java.net.Socket +import java.util.concurrent.atomic.AtomicBoolean + +import scala.collection.JavaConverters._ + +import org.apache.arrow.vector.VectorSchemaRoot +import org.apache.arrow.vector.ipc.ArrowStreamReader + +import org.apache.spark.{SparkEnv, TaskContext} +import org.apache.spark.api.python.{BasePythonRunner, SpecialLengths} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.ArrowUtils +import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} + +/** + * A trait that can be mixed-in with [[BasePythonRunner]]. It implements the logic from + * Python (Arrow) to JVM (ColumnarBatch). 
+ */ +private[python] trait PythonArrowOutput { self: BasePythonRunner[_, ColumnarBatch] => + + protected def newReaderIterator( + stream: DataInputStream, + writerThread: WriterThread, + startTime: Long, + env: SparkEnv, + worker: Socket, + releasedOrClosed: AtomicBoolean, + context: TaskContext): Iterator[ColumnarBatch] = { + + new ReaderIterator(stream, writerThread, startTime, env, worker, releasedOrClosed, context) { + + private val allocator = ArrowUtils.rootAllocator.newChildAllocator( + s"stdin reader for $pythonExec", 0, Long.MaxValue) + + private var reader: ArrowStreamReader = _ + private var root: VectorSchemaRoot = _ + private var schema: StructType = _ + private var vectors: Array[ColumnVector] = _ + + context.addTaskCompletionListener[Unit] { _ => + if (reader != null) { + reader.close(false) + } + allocator.close() + } + + private var batchLoaded = true + + protected override def read(): ColumnarBatch = { + if (writerThread.exception.isDefined) { + throw writerThread.exception.get + } + try { + if (reader != null && batchLoaded) { + batchLoaded = reader.loadNextBatch() + if (batchLoaded) { + val batch = new ColumnarBatch(vectors) + batch.setNumRows(root.getRowCount) + batch + } else { + reader.close(false) + allocator.close() + // Reach end of stream. Call `read()` again to read control data. 
+ read() + } + } else { + stream.readInt() match { + case SpecialLengths.START_ARROW_STREAM => + reader = new ArrowStreamReader(stream, allocator) + root = reader.getVectorSchemaRoot() + schema = ArrowUtils.fromArrowSchema(root.getSchema()) + vectors = root.getFieldVectors().asScala.map { vector => + new ArrowColumnVector(vector) + }.toArray[ColumnVector] + read() + case SpecialLengths.TIMING_DATA => + handleTimingData() + read() + case SpecialLengths.PYTHON_EXCEPTION_THROWN => + throw handlePythonException() + case SpecialLengths.END_OF_DATA_SECTION => + handleEndOfDataSection() + null + } + } + } catch handleException + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonForeachWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonForeachWriter.scala index a4e9b3305052f..2a799bab1eb81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonForeachWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonForeachWriter.scala @@ -78,7 +78,7 @@ object PythonForeachWriter { * * Internally, it uses a [[HybridRowQueue]] to buffer the rows in a practically unlimited queue * across memory and local disk. However, HybridRowQueue is designed to be used only with - * EvalPythonExec where the reader is always behind the the writer, that is, the reader does not + * EvalPythonExec where the reader is always behind the writer, that is, the reader does not * try to read n+1 rows if the writer has only written n rows at any point of time. This * assumption is not true for PythonForeachWriter where rows may be added at a different rate as * they are consumed by the python worker. 
Hence, to maintain the invariant of the reader being diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala index 752d271c4cc35..0a250b27ccb94 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala @@ -73,7 +73,7 @@ class PythonUDFRunner( val obj = new Array[Byte](length) stream.readFully(obj) obj - case 0 => Array.empty[Byte] + case 0 => Array.emptyByteArray case SpecialLengths.TIMING_DATA => handleTimingData() read() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala index cad89dedb8b07..f54c4b8f22066 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala @@ -304,7 +304,7 @@ case class WindowInPandasExec( var nextRow: UnsafeRow = null var nextGroup: UnsafeRow = null var nextRowAvailable: Boolean = false - private[this] def fetchNextRow() { + private[this] def fetchNextRow(): Unit = { nextRowAvailable = stream.hasNext if (nextRowAvailable) { nextRow = stream.next().asInstanceOf[UnsafeRow] @@ -325,7 +325,7 @@ case class WindowInPandasExec( val frames = factories.map(_(indexRow)) - private[this] def fetchNextPartition() { + private[this] def fetchNextPartition(): Unit = { // Collect all the rows in the current partition. // Before we start to fetch new input rows, make a copy of nextGroup. 
val currentGroup = nextGroup.copy() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala index 0fe2b628fa38b..59f5a7078a151 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala @@ -191,11 +191,7 @@ class ArrowRRunner( null } } - } catch { - case eof: EOFException => - throw new SparkException( - "R worker exited unexpectedly (crashed)\n " + errThread.getLines(), eof) - } + } catch handleException } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index a6c9c2972df6c..fffd8805a6525 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, Expression, GenericInternalRow, GetArrayItem, Literal} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical.LocalRelation -import org.apache.spark.sql.catalyst.util.QuantileSummaries +import org.apache.spark.sql.catalyst.util.{GenericArrayData, QuantileSummaries} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -248,7 +248,9 @@ object StatFunctions extends Logging { percentileIndex += 1 (child: Expression) => GetArrayItem( - new ApproximatePercentile(child, Literal.create(percentiles)).toAggregateExpression(), + new ApproximatePercentile(child, + Literal(new GenericArrayData(percentiles), ArrayType(DoubleType, false))) + .toAggregateExpression(), Literal(index)) } else { 
stats.toLowerCase(Locale.ROOT) match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ContinuousRecordEndpoint.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ContinuousRecordEndpoint.scala index c9c2ebc875f28..985a5fa6063ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ContinuousRecordEndpoint.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ContinuousRecordEndpoint.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.SparkEnv import org.apache.spark.rpc.{RpcCallContext, RpcEnv, ThreadSafeRpcEndpoint} -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.sources.v2.reader.streaming.PartitionOffset +import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.connector.read.streaming.PartitionOffset case class ContinuousRecordPartitionOffset(partitionId: Int, offset: Int) extends PartitionOffset case class GetRecord(offset: ContinuousRecordPartitionOffset) @@ -33,7 +33,7 @@ case class GetRecord(offset: ContinuousRecordPartitionOffset) * to the number of partitions. 
* @param lock a lock object for locking the buckets for read */ -class ContinuousRecordEndpoint(buckets: Seq[Seq[Any]], lock: Object) +class ContinuousRecordEndpoint(buckets: Seq[Seq[UnsafeRow]], lock: Object) extends ThreadSafeRpcEndpoint { private var startOffsets: Seq[Int] = List.fill(buckets.size)(0) @@ -63,7 +63,7 @@ class ContinuousRecordEndpoint(buckets: Seq[Seq[Any]], lock: Object) val buf = buckets(partitionId) val record = if (buf.size <= bufOffset) None else Some(buf(bufOffset)) - context.reply(record.map(InternalRow(_))) + context.reply(record) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala index 6d1131e6939db..eac5246904ffd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala @@ -21,7 +21,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection} import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_MILLIS import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.types.MetadataBuilder import org.apache.spark.unsafe.types.CalendarInterval diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala index 1d57cb084df9e..712ed1585bc8a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala @@ 
-17,6 +17,8 @@ package org.apache.spark.sql.execution.streaming +import java.util.Locale + import scala.util.Try import org.apache.spark.internal.Logging @@ -74,6 +76,30 @@ class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging */ val fileNameOnly: Boolean = withBooleanParameter("fileNameOnly", false) + /** + * The archive directory to move completed files. The option will be only effective when + * "cleanSource" is set to "archive". + * + * Note that the completed file will be moved to this archive directory with respecting to + * its own path. + * + * For example, if the path of source file is "/a/b/dataset.txt", and the path of archive + * directory is "/archived/here", file will be moved to "/archived/here/a/b/dataset.txt". + */ + val sourceArchiveDir: Option[String] = parameters.get("sourceArchiveDir") + + /** + * Defines how to clean up completed files. Available options are "archive", "delete", "off". + */ + val cleanSource: CleanSourceMode.Value = { + val matchedMode = CleanSourceMode.fromString(parameters.get("cleanSource")) + if (matchedMode == CleanSourceMode.ARCHIVE && sourceArchiveDir.isEmpty) { + throw new IllegalArgumentException("Archive mode must be used with 'sourceArchiveDir' " + + "option.") + } + matchedMode + } + private def withBooleanParameter(name: String, default: Boolean) = { parameters.get(name).map { str => try { @@ -86,3 +112,14 @@ class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging }.getOrElse(default) } } + +object CleanSourceMode extends Enumeration { + val ARCHIVE, DELETE, OFF = Value + + def fromString(value: Option[String]): CleanSourceMode.Value = value.map { v => + CleanSourceMode.values.find(_.toString == v.toUpperCase(Locale.ROOT)) + .getOrElse(throw new IllegalArgumentException( + s"Invalid mode for clean source option $value." 
+ + s" Must be one of ${CleanSourceMode.values.mkString(",")}")) + }.getOrElse(OFF) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index 67e26dc1a2dbc..e8ce8e1487093 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -18,16 +18,24 @@ package org.apache.spark.sql.execution.streaming import java.net.URI +import java.util.concurrent.ThreadPoolExecutor import java.util.concurrent.TimeUnit._ -import org.apache.hadoop.fs.{FileStatus, Path} +import scala.util.control.NonFatal + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, GlobFilter, Path} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.read.streaming +import org.apache.spark.sql.connector.read.streaming.{ReadAllAvailable, ReadLimit, ReadMaxFiles, SupportsAdmissionControl} import org.apache.spark.sql.execution.datasources.{DataSource, InMemoryFileIndex, LogicalRelation} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType +import org.apache.spark.util.ThreadUtils /** * A very simple source that reads files from the given directory as they appear. 
@@ -39,7 +47,7 @@ class FileStreamSource( override val schema: StructType, partitionColumns: Seq[String], metadataPath: String, - options: Map[String, String]) extends Source with Logging { + options: Map[String, String]) extends SupportsAdmissionControl with Source with Logging { import FileStreamSource._ @@ -53,6 +61,9 @@ class FileStreamSource( fs.makeQualified(new Path(path)) // can contain glob patterns } + private val sourceCleaner: Option[FileStreamSourceCleaner] = FileStreamSourceCleaner( + fs, qualifiedBasePath, sourceOptions, hadoopConf) + private val optionsWithPartitionBasePath = sourceOptions.optionMapWithoutPath ++ { if (!SparkHadoopUtil.get.isGlobPath(new Path(path)) && options.contains("path")) { Map("basePath" -> path) @@ -106,15 +117,17 @@ class FileStreamSource( * `synchronized` on this method is for solving race conditions in tests. In the normal usage, * there is no race here, so the cost of `synchronized` should be rare. */ - private def fetchMaxOffset(): FileStreamSourceOffset = synchronized { + private def fetchMaxOffset(limit: ReadLimit): FileStreamSourceOffset = synchronized { // All the new files found - ignore aged files and files that we have seen. val newFiles = fetchAllFiles().filter { case (path, timestamp) => seenFiles.isNewFile(path, timestamp) } // Obey user's setting to limit the number of files in this batch trigger. - val batchFiles = - if (maxFilesPerBatch.nonEmpty) newFiles.take(maxFilesPerBatch.get) else newFiles + val batchFiles = limit match { + case files: ReadMaxFiles => newFiles.take(files.maxFiles()) + case _: ReadAllAvailable => newFiles + } batchFiles.foreach { file => seenFiles.add(file._1, file._2) @@ -141,6 +154,10 @@ class FileStreamSource( FileStreamSourceOffset(metadataLogCurrentOffset) } + override def getDefaultReadLimit: ReadLimit = { + maxFilesPerBatch.map(ReadLimit.maxFiles).getOrElse(super.getDefaultReadLimit) + } + /** * For test only. 
Run `func` with the internal lock to make sure when `func` is running, * the current offset won't be changed and no new batch will be emitted. @@ -200,6 +217,17 @@ class FileStreamSource( CaseInsensitiveMap(options), None).allFiles() } + private def setSourceHasMetadata(newValue: Option[Boolean]): Unit = newValue match { + case Some(true) => + if (sourceCleaner.isDefined) { + throw new UnsupportedOperationException("Clean up source files is not supported when" + + " reading from the output directory of FileStreamSink.") + } + sourceHasMetadata = Some(true) + case _ => + sourceHasMetadata = newValue + } + /** * Returns a list of files found, sorted by their timestamp. */ @@ -210,7 +238,7 @@ class FileStreamSource( sourceHasMetadata match { case None => if (FileStreamSink.hasMetadata(Seq(path), hadoopConf, sparkSession.sessionState.conf)) { - sourceHasMetadata = Some(true) + setSourceHasMetadata(Some(true)) allFiles = allFilesUsingMetadataLogFileIndex() } else { allFiles = allFilesUsingInMemoryFileIndex() @@ -222,10 +250,10 @@ class FileStreamSource( // metadata log and data files are only generated after the previous // `FileStreamSink.hasMetadata` check if (FileStreamSink.hasMetadata(Seq(path), hadoopConf, sparkSession.sessionState.conf)) { - sourceHasMetadata = Some(true) + setSourceHasMetadata(Some(true)) allFiles = allFilesUsingMetadataLogFileIndex() } else { - sourceHasMetadata = Some(false) + setSourceHasMetadata(Some(false)) // `allFiles` have already been fetched using InMemoryFileIndex in this round } } @@ -249,7 +277,14 @@ class FileStreamSource( files } - override def getOffset: Option[Offset] = Some(fetchMaxOffset()).filterNot(_.logOffset == -1) + override def getOffset: Option[Offset] = { + throw new UnsupportedOperationException( + "latestOffset(Offset, ReadLimit) should be called instead of this method") + } + + override def latestOffset(startOffset: streaming.Offset, limit: ReadLimit): streaming.Offset = { + 
Some(fetchMaxOffset(limit)).filterNot(_.logOffset == -1).orNull + } override def toString: String = s"FileStreamSource[$qualifiedBasePath]" @@ -258,16 +293,21 @@ class FileStreamSource( * equal to `end` and will only request offsets greater than `end` in the future. */ override def commit(end: Offset): Unit = { - // No-op for now; FileStreamSource currently garbage-collects files based on timestamp - // and the value of the maxFileAge parameter. + val logOffset = FileStreamSourceOffset(end).logOffset + + sourceCleaner.foreach { cleaner => + val files = metadataLog.get(Some(logOffset), Some(logOffset)).flatMap(_._2) + val validFileEntities = files.filter(_.batchId == logOffset) + logDebug(s"completed file entries: ${validFileEntities.mkString(",")}") + validFileEntities.foreach(cleaner.clean) + } } - override def stop() {} + override def stop(): Unit = sourceCleaner.foreach(_.stop()) } object FileStreamSource { - /** Timestamp for file modification time, in ms since January 1, 1970 UTC. */ type Timestamp = Long @@ -330,4 +370,166 @@ object FileStreamSource { def size: Int = map.size() } + + private[sql] abstract class FileStreamSourceCleaner extends Logging { + private val cleanThreadPool: Option[ThreadPoolExecutor] = { + val numThreads = SQLConf.get.getConf(SQLConf.FILE_SOURCE_CLEANER_NUM_THREADS) + if (numThreads > 0) { + logDebug(s"Cleaning file source on $numThreads separate thread(s)") + Some(ThreadUtils.newDaemonCachedThreadPool("file-source-cleaner-threadpool", numThreads)) + } else { + logDebug("Cleaning file source on main thread") + None + } + } + + def stop(): Unit = cleanThreadPool.foreach(ThreadUtils.shutdown(_)) + + def clean(entry: FileEntry): Unit = { + cleanThreadPool match { + case Some(p) => + p.submit(new Runnable { + override def run(): Unit = { + cleanTask(entry) + } + }) + + case None => + cleanTask(entry) + } + } + + protected def cleanTask(entry: FileEntry): Unit + } + + private[sql] object FileStreamSourceCleaner { + def apply( + 
fileSystem: FileSystem, + sourcePath: Path, + option: FileStreamOptions, + hadoopConf: Configuration): Option[FileStreamSourceCleaner] = option.cleanSource match { + case CleanSourceMode.ARCHIVE => + require(option.sourceArchiveDir.isDefined) + val path = new Path(option.sourceArchiveDir.get) + val archiveFs = path.getFileSystem(hadoopConf) + val qualifiedArchivePath = archiveFs.makeQualified(path) + Some(new SourceFileArchiver(fileSystem, sourcePath, archiveFs, qualifiedArchivePath)) + + case CleanSourceMode.DELETE => + Some(new SourceFileRemover(fileSystem)) + + case _ => None + } + } + + private[sql] class SourceFileArchiver( + fileSystem: FileSystem, + sourcePath: Path, + baseArchiveFileSystem: FileSystem, + baseArchivePath: Path) extends FileStreamSourceCleaner with Logging { + assertParameters() + + private def assertParameters(): Unit = { + require(fileSystem.getUri == baseArchiveFileSystem.getUri, "Base archive path is located " + + s"on a different file system than the source files. source path: $sourcePath" + + s" / base archive path: $baseArchivePath") + + require(!isBaseArchivePathMatchedAgainstSourcePattern, "Base archive path cannot be set to" + + " the path where archived path can possibly match with source pattern. Ensure the base " + + "archive path doesn't match with source pattern in depth, where the depth is minimum of" + + " depth on both paths.") + } + + private def getAncestorEnsuringDepth(path: Path, depth: Int): Path = { + var newPath = path + while (newPath.depth() > depth) { + newPath = newPath.getParent + } + newPath + } + + private def isBaseArchivePathMatchedAgainstSourcePattern: Boolean = { + // We should disallow end users to set base archive path which path matches against source + // pattern to avoid checking each source file. 
There're couple of cases which allow + // FileStreamSource to read any depth of subdirectory under the source pattern, so we should + // consider all three cases 1) both has same depth 2) base archive path is longer than source + // pattern 3) source pattern is longer than base archive path. To handle all cases, we take + // min of depth for both paths, and check the match. + + val minDepth = math.min(sourcePath.depth(), baseArchivePath.depth()) + + val sourcePathMinDepth = getAncestorEnsuringDepth(sourcePath, minDepth) + val baseArchivePathMinDepth = getAncestorEnsuringDepth(baseArchivePath, minDepth) + + val sourceGlobFilters: Seq[GlobFilter] = buildSourceGlobFilters(sourcePathMinDepth) + + var matched = true + + // pathToCompare should have same depth as sourceGlobFilters.length + var pathToCompare = baseArchivePathMinDepth + var index = 0 + do { + // GlobFilter only matches against its name, not full path so it's safe to compare + if (!sourceGlobFilters(index).accept(pathToCompare)) { + matched = false + } else { + pathToCompare = pathToCompare.getParent + index += 1 + } + } while (matched && !pathToCompare.isRoot) + + matched + } + + private def buildSourceGlobFilters(sourcePath: Path): Seq[GlobFilter] = { + val filters = new scala.collection.mutable.MutableList[GlobFilter]() + + var currentPath = sourcePath + while (!currentPath.isRoot) { + filters += new GlobFilter(currentPath.getName) + currentPath = currentPath.getParent + } + + filters.toList + } + + override protected def cleanTask(entry: FileEntry): Unit = { + val curPath = new Path(new URI(entry.path)) + val newPath = new Path(baseArchivePath.toString.stripSuffix("/") + curPath.toUri.getPath) + + try { + logDebug(s"Creating directory if it doesn't exist ${newPath.getParent}") + if (!fileSystem.exists(newPath.getParent)) { + fileSystem.mkdirs(newPath.getParent) + } + + logDebug(s"Archiving completed file $curPath to $newPath") + if (!fileSystem.rename(curPath, newPath)) { + logWarning(s"Fail to move 
$curPath to $newPath / skip moving file.") + } + } catch { + case NonFatal(e) => + logWarning(s"Fail to move $curPath to $newPath / skip moving file.", e) + } + } + } + + private[sql] class SourceFileRemover(fileSystem: FileSystem) + extends FileStreamSourceCleaner with Logging { + + override protected def cleanTask(entry: FileEntry): Unit = { + val curPath = new Path(new URI(entry.path)) + try { + logDebug(s"Removing completed file $curPath") + + if (!fileSystem.delete(curPath, false)) { + logWarning(s"Failed to remove $curPath / skip removing file.") + } + } catch { + case NonFatal(e) => + // Log to error but swallow exception to avoid process being stopped + logWarning(s"Fail to remove $curPath / skip removing file.", e) + } + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala index dda9d41f630e6..59ce7c3707b27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/GroupStateImpl.scala @@ -21,9 +21,10 @@ import java.sql.Date import java.util.concurrent.TimeUnit import org.apache.spark.sql.catalyst.plans.logical.{EventTimeTimeout, ProcessingTimeTimeout} +import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.execution.streaming.GroupStateImpl._ import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout} -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.UTF8String /** @@ -159,13 +160,12 @@ private[sql] class GroupStateImpl[S] private( def getTimeoutTimestamp: Long = timeoutTimestamp private def parseDuration(duration: String): Long = { - val cal = CalendarInterval.fromCaseInsensitiveString(duration) - if (cal.milliseconds < 0 || cal.months < 0) { - throw new IllegalArgumentException(s"Provided duration ($duration) is not 
positive") + val cal = IntervalUtils.stringToInterval(UTF8String.fromString(duration)) + if (IntervalUtils.isNegative(cal)) { + throw new IllegalArgumentException(s"Provided duration ($duration) is negative") } - val millisPerMonth = TimeUnit.MICROSECONDS.toMillis(CalendarInterval.MICROS_PER_DAY) * 31 - cal.milliseconds + cal.months * millisPerMonth + IntervalUtils.getDuration(cal, TimeUnit.MILLISECONDS) } private def checkTimeoutTimestampAllowed(): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 5c9249fb16343..ed0c44da08c5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -90,13 +90,21 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: } } + /** + * Serialize the metadata and write to the output stream. If this method is overridden in a + * subclass, the overriding method should not close the given output stream, as it will be closed + * in the caller. + */ protected def serialize(metadata: T, out: OutputStream): Unit = { - // called inside a try-finally where the underlying stream is closed in the caller Serialization.write(metadata, out) } + /** + * Read and deserialize the metadata from input stream. If this method is overridden in a + * subclass, the overriding method should not close the given input stream, as it will be closed + * in the caller. 
+ */ protected def deserialize(in: InputStream): T = { - // called inside a try-finally where the underlying stream is closed in the caller val reader = new InputStreamReader(in, StandardCharsets.UTF_8) Serialization.read[T](reader) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index af52af0d1d7e6..09ae7692ec518 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.{CurrentBatchTimestamp, Express import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, HashPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.{QueryExecution, SparkPlan, SparkPlanner, UnaryExecNode} +import org.apache.spark.sql.execution.{LeafExecNode, LocalLimitExec, QueryExecution, SparkPlan, SparkPlanner, UnaryExecNode} import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode @@ -50,7 +50,7 @@ class IncrementalExecution( // Modified planner with stateful operations. 
override val planner: SparkPlanner = new SparkPlanner( - sparkSession.sparkContext, + sparkSession, sparkSession.sessionState.conf, sparkSession.sessionState.experimentalMethods) { override def strategies: Seq[Strategy] = @@ -77,7 +77,8 @@ class IncrementalExecution( */ override lazy val optimizedPlan: LogicalPlan = tracker.measurePhase(QueryPlanningTracker.OPTIMIZATION) { - sparkSession.sessionState.optimizer.execute(withCachedData) transformAllExpressions { + sparkSession.sessionState.optimizer.executeAndTrack(withCachedData, + tracker) transformAllExpressions { case ts @ CurrentBatchTimestamp(timestamp, _, _) => logInfo(s"Current batch timestamp = $timestamp") ts.toLiteral @@ -104,6 +105,32 @@ class IncrementalExecution( /** Locates save/restore pairs surrounding aggregation. */ val state = new Rule[SparkPlan] { + /** + * Ensures that this plan DOES NOT have any stateful operation in it whose pipelined execution + * depends on this plan. In other words, this function returns true if this plan does + * have a narrow dependency on a stateful subplan. + */ + private def hasNoStatefulOp(plan: SparkPlan): Boolean = { + var statefulOpFound = false + + def findStatefulOp(planToCheck: SparkPlan): Unit = { + planToCheck match { + case s: StatefulOperator => + statefulOpFound = true + + case e: ShuffleExchangeExec => + // Don't search recursively any further as any child stateful operator as we + // are only looking for stateful subplans that this plan has narrow dependencies on. 
+ + case p: SparkPlan => + p.children.foreach(findStatefulOp) + } + } + + findStatefulOp(plan) + !statefulOpFound + } + override def apply(plan: SparkPlan): SparkPlan = plan transform { case StateStoreSaveExec(keys, None, None, None, stateFormatVersion, UnaryExecNode(agg, @@ -148,6 +175,12 @@ class IncrementalExecution( l.copy( stateInfo = Some(nextStatefulOperationStateInfo), outputMode = Some(outputMode)) + + case StreamingLocalLimitExec(limit, child) if hasNoStatefulOp(child) => + // Optimize limit execution by replacing StreamingLocalLimitExec (consumes the iterator + // completely) to LocalLimitExec (does not consume the iterator) when the child plan has + // no stateful operator (i.e., consuming the iterator is not needed). + LocalLimitExec(limit, child) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala index 916bd2ddbc818..f6cc8116c6c4c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.streaming +import java.io.IOException import java.util.UUID import scala.collection.mutable.ArrayBuffer @@ -43,6 +44,8 @@ class ManifestFileCommitProtocol(jobId: String, path: String) @transient private var fileLog: FileStreamSinkLog = _ private var batchId: Long = _ + @transient private var pendingCommitFiles: ArrayBuffer[Path] = _ + /** * Sets up the manifest log output and the batch id for this job. * Must be called before any other function. 
@@ -54,13 +57,21 @@ class ManifestFileCommitProtocol(jobId: String, path: String) override def setupJob(jobContext: JobContext): Unit = { require(fileLog != null, "setupManifestOptions must be called before this function") - // Do nothing + pendingCommitFiles = new ArrayBuffer[Path] } override def commitJob(jobContext: JobContext, taskCommits: Seq[TaskCommitMessage]): Unit = { require(fileLog != null, "setupManifestOptions must be called before this function") val fileStatuses = taskCommits.flatMap(_.obj.asInstanceOf[Seq[SinkFileStatus]]).toArray + // We shouldn't remove the files if they're written to the metadata: + // `fileLog.add(batchId, fileStatuses)` could fail AFTER writing files to the metadata + // and there could also be a race, + // so for safety we clean up the list before calling anything that may throw an exception. + // The case is uncommon and we do best effort instead of guarantee, so the simplicity of + // logic here would be OK, and safe for dealing with unexpected situations. + pendingCommitFiles.clear() + if (fileLog.add(batchId, fileStatuses)) { logInfo(s"Committed batch $batchId") } else { @@ -70,7 +81,29 @@ class ManifestFileCommitProtocol(jobId: String, path: String) override def abortJob(jobContext: JobContext): Unit = { require(fileLog != null, "setupManifestOptions must be called before this function") - // Do nothing + // Best effort cleanup of complete files from failed job. + // Since the file has UUID in its filename, we are safe to try deleting them + // as the file will not conflict with file with another attempt on the same task. 
+ if (pendingCommitFiles.nonEmpty) { + pendingCommitFiles.foreach { path => + try { + val fs = path.getFileSystem(jobContext.getConfiguration) + // this is to make sure the file can be seen from driver as well + if (fs.exists(path)) { + fs.delete(path, false) + } + } catch { + case e: IOException => + logWarning(s"Fail to remove temporary file $path, continue removing next.", e) + } + } + pendingCommitFiles.clear() + } + } + + override def onTaskCommit(taskCommit: TaskCommitMessage): Unit = { + pendingCommitFiles ++= taskCommit.obj.asInstanceOf[Seq[SinkFileStatus]] + .map(_.toFileStatus.getPath) } override def setupTask(taskContext: TaskAttemptContext): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index e7eb2cb558cdb..45a2ce16183a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -24,12 +24,12 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset => OffsetV2, ReadLimit, SparkDataStream, SupportsAdmissionControl} import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relation, StreamWriterCommitProgress, WriteToDataSourceV2Exec} -import org.apache.spark.sql.execution.streaming.sources.{RateControlMicroBatchStream, 
WriteToMicroBatchDataSource} +import org.apache.spark.sql.execution.streaming.sources.WriteToMicroBatchDataSource import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchStream, Offset => OffsetV2, SparkDataStream} import org.apache.spark.sql.streaming.{OutputMode, Trigger} import org.apache.spark.util.Clock @@ -79,7 +79,7 @@ class MicroBatchExecution( import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ val _logicalPlan = analyzedPlan.transform { - case streamingRelation@StreamingRelation(dataSourceV1, sourceName, output) => + case streamingRelation @ StreamingRelation(dataSourceV1, sourceName, output) => toExecutionRelationMap.getOrElseUpdate(streamingRelation, { // Materialize source to avoid creating it in every batch val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" @@ -122,7 +122,18 @@ class MicroBatchExecution( // v2 source case r: StreamingDataSourceV2Relation => r.stream } - uniqueSources = sources.distinct + uniqueSources = sources.distinct.map { + case source: SupportsAdmissionControl => + val limit = source.getDefaultReadLimit + if (trigger == OneTimeTrigger && limit != ReadLimit.allAvailable()) { + logWarning(s"The read limit $limit for $source is ignored when Trigger.Once() is used.") + source -> ReadLimit.allAvailable() + } else { + source -> limit + } + case other => + other -> ReadLimit.allAvailable() + }.toMap // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. 
sink match { @@ -150,8 +161,7 @@ class MicroBatchExecution( state.set(TERMINATED) if (queryExecutionThread.isAlive) { sparkSession.sparkContext.cancelJobGroup(runId.toString) - queryExecutionThread.interrupt() - queryExecutionThread.join() + interruptAndAwaitExecutionThreadTermination() // microBatchThread may spawn new jobs, so we need to cancel again to prevent a leak sparkSession.sparkContext.cancelJobGroup(runId.toString) } @@ -355,25 +365,33 @@ class MicroBatchExecution( // Generate a map from each unique source to the next available offset. val latestOffsets: Map[SparkDataStream, Option[OffsetV2]] = uniqueSources.map { - case s: Source => + case (s: SupportsAdmissionControl, limit) => updateStatusMessage(s"Getting offsets from $s") - reportTimeTaken("getOffset") { - (s, s.getOffset) + reportTimeTaken("latestOffset") { + val startOffsetOpt = availableOffsets.get(s) + val startOffset = s match { + case _: Source => + startOffsetOpt.orNull + case v2: MicroBatchStream => + startOffsetOpt.map(offset => v2.deserializeOffset(offset.json)) + .getOrElse(v2.initialOffset()) + } + (s, Option(s.latestOffset(startOffset, limit))) } - case s: RateControlMicroBatchStream => + case (s: Source, _) => updateStatusMessage(s"Getting offsets from $s") - reportTimeTaken("latestOffset") { - val startOffset = availableOffsets - .get(s).map(off => s.deserializeOffset(off.json)) - .getOrElse(s.initialOffset()) - (s, Option(s.latestOffset(startOffset))) + reportTimeTaken("getOffset") { + (s, s.getOffset) } - case s: MicroBatchStream => + case (s: MicroBatchStream, _) => updateStatusMessage(s"Getting offsets from $s") reportTimeTaken("latestOffset") { (s, Option(s.latestOffset())) } - }.toMap + case (s, _) => + // for some reason, the compiler is unhappy and thinks the match is not exhaustive + throw new IllegalStateException(s"Unexpected source: $s") + } availableOffsets ++= latestOffsets.filter { case (_, o) => o.nonEmpty }.mapValues(_.get) // Update the query metadata @@ -545,11 
+563,11 @@ class MicroBatchExecution( } val nextBatch = - new Dataset(sparkSessionToRunBatch, lastExecution, RowEncoder(lastExecution.analyzed.schema)) + new Dataset(lastExecution, RowEncoder(lastExecution.analyzed.schema)) val batchSinkProgress: Option[StreamWriterCommitProgress] = reportTimeTaken("addBatch") { - SQLExecution.withNewExecutionId(sparkSessionToRunBatch, lastExecution) { + SQLExecution.withNewExecutionId(lastExecution) { sink match { case s: Sink => s.addBatch(currentBatchId, nextBatch) case _: SupportsWrite => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index b6fa2e9dc3612..1c59464268444 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -22,9 +22,9 @@ import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.RuntimeConfig -import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, StreamingAggregationStateManager} +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, SparkDataStream} +import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, StreamingAggregationStateManager, SymmetricHashJoinStateManager} import org.apache.spark.sql.internal.SQLConf.{FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, _} -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, SparkDataStream} /** @@ -91,7 +91,8 @@ object OffsetSeqMetadata extends Logging { private implicit val format = Serialization.formats(NoTypeHints) private val relevantSQLConfs = Seq( SHUFFLE_PARTITIONS, STATE_STORE_PROVIDER_CLASS, STREAMING_MULTIPLE_WATERMARK_POLICY, - FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, STREAMING_AGGREGATION_STATE_FORMAT_VERSION) + 
FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, STREAMING_AGGREGATION_STATE_FORMAT_VERSION, + STREAMING_JOIN_STATE_FORMAT_VERSION) /** * Default values of relevant configurations that are used for backward compatibility. @@ -108,7 +109,9 @@ object OffsetSeqMetadata extends Logging { FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key -> FlatMapGroupsWithStateExecHelper.legacyVersion.toString, STREAMING_AGGREGATION_STATE_FORMAT_VERSION.key -> - StreamingAggregationStateManager.legacyVersion.toString + StreamingAggregationStateManager.legacyVersion.toString, + STREAMING_JOIN_STATE_FORMAT_VERSION.key -> + SymmetricHashJoinStateManager.legacyVersion.toString ) def apply(json: String): OffsetSeqMetadata = Serialization.read[OffsetSeqMetadata](json) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala index b40426aff0e79..f6543c3e4c4ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLog.scala @@ -24,7 +24,7 @@ import java.nio.charset.StandardCharsets._ import scala.io.{Source => IOSource} import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2} +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2} /** * This class is used to log offsets to persistent files in HDFS. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index 6cb75083d0c0b..f20291e11fd70 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -24,13 +24,14 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.internal.Logging -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan} -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.catalog.Table +import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, SparkDataStream} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.{MicroBatchScanExec, StreamingDataSourceV2Relation, StreamWriterCommitProgress} -import org.apache.spark.sql.sources.v2.Table -import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchStream, SparkDataStream} import org.apache.spark.sql.streaming._ import org.apache.spark.sql.streaming.StreamingQueryListener.QueryProgressEvent import org.apache.spark.util.Clock @@ -88,7 +89,7 @@ trait ProgressReporter extends Logging { private var lastNoDataProgressEventTime = Long.MinValue private val timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 - timestampFormat.setTimeZone(getTimeZone("UTC")) + timestampFormat.setTimeZone(DateTimeUtils.getTimeZone("UTC")) @volatile protected var currentStatus: StreamingQueryStatus = { @@ -147,8 +148,8 @@ trait ProgressReporter extends Logging { 
currentTriggerEndTimestamp = triggerClock.getTimeMillis() val executionStats = extractExecutionStats(hasNewData) - val processingTimeSec = - (currentTriggerEndTimestamp - currentTriggerStartTimestamp).toDouble / MILLIS_PER_SECOND + val processingTimeMills = currentTriggerEndTimestamp - currentTriggerStartTimestamp + val processingTimeSec = Math.max(1L, processingTimeMills).toDouble / MILLIS_PER_SECOND val inputTimeSec = if (lastTriggerStartTimestamp >= 0) { (currentTriggerStartTimestamp - lastTriggerStartTimestamp).toDouble / MILLIS_PER_SECOND @@ -172,6 +173,7 @@ trait ProgressReporter extends Logging { val sinkProgress = SinkProgress( sink.toString, sinkCommitProgress.map(_.numOutputRows)) + val observedMetrics = extractObservedMetrics(hasNewData, lastExecution) val newProgress = new StreamingQueryProgress( id = id, @@ -179,11 +181,13 @@ trait ProgressReporter extends Logging { name = name, timestamp = formatTimestamp(currentTriggerStartTimestamp), batchId = currentBatchId, + batchDuration = processingTimeMills, durationMs = new java.util.HashMap(currentDurationsMs.toMap.mapValues(long2Long).asJava), eventTime = new java.util.HashMap(executionStats.eventTimeStats.asJava), stateOperators = executionStats.stateOperators.toArray, sources = sourceProgress.toArray, - sink = sinkProgress) + sink = sinkProgress, + observedMetrics = new java.util.HashMap(observedMetrics.asJava)) if (hasNewData) { // Reset noDataEventTimestamp if we processed any data @@ -322,6 +326,16 @@ trait ProgressReporter extends Logging { } } + /** Extracts observed metrics from the most recent query execution. */ + private def extractObservedMetrics( + hasNewData: Boolean, + lastExecution: QueryExecution): Map[String, Row] = { + if (!hasNewData || lastExecution == null) { + return Map.empty + } + lastExecution.observedMetrics + } + /** Records the duration of running `body` for the next query progress update. 
*/ protected def reportTimeTaken[T](triggerDetailKey: String)(body: => T): T = { val startTime = triggerClock.getTimeMillis() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala index 02fed50485b94..84f0961e4af12 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/RateStreamOffset.scala @@ -20,10 +20,10 @@ package org.apache.spark.sql.execution.streaming import org.json4s.DefaultFormats import org.json4s.jackson.Serialization -import org.apache.spark.sql.sources.v2 +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2} case class RateStreamOffset(partitionToValueAndRunTimeMs: Map[Int, ValueRunTimeMsPair]) - extends v2.reader.streaming.Offset { + extends OffsetV2 { implicit val defaultFormats: DefaultFormats = DefaultFormats override val json = Serialization.write(partitionToValueAndRunTimeMs) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala index 190325fb7ec25..36c7796ec4399 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.streaming import java.util import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.sources.v2.{Table, TableCapability} +import org.apache.spark.sql.connector.catalog.{Table, TableCapability} import org.apache.spark.sql.types.StructType /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala index 7f66d0b055cc3..6d51d7dc44171 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2} -import org.apache.spark.sql.sources.v2.reader.streaming.SparkDataStream +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, SparkDataStream} import org.apache.spark.sql.types.StructType /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 7c1f6ca42c1f2..8b3534bc0837a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -20,9 +20,9 @@ package org.apache.spark.sql.execution.streaming import java.io.{InterruptedIOException, IOException, UncheckedIOException} import java.nio.channels.ClosedByInterruptException import java.util.UUID -import java.util.concurrent.{CountDownLatch, ExecutionException, TimeUnit} +import java.util.concurrent.{CountDownLatch, ExecutionException, TimeoutException, TimeUnit} import java.util.concurrent.atomic.AtomicReference -import java.util.concurrent.locks.{Condition, ReentrantLock} +import java.util.concurrent.locks.ReentrantLock import scala.collection.JavaConverters._ import scala.collection.mutable.{Map => MutableMap} @@ -36,14 +36,14 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, ReadLimit, 
SparkDataStream} +import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate} +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.StreamingExplainCommand import org.apache.spark.sql.execution.datasources.v2.StreamWriterCommitProgress import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.{SupportsWrite, Table} -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, SparkDataStream} -import org.apache.spark.sql.sources.v2.writer.SupportsTruncate -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite import org.apache.spark.sql.streaming._ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.{Clock, UninterruptibleThread, Utils} @@ -206,7 +206,7 @@ abstract class StreamExecution( /** * A list of unique sources in the query plan. This will be set when generating logical plan. */ - @volatile protected var uniqueSources: Seq[SparkDataStream] = Seq.empty + @volatile protected var uniqueSources: Map[SparkDataStream, ReadLimit] = Map.empty /** Defines the internal state of execution */ protected val state = new AtomicReference[State](INITIALIZING) @@ -307,7 +307,8 @@ abstract class StreamExecution( } // `postEvent` does not throw non fatal exception. - postEvent(new QueryStartedEvent(id, runId, name)) + val submissionTime = triggerClock.getTimeMillis() + postEvent(new QueryStartedEvent(id, runId, name, submissionTime)) // Unblock starting thread startLatch.countDown() @@ -424,7 +425,7 @@ abstract class StreamExecution( /** Stops all streaming sources safely. 
*/ protected def stopSources(): Unit = { - uniqueSources.foreach { source => + uniqueSources.foreach { case (source, _) => try { source.stop() } catch { @@ -434,6 +435,30 @@ abstract class StreamExecution( } } + /** + * Interrupts the query execution thread and awaits its termination until it exceeds the + * timeout. The timeout can be set on "spark.sql.streaming.stopTimeout". + * + * @throws TimeoutException If the thread cannot be stopped within the timeout + */ + @throws[TimeoutException] + protected def interruptAndAwaitExecutionThreadTermination(): Unit = { + val timeout = math.max( + sparkSession.sessionState.conf.getConf(SQLConf.STREAMING_STOP_TIMEOUT), 0) + queryExecutionThread.interrupt() + queryExecutionThread.join(timeout) + if (queryExecutionThread.isAlive) { + val stackTraceException = new SparkException("The stream thread was last executing:") + stackTraceException.setStackTrace(queryExecutionThread.getStackTrace) + val timeoutException = new TimeoutException( + s"Stream Execution thread failed to stop within $timeout milliseconds (specified by " + + s"${SQLConf.STREAMING_STOP_TIMEOUT.key}). See the cause on what was " + + "being executed in the streaming query thread.") + timeoutException.initCause(stackTraceException) + throw timeoutException + } + } + /** * Blocks the current thread until processing for data from the given `source` has reached at * least the given `Offset`. This method is intended for use primarily when writing tests. @@ -578,17 +603,21 @@ abstract class StreamExecution( protected def getBatchDescriptionString: String = { val batchDescription = if (currentBatchId < 0) "init" else currentBatchId.toString - Option(name).map(_ + "
    ").getOrElse("") + - s"id = $id
    runId = $runId
    batch = $batchDescription" + s"""|${Option(name).getOrElse("")} + |id = $id + |runId = $runId + |batch = $batchDescription""".stripMargin } protected def createStreamingWrite( table: SupportsWrite, options: Map[String, String], inputPlan: LogicalPlan): StreamingWrite = { - val writeBuilder = table.newWriteBuilder(new CaseInsensitiveStringMap(options.asJava)) - .withQueryId(id.toString) - .withInputDataSchema(inputPlan.schema) + val info = LogicalWriteInfoImpl( + queryId = id.toString, + inputPlan.schema, + new CaseInsensitiveStringMap(options.asJava)) + val writeBuilder = table.newWriteBuilder(info) outputMode match { case Append => writeBuilder.buildForStreaming() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala index 7dd491ede9d05..1b8d69ffb7521 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala @@ -63,7 +63,7 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) * are dispatched to Spark listener bus. This method is guaranteed to be called by queries in * the same SparkSession as this listener. 
*/ - def post(event: StreamingQueryListener.Event) { + def post(event: StreamingQueryListener.Event): Unit = { event match { case s: QueryStartedEvent => activeQueryRunIds.synchronized { activeQueryRunIds += s.runId } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala index 142b6e7d18068..5858c54ce554a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala @@ -23,10 +23,10 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} +import org.apache.spark.sql.connector.catalog.{Table, TableProvider} +import org.apache.spark.sql.connector.read.streaming.SparkDataStream import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.sources.v2.{Table, TableProvider} -import org.apache.spark.sql.sources.v2.reader.streaming.SparkDataStream import org.apache.spark.sql.util.CaseInsensitiveStringMap object StreamingRelation { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 50cf971e4ec3c..198e17db419a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -21,13 +21,14 @@ import java.util.concurrent.TimeUnit.NANOSECONDS import org.apache.spark.rdd.RDD import 
org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, JoinedRow, Literal, UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, JoinedRow, Literal, Predicate, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.{BinaryExecNode, SparkPlan} import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper._ import org.apache.spark.sql.execution.streaming.state._ +import org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager.KeyToValuePair import org.apache.spark.sql.internal.SessionState import org.apache.spark.util.{CompletionIterator, SerializableConfiguration} @@ -109,7 +110,7 @@ import org.apache.spark.util.{CompletionIterator, SerializableConfiguration} * * 3. When both window in join key and time range conditions are present, case 1 + 2. * In this case, since window equality is a stricter condition than the time range, we can - * use the the State Key Watermark = event time watermark to discard state (similar to case 1). + * use the State Key Watermark = event time watermark to discard state (similar to case 1). 
* * @param leftKeys Expression to generate key rows for joining from left input * @param rightKeys Expression to generate key rows for joining from right input @@ -131,6 +132,7 @@ case class StreamingSymmetricHashJoinExec( stateInfo: Option[StatefulOperatorStateInfo], eventTimeWatermark: Option[Long], stateWatermarkPredicates: JoinStateWatermarkPredicates, + stateFormatVersion: Int, left: SparkPlan, right: SparkPlan) extends SparkPlan with BinaryExecNode with StateStoreWriter { @@ -139,13 +141,20 @@ case class StreamingSymmetricHashJoinExec( rightKeys: Seq[Expression], joinType: JoinType, condition: Option[Expression], + stateFormatVersion: Int, left: SparkPlan, right: SparkPlan) = { this( leftKeys, rightKeys, joinType, JoinConditionSplitPredicates(condition, left, right), stateInfo = None, eventTimeWatermark = None, - stateWatermarkPredicates = JoinStateWatermarkPredicates(), left, right) + stateWatermarkPredicates = JoinStateWatermarkPredicates(), stateFormatVersion, left, right) + } + + if (stateFormatVersion < 2 && joinType != Inner) { + throw new IllegalArgumentException("The query is using stream-stream outer join with state" + + s" format version ${stateFormatVersion} - correctness issue is discovered. Please discard" + + " the checkpoint and rerun the query. 
See SPARK-26154 for more details.") } private def throwBadJoinTypeException(): Nothing = { @@ -206,6 +215,7 @@ case class StreamingSymmetricHashJoinExec( } private def processPartitions( + partitionId: Int, leftInputIter: Iterator[InternalRow], rightInputIter: Iterator[InternalRow]): Iterator[InternalRow] = { if (stateInfo.isEmpty) { @@ -224,14 +234,15 @@ case class StreamingSymmetricHashJoinExec( val joinedRow = new JoinedRow + val inputSchema = left.output ++ right.output val postJoinFilter = - newPredicate(condition.bothSides.getOrElse(Literal(true)), left.output ++ right.output).eval _ + Predicate.create(condition.bothSides.getOrElse(Literal(true)), inputSchema).eval _ val leftSideJoiner = new OneSideHashJoiner( LeftSide, left.output, leftKeys, leftInputIter, - condition.leftSideOnly, postJoinFilter, stateWatermarkPredicates.left) + condition.leftSideOnly, postJoinFilter, stateWatermarkPredicates.left, partitionId) val rightSideJoiner = new OneSideHashJoiner( RightSide, right.output, rightKeys, rightInputIter, - condition.rightSideOnly, postJoinFilter, stateWatermarkPredicates.right) + condition.rightSideOnly, postJoinFilter, stateWatermarkPredicates.right, partitionId) // Join one side input using the other side's buffered/state rows. Here is how it is done. // @@ -270,20 +281,30 @@ case class StreamingSymmetricHashJoinExec( // * Getting an iterator over the rows that have aged out on the left side. These rows are // candidates for being null joined. Note that to avoid doing two passes, this iterator // removes the rows from the state manager as they're processed. - // * Checking whether the current row matches a key in the right side state, and that key - // has any value which satisfies the filter function when joined. If it doesn't, - // we know we can join with null, since there was never (including this batch) a match - // within the watermark period. If it does, there must have been a match at some point, so - // we know we can't join with null. 
+ // * (state format version 1) Checking whether the current row matches a key in the + // right side state, and that key has any value which satisfies the filter function when + // joined. If it doesn't, we know we can join with null, since there was never + // (including this batch) a match within the watermark period. If it does, there must have + // been a match at some point, so we know we can't join with null. + // * (state format version 2) We found edge-case of above approach which brings correctness + // issue, and had to take another approach (see SPARK-26154); now Spark stores 'matched' + // flag along with row, which is set to true when there's any matching row on the right. + def matchesWithRightSideState(leftKeyValue: UnsafeRowPair) = { rightSideJoiner.get(leftKeyValue.key).exists { rightValue => postJoinFilter(joinedRow.withLeft(leftKeyValue.value).withRight(rightValue)) } } val removedRowIter = leftSideJoiner.removeOldState() - val outerOutputIter = removedRowIter - .filterNot(pair => matchesWithRightSideState(pair)) - .map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) + val outerOutputIter = removedRowIter.filterNot { kv => + stateFormatVersion match { + case 1 => matchesWithRightSideState(new UnsafeRowPair(kv.key, kv.value)) + case 2 => kv.matched + case _ => + throw new IllegalStateException("Unexpected state format version! 
" + + s"version $stateFormatVersion") + } + }.map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) innerOutputIter ++ outerOutputIter case RightOuter => @@ -294,9 +315,15 @@ case class StreamingSymmetricHashJoinExec( } } val removedRowIter = rightSideJoiner.removeOldState() - val outerOutputIter = removedRowIter - .filterNot(pair => matchesWithLeftSideState(pair)) - .map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) + val outerOutputIter = removedRowIter.filterNot { kv => + stateFormatVersion match { + case 1 => matchesWithLeftSideState(new UnsafeRowPair(kv.key, kv.value)) + case 2 => kv.matched + case _ => + throw new IllegalStateException("Unexpected state format version! " + + s"version $stateFormatVersion") + } + }.map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) innerOutputIter ++ outerOutputIter case _ => throwBadJoinTypeException() @@ -380,6 +407,7 @@ case class StreamingSymmetricHashJoinExec( * @param stateWatermarkPredicate The state watermark predicate. See * [[StreamingSymmetricHashJoinExec]] for further description of * state watermarks. + * @param partitionId A partition ID of source RDD. */ private class OneSideHashJoiner( joinSide: JoinSide, @@ -388,30 +416,32 @@ case class StreamingSymmetricHashJoinExec( inputIter: Iterator[InternalRow], preJoinFilterExpr: Option[Expression], postJoinFilter: (InternalRow) => Boolean, - stateWatermarkPredicate: Option[JoinStateWatermarkPredicate]) { + stateWatermarkPredicate: Option[JoinStateWatermarkPredicate], + partitionId: Int) { // Filter the joined rows based on the given condition. 
val preJoinFilter = - newPredicate(preJoinFilterExpr.getOrElse(Literal(true)), inputAttributes).eval _ + Predicate.create(preJoinFilterExpr.getOrElse(Literal(true)), inputAttributes).eval _ private val joinStateManager = new SymmetricHashJoinStateManager( - joinSide, inputAttributes, joinKeys, stateInfo, storeConf, hadoopConfBcast.value.value) + joinSide, inputAttributes, joinKeys, stateInfo, storeConf, hadoopConfBcast.value.value, + partitionId, stateFormatVersion) private[this] val keyGenerator = UnsafeProjection.create(joinKeys, inputAttributes) private[this] val stateKeyWatermarkPredicateFunc = stateWatermarkPredicate match { case Some(JoinStateKeyWatermarkPredicate(expr)) => // inputSchema can be empty as expr should only have BoundReferences and does not require // the schema to generated predicate. See [[StreamingSymmetricHashJoinHelper]]. - newPredicate(expr, Seq.empty).eval _ + Predicate.create(expr, Seq.empty).eval _ case _ => - newPredicate(Literal(false), Seq.empty).eval _ // false = do not remove if no predicate + Predicate.create(Literal(false), Seq.empty).eval _ // false = do not remove if no predicate } private[this] val stateValueWatermarkPredicateFunc = stateWatermarkPredicate match { case Some(JoinStateValueWatermarkPredicate(expr)) => - newPredicate(expr, inputAttributes).eval _ + Predicate.create(expr, inputAttributes).eval _ case _ => - newPredicate(Literal(false), Seq.empty).eval _ // false = do not remove if no predicate + Predicate.create(Literal(false), Seq.empty).eval _ // false = do not remove if no predicate } private[this] var updatedStateRowsCount = 0 @@ -431,7 +461,7 @@ case class StreamingSymmetricHashJoinExec( val nonLateRows = WatermarkSupport.watermarkExpression(watermarkAttribute, eventTimeWatermark) match { case Some(watermarkExpr) => - val predicate = newPredicate(watermarkExpr, inputAttributes) + val predicate = Predicate.create(watermarkExpr, inputAttributes) inputIter.filter { row => !predicate.eval(row) } case None => 
inputIter @@ -445,16 +475,9 @@ case class StreamingSymmetricHashJoinExec( // the case of inner join). if (preJoinFilter(thisRow)) { val key = keyGenerator(thisRow) - val outputIter = otherSideJoiner.joinStateManager.get(key).map { thatRow => - generateJoinedRow(thisRow, thatRow) - }.filter(postJoinFilter) - val shouldAddToState = // add only if both removal predicates do not match - !stateKeyWatermarkPredicateFunc(key) && !stateValueWatermarkPredicateFunc(thisRow) - if (shouldAddToState) { - joinStateManager.append(key, thisRow) - updatedStateRowsCount += 1 - } - outputIter + val outputIter: Iterator[JoinedRow] = otherSideJoiner.joinStateManager + .getJoinedRows(key, thatRow => generateJoinedRow(thisRow, thatRow), postJoinFilter) + new AddingProcessedRowToStateCompletionIterator(key, thisRow, outputIter) } else { joinSide match { case LeftSide if joinType == LeftOuter => @@ -467,6 +490,23 @@ case class StreamingSymmetricHashJoinExec( } } + private class AddingProcessedRowToStateCompletionIterator( + key: UnsafeRow, + thisRow: UnsafeRow, + subIter: Iterator[JoinedRow]) + extends CompletionIterator[JoinedRow, Iterator[JoinedRow]](subIter) { + private val iteratorNotEmpty: Boolean = super.hasNext + + override def completion(): Unit = { + val shouldAddToState = // add only if both removal predicates do not match + !stateKeyWatermarkPredicateFunc(key) && !stateValueWatermarkPredicateFunc(thisRow) + if (shouldAddToState) { + joinStateManager.append(key, thisRow, matched = iteratorNotEmpty) + updatedStateRowsCount += 1 + } + } + } + /** * Get an iterator over the values stored in this joiner's state manager for the given key. * @@ -486,7 +526,7 @@ case class StreamingSymmetricHashJoinExec( * We do this to avoid requiring either two passes or full materialization when * processing the rows for outer join. 
*/ - def removeOldState(): Iterator[UnsafeRowPair] = { + def removeOldState(): Iterator[KeyToValuePair] = { stateWatermarkPredicate match { case Some(JoinStateKeyWatermarkPredicate(expr)) => joinStateManager.removeByKeyCondition(stateKeyWatermarkPredicateFunc) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala index 2d4c3c10e6445..cdd3a854c9a90 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala @@ -18,11 +18,10 @@ package org.apache.spark.sql.execution.streaming import scala.reflect.ClassTag -import scala.util.control.NonFatal -import org.apache.spark.{Partition, SparkContext} +import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging -import org.apache.spark.rdd.{RDD, ZippedPartitionsRDD2} +import org.apache.spark.rdd.{RDD, ZippedPartitionsBaseRDD, ZippedPartitionsPartition, ZippedPartitionsRDD2} import org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper import org.apache.spark.sql.catalyst.expressions.{Add, And, Attribute, AttributeReference, AttributeSet, BoundReference, Cast, CheckOverflow, Expression, ExpressionSet, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Literal, Multiply, NamedExpression, PreciseTimestampConversion, PredicateHelper, Subtract, TimeAdd, TimeSub, UnaryMinus} import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark._ @@ -203,17 +202,18 @@ object StreamingSymmetricHashJoinHelper extends Logging { /** * A custom RDD that allows partitions to be "zipped" together, while ensuring the tasks' * preferred location is based on which executors have the required join state stores already - * loaded. 
This is class is a modified version of [[ZippedPartitionsRDD2]]. + * loaded. This class is a variant of [[ZippedPartitionsRDD2]] which only changes signature + * of `f`. */ class StateStoreAwareZipPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, - f: (Iterator[A], Iterator[B]) => Iterator[V], - rdd1: RDD[A], - rdd2: RDD[B], + var f: (Int, Iterator[A], Iterator[B]) => Iterator[V], + var rdd1: RDD[A], + var rdd2: RDD[B], stateInfo: StatefulOperatorStateInfo, stateStoreNames: Seq[String], @transient private val storeCoordinator: Option[StateStoreCoordinatorRef]) - extends ZippedPartitionsRDD2[A, B, V](sc, f, rdd1, rdd2) { + extends ZippedPartitionsBaseRDD[V](sc, List(rdd1, rdd2)) { /** * Set the preferred location of each partition using the executor that has the related @@ -225,6 +225,24 @@ object StreamingSymmetricHashJoinHelper extends Logging { storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)) }.distinct } + + override def compute(s: Partition, context: TaskContext): Iterator[V] = { + val partitions = s.asInstanceOf[ZippedPartitionsPartition].partitions + if (partitions(0).index != partitions(1).index) { + throw new IllegalStateException(s"Partition ID should be same in both side: " + + s"left ${partitions(0).index} , right ${partitions(1).index}") + } + + val partitionId = partitions(0).index + f(partitionId, rdd1.iterator(partitions(0), context), rdd2.iterator(partitions(1), context)) + } + + override def clearDependencies(): Unit = { + super.clearDependencies() + rdd1 = null + rdd2 = null + f = null + } } implicit class StateStoreAwareZipPartitionsHelper[T: ClassTag](dataRDD: RDD[T]) { @@ -239,7 +257,7 @@ object StreamingSymmetricHashJoinHelper extends Logging { stateInfo: StatefulOperatorStateInfo, storeNames: Seq[String], storeCoordinator: StateStoreCoordinatorRef - )(f: (Iterator[T], Iterator[U]) => Iterator[V]): RDD[V] = { + )(f: (Int, Iterator[T], Iterator[U]) => Iterator[V]): RDD[V] = { new 
StateStoreAwareZipPartitionsRDD( dataRDD.sparkContext, f, dataRDD, dataRDD2, stateInfo, storeNames, Some(storeCoordinator)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala index 2bdb3402c14b1..1a27fe61d9602 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Triggers.scala @@ -21,8 +21,10 @@ import java.util.concurrent.TimeUnit import scala.concurrent.duration.Duration +import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_DAY +import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.streaming.Trigger -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.UTF8String private object Triggers { def validate(intervalMs: Long): Unit = { @@ -30,11 +32,11 @@ private object Triggers { } def convert(interval: String): Long = { - val cal = CalendarInterval.fromCaseInsensitiveString(interval) - if (cal.months > 0) { + val cal = IntervalUtils.stringToInterval(UTF8String.fromString(interval)) + if (cal.months != 0) { throw new IllegalArgumentException(s"Doesn't support month or year interval: $interval") } - TimeUnit.MICROSECONDS.toMillis(cal.microseconds) + TimeUnit.MICROSECONDS.toMillis(cal.microseconds + cal.days * MICROS_PER_DAY) } def convert(interval: Duration): Long = interval.toMillis diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala index 76ab1284633b1..b0f8cf9cd1846 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala @@ -63,7 +63,7 @@ case object MinWatermark extends 
MultipleWatermarkPolicy { } /** - * Policy to choose the *min* of the operator watermark values as the global watermark value. So the + * Policy to choose the *max* of the operator watermark values as the global watermark value. So the * global watermark will advance if any of the individual operator watermarks has advanced. * In other words, in a streaming query with multiple input streams and watermarks defined on all * of them, the global watermark will advance as fast as the fastest input. So if there is watermark @@ -108,10 +108,9 @@ case class WatermarkTracker(policy: MultipleWatermarkPolicy) extends Logging { } } - // Update the global watermark to the minimum of all watermark nodes. - // This is the safest option, because only the global watermark is fault-tolerant. Making - // it the minimum of all individual watermarks guarantees it will never advance past where - // any individual watermark operator would be if it were in a plan by itself. + // Update the global watermark accordingly to the chosen policy. To find all available policies + // and their semantics, please check the comments of + // `org.apache.spark.sql.execution.streaming.MultipleWatermarkPolicy` implementations. 
val chosenGlobalWatermark = policy.chooseGlobalWatermark(operatorToWatermarkMap.values.toSeq) if (chosenGlobalWatermark > globalWatermarkMs) { logInfo(s"Updating event-time watermark from $globalWatermarkMs to $chosenGlobalWatermark ms") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/console.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/console.scala index 9ae39c79c5156..e471e6c601d16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/console.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/console.scala @@ -22,11 +22,12 @@ import java.util import scala.collection.JavaConverters._ import org.apache.spark.sql._ +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, SupportsTruncate, WriteBuilder} +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.sql.execution.streaming.sources.ConsoleWrite +import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, DataSourceRegister} -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.writer.{SupportsTruncate, WriteBuilder} -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -35,7 +36,7 @@ case class ConsoleRelation(override val sqlContext: SQLContext, data: DataFrame) override def schema: StructType = data.schema } -class ConsoleSinkProvider extends TableProvider +class ConsoleSinkProvider extends SimpleTableProvider with DataSourceRegister with CreatableRelationProvider { @@ -71,21 +72,16 @@ object ConsoleTable extends Table with SupportsWrite { Set(TableCapability.STREAMING_WRITE).asJava } - override def newWriteBuilder(options: 
CaseInsensitiveStringMap): WriteBuilder = { + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder with SupportsTruncate { - private var inputSchema: StructType = _ - - override def withInputDataSchema(schema: StructType): WriteBuilder = { - this.inputSchema = schema - this - } + private val inputSchema: StructType = info.schema() // Do nothing for truncate. Console sink is special that it just prints all the records. override def truncate(): WriteBuilder = this override def buildForStreaming(): StreamingWrite = { assert(inputSchema != null) - new ConsoleWrite(inputSchema, options) + new ConsoleWrite(inputSchema, info.options) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDD.scala index b68f67e0b22d9..5ee27c71aa731 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousDataSourceRDD.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.execution.streaming.continuous import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming.ContinuousPartitionReaderFactory +import org.apache.spark.sql.connector.read.InputPartition +import org.apache.spark.sql.connector.read.streaming.ContinuousPartitionReaderFactory import org.apache.spark.sql.types.StructType import org.apache.spark.util.NextIterator diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index f6d156ded7663..a109c2171f3d2 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -28,12 +28,11 @@ import org.apache.spark.SparkEnv import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{CurrentDate, CurrentTimestamp} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, TableCapability} +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, Offset => OffsetV2, PartitionOffset, ReadLimit} import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources.v2.StreamingDataSourceV2Relation import org.apache.spark.sql.execution.streaming.{StreamingRelationV2, _} -import org.apache.spark.sql.sources.v2 -import org.apache.spark.sql.sources.v2.{SupportsRead, SupportsWrite, TableCapability} -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, PartitionOffset} import org.apache.spark.sql.streaming.{OutputMode, Trigger} import org.apache.spark.util.Clock @@ -85,7 +84,7 @@ class ContinuousExecution( sources = _logicalPlan.collect { case r: StreamingDataSourceV2Relation => r.stream.asInstanceOf[ContinuousStream] } - uniqueSources = sources.distinct + uniqueSources = sources.distinct.map(s => s -> ReadLimit.allAvailable()).toMap // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. 
WriteToContinuousDataSource( @@ -253,7 +252,7 @@ class ContinuousExecution( updateStatusMessage("Running") reportTimeTaken("runContinuous") { - SQLExecution.withNewExecutionId(sparkSessionForQuery, lastExecution) { + SQLExecution.withNewExecutionId(lastExecution) { lastExecution.executedPlan.execute() } } @@ -340,7 +339,7 @@ class ContinuousExecution( val offset = sources(0).deserializeOffset(offsetLog.get(epoch).get.offsets(0).get.json) committedOffsets ++= Seq(sources(0) -> offset) - sources(0).commit(offset.asInstanceOf[v2.reader.streaming.Offset]) + sources(0).commit(offset.asInstanceOf[OffsetV2]) } else { return } @@ -428,8 +427,7 @@ class ContinuousExecution( if (queryExecutionThread.isAlive) { // The query execution thread will clean itself up in the finally clause of runContinuous. // We just need to interrupt the long running job. - queryExecutionThread.interrupt() - queryExecutionThread.join() + interruptAndAwaitExecutionThreadTermination() } logInfo(s"Query $prettyIdString was stopped") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala index 65c5fc63c2f46..dff2fa69e42fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala @@ -26,7 +26,7 @@ import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeProjection -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousPartitionReader, PartitionOffset} +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReader, PartitionOffset} import 
org.apache.spark.sql.types.StructType import org.apache.spark.util.ThreadUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala index e1b7a8fc283d3..e66a1fe48a2e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousRateStreamSource.scala @@ -22,9 +22,9 @@ import org.json4s.jackson.Serialization import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.read.InputPartition +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReader, ContinuousPartitionReaderFactory, ContinuousStream, Offset, PartitionOffset} import org.apache.spark.sql.execution.streaming.{RateStreamOffset, ValueRunTimeMsPair} -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming._ case class RateStreamPartitionOffset( partition: Int, currentValue: Long, currentTimeMs: Long) extends PartitionOffset diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala index 2263b42870a65..fc47c5ed3ac00 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala @@ -32,10 +32,12 @@ import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.sql.catalyst.InternalRow +import 
org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.connector.read.InputPartition +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReader, ContinuousPartitionReaderFactory, ContinuousStream, Offset, PartitionOffset} import org.apache.spark.sql.execution.streaming.{Offset => _, _} import org.apache.spark.sql.execution.streaming.sources.TextSocketReader -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming._ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.RpcUtils @@ -54,6 +56,9 @@ class TextSocketContinuousStream( implicit val defaultFormats: DefaultFormats = DefaultFormats + private val encoder = ExpressionEncoder.tuple(ExpressionEncoder[String], + ExpressionEncoder[Timestamp]) + @GuardedBy("this") private var socket: Socket = _ @@ -61,7 +66,7 @@ class TextSocketContinuousStream( private var readThread: Thread = _ @GuardedBy("this") - private val buckets = Seq.fill(numPartitions)(new ListBuffer[(String, Timestamp)]) + private val buckets = Seq.fill(numPartitions)(new ListBuffer[UnsafeRow]) @GuardedBy("this") private var currentOffset: Int = -1 @@ -182,7 +187,8 @@ class TextSocketContinuousStream( Timestamp.valueOf( TextSocketReader.DATE_FORMAT.format(Calendar.getInstance().getTime())) ) - buckets(currentOffset % numPartitions) += newData + buckets(currentOffset % numPartitions) += encoder.toRow(newData) + .copy().asInstanceOf[UnsafeRow] } } } catch { @@ -240,6 +246,8 @@ class TextSocketContinuousPartitionReader( private var currentOffset = startOffset private var current: Option[InternalRow] = None + private val projectWithoutTimestamp = UnsafeProjection.create(TextSocketReader.SCHEMA_REGULAR) + override def next(): Boolean = { try { current = getRecord @@ -271,8 +279,7 @@ class TextSocketContinuousPartitionReader( if (includeTimestamp) { 
rec } else { - InternalRow(rec.get(0, TextSocketReader.SCHEMA_TIMESTAMP) - .asInstanceOf[(String, Timestamp)]._1) + projectWithoutTimestamp(rec) } ) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala index a08411d746abe..909dda57ee586 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.execution.streaming.continuous import org.apache.spark.{Partition, SparkEnv, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.sources.v2.writer.DataWriter -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingDataWriterFactory +import org.apache.spark.sql.connector.write.DataWriter +import org.apache.spark.sql.connector.write.streaming.StreamingDataWriterFactory import org.apache.spark.util.Utils /** @@ -80,13 +80,15 @@ class ContinuousWriteRDD(var prev: RDD[InternalRow], writerFactory: StreamingDat logError(s"Writer for partition ${context.partitionId()} is aborting.") if (dataWriter != null) dataWriter.abort() logError(s"Writer for partition ${context.partitionId()} aborted.") + }, finallyBlock = { + dataWriter.close() }) } Iterator() } - override def clearDependencies() { + override def clearDependencies(): Unit = { super.clearDependencies() prev = null } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala index decf524f7167c..dbddab2e9acdd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/EpochCoordinator.scala @@ -23,9 +23,9 @@ import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, PartitionOffset} -import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, PartitionOffset} +import org.apache.spark.sql.connector.write.WriterCommitMessage +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.util.RpcUtils private[continuous] sealed trait EpochCoordinatorMessage extends Serializable diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSource.scala index 54f484c4adae3..cecb2843fc3b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSource.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.streaming.continuous import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite +import org.apache.spark.sql.connector.write.streaming.StreamingWrite /** * The logical plan for writing data in a continuous stream. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala index 2f3af6a6544c4..f1898ad3f27ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala @@ -24,9 +24,10 @@ import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.write.PhysicalWriteInfoImpl +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.streaming.StreamExecution -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite /** * The physical plan for writing data into a continuous processing [[StreamingWrite]]. @@ -38,8 +39,10 @@ case class WriteToContinuousDataSourceExec(write: StreamingWrite, query: SparkPl override def output: Seq[Attribute] = Nil override protected def doExecute(): RDD[InternalRow] = { - val writerFactory = write.createStreamingWriterFactory() - val rdd = new ContinuousWriteRDD(query.execute(), writerFactory) + val queryRdd = query.execute() + val writerFactory = write.createStreamingWriterFactory( + PhysicalWriteInfoImpl(queryRdd.getNumPartitions)) + val rdd = new ContinuousWriteRDD(queryRdd, writerFactory) logInfo(s"Start processing data source write support: $write. 
" + s"The input RDD has ${rdd.partitions.length} partitions.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index df149552dfb30..ea39c549bd072 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -31,10 +31,11 @@ import org.apache.spark.sql.catalyst.encoders.encoderFor import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability} +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory, Scan, ScanBuilder} +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream, Offset => OffsetV2, SparkDataStream} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, MicroBatchStream, Offset => OffsetV2, SparkDataStream} +import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -44,6 +45,9 @@ object MemoryStream { def apply[A : Encoder](implicit sqlContext: SQLContext): MemoryStream[A] = new MemoryStream[A](memoryStreamId.getAndIncrement(), sqlContext) + + def apply[A : Encoder](numPartitions: Int)(implicit sqlContext: SQLContext): MemoryStream[A] = + new MemoryStream[A](memoryStreamId.getAndIncrement(), sqlContext, Some(numPartitions)) } /** @@ -94,7 +98,7 @@ abstract class MemoryStreamBase[A : Encoder](sqlContext: SQLContext) extends Spa // This class is used to indicate the memory stream 
data source. We don't actually use it, as // memory stream is for test only and we never look it up by name. -object MemoryStreamTableProvider extends TableProvider { +object MemoryStreamTableProvider extends SimpleTableProvider { override def getTable(options: CaseInsensitiveStringMap): Table = { throw new IllegalStateException("MemoryStreamTableProvider should not be used.") } @@ -136,9 +140,14 @@ class MemoryStreamScanBuilder(stream: MemoryStreamBase[_]) extends ScanBuilder w * A [[Source]] that produces value stored in memory as they are added by the user. This [[Source]] * is intended for use in unit tests as it can only replay data when the object is still * available. + * + * If numPartitions is provided, the rows will be redistributed to the given number of partitions. */ -case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) - extends MemoryStreamBase[A](sqlContext) with MicroBatchStream with Logging { +case class MemoryStream[A : Encoder]( + id: Int, + sqlContext: SQLContext, + numPartitions: Option[Int] = None) + extends MemoryStreamBase[A](sqlContext) with MicroBatchStream with Logging { protected val output = logicalPlan.output @@ -206,9 +215,23 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) logDebug(generateDebugString(newBlocks.flatten, startOrdinal, endOrdinal)) - newBlocks.map { block => - new MemoryStreamInputPartition(block) - }.toArray + numPartitions match { + case Some(numParts) => + // When the number of partition is provided, we redistribute the rows into + // the given number of partition, via round-robin manner. 
+ val inputRows = newBlocks.flatten.toArray + (0 until numParts).map { newPartIdx => + val records = inputRows.zipWithIndex.filter { case (_, idx) => + idx % numParts == newPartIdx + }.map(_._1) + new MemoryStreamInputPartition(records) + }.toArray + + case _ => + newBlocks.map { block => + new MemoryStreamInputPartition(block) + }.toArray + } } } @@ -237,7 +260,7 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext) lastOffsetCommitted = newOffset } - override def stop() {} + override def stop(): Unit = {} def reset(): Unit = synchronized { batches.clear() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWrite.scala index dbe242784986d..dc25289aa1e2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ConsoleWrite.scala @@ -20,12 +20,12 @@ package org.apache.spark.sql.execution.streaming.sources import org.apache.spark.internal.Logging import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.plans.logical.LocalRelation -import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage -import org.apache.spark.sql.sources.v2.writer.streaming.{StreamingDataWriterFactory, StreamingWrite} +import org.apache.spark.sql.connector.write.{PhysicalWriteInfo, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap -/** Common methods used to create writes for the the console sink */ +/** Common methods used to create writes for the console sink */ class ConsoleWrite(schema: StructType, options: CaseInsensitiveStringMap) extends StreamingWrite with Logging { @@ -38,7 +38,8 @@ class 
ConsoleWrite(schema: StructType, options: CaseInsensitiveStringMap) assert(SparkSession.getActiveSession.isDefined) protected val spark = SparkSession.getActiveSession.get - def createStreamingWriterFactory(): StreamingDataWriterFactory = PackedRowWriterFactory + def createStreamingWriterFactory(info: PhysicalWriteInfo): StreamingDataWriterFactory = + PackedRowWriterFactory override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { // We have to print a "Batch" label for the epoch for compatibility with the pre-data source V2 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ContinuousMemoryStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ContinuousMemoryStream.scala index 41eaf84b7f9ea..f94469385b281 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ContinuousMemoryStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ContinuousMemoryStream.scala @@ -29,9 +29,10 @@ import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.sql.{Encoder, SQLContext} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.connector.read.InputPartition +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReader, ContinuousPartitionReaderFactory, ContinuousStream, Offset, PartitionOffset} import org.apache.spark.sql.execution.streaming.{Offset => _, _} -import org.apache.spark.sql.sources.v2.reader.InputPartition -import org.apache.spark.sql.sources.v2.reader.streaming._ import org.apache.spark.util.RpcUtils /** @@ -50,7 +51,7 @@ class ContinuousMemoryStream[A : Encoder](id: Int, sqlContext: SQLContext, numPa // ContinuousReader implementation @GuardedBy("this") - private val records = Seq.fill(numPartitions)(new ListBuffer[A]) + private val records = 
Seq.fill(numPartitions)(new ListBuffer[UnsafeRow]) private val recordEndpoint = new ContinuousRecordEndpoint(records, this) @volatile private var endpointRef: RpcEndpointRef = _ @@ -58,7 +59,8 @@ class ContinuousMemoryStream[A : Encoder](id: Int, sqlContext: SQLContext, numPa def addData(data: TraversableOnce[A]): Offset = synchronized { // Distribute data evenly among partition lists. data.toSeq.zipWithIndex.map { - case (item, index) => records(index % numPartitions) += item + case (item, index) => + records(index % numPartitions) += encoder.toRow(item).copy().asInstanceOf[UnsafeRow] } // The new target offset is the offset where all records in all partitions have been processed. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterTable.scala index 838c7d497e35b..6e4f40ad080d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterTable.scala @@ -26,12 +26,11 @@ import org.apache.spark.sql.{ForeachWriter, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.write.{DataWriter, LogicalWriteInfo, PhysicalWriteInfo, SupportsTruncate, WriteBuilder, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.execution.python.PythonForeachWriter -import org.apache.spark.sql.sources.v2.{SupportsWrite, Table, TableCapability} -import org.apache.spark.sql.sources.v2.writer.{DataWriter, SupportsTruncate, WriteBuilder, WriterCommitMessage} 
-import org.apache.spark.sql.sources.v2.writer.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * A write-only table for forwarding data into the specified [[ForeachWriter]]. @@ -54,14 +53,9 @@ case class ForeachWriterTable[T]( Set(TableCapability.STREAMING_WRITE).asJava } - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = { + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder with SupportsTruncate { - private var inputSchema: StructType = _ - - override def withInputDataSchema(schema: StructType): WriteBuilder = { - this.inputSchema = schema - this - } + private var inputSchema: StructType = info.schema() // Do nothing for truncate. Foreach sink is special that it just forwards all the records to // ForeachWriter. @@ -72,7 +66,8 @@ case class ForeachWriterTable[T]( override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {} override def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {} - override def createStreamingWriterFactory(): StreamingDataWriterFactory = { + override def createStreamingWriterFactory( + info: PhysicalWriteInfo): StreamingDataWriterFactory = { val rowConverter: InternalRow => T = converter match { case Left(enc) => val boundEnc = enc.resolveAndBind( @@ -134,7 +129,7 @@ class ForeachDataWriter[T]( // If open returns false, we should skip writing rows. 
private val opened = writer.open(partitionId, epochId) - private var closeCalled: Boolean = false + private var errorOrNull: Throwable = _ override def write(record: InternalRow): Unit = { if (!opened) return @@ -143,25 +138,24 @@ class ForeachDataWriter[T]( writer.process(rowConverter(record)) } catch { case t: Throwable => - closeWriter(t) + errorOrNull = t throw t } + } override def commit(): WriterCommitMessage = { - closeWriter(null) ForeachWriterCommitMessage } override def abort(): Unit = { - closeWriter(new SparkException("Foreach writer has been aborted due to a task failure")) + if (errorOrNull == null) { + errorOrNull = new SparkException("Foreach writer has been aborted due to a task failure") + } } - private def closeWriter(errorOrNull: Throwable): Unit = { - if (!closeCalled) { - closeCalled = true - writer.close(errorOrNull) - } + override def close(): Unit = { + writer.close(errorOrNull) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/MicroBatchWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/MicroBatchWrite.scala index f3951897ea747..c2adc1dd6742a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/MicroBatchWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/MicroBatchWrite.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.execution.streaming.sources import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.sources.v2.writer.{BatchWrite, DataWriter, DataWriterFactory, WriterCommitMessage} -import org.apache.spark.sql.sources.v2.writer.streaming.{StreamingDataWriterFactory, StreamingWrite} +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, PhysicalWriteInfo, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} /** * A [[BatchWrite]] used to hook V2 stream writers into a 
microbatch plan. It implements @@ -36,8 +36,8 @@ class MicroBatchWrite(eppchId: Long, val writeSupport: StreamingWrite) extends B writeSupport.abort(eppchId, messages) } - override def createBatchWriterFactory(): DataWriterFactory = { - new MicroBatchWriterFactory(eppchId, writeSupport.createStreamingWriterFactory()) + override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = { + new MicroBatchWriterFactory(eppchId, writeSupport.createStreamingWriterFactory(info)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala index fd4cb444ce580..507f860e0452a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala @@ -21,8 +21,8 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.sources.v2.writer.{BatchWrite, DataWriter, DataWriterFactory, WriterCommitMessage} -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingDataWriterFactory +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.StreamingDataWriterFactory /** * A simple [[DataWriterFactory]] whose tasks just pack rows into the commit message for delivery @@ -56,10 +56,12 @@ class PackedRowDataWriter() extends DataWriter[InternalRow] with Logging { override def write(row: InternalRow): Unit = data.append(row.copy()) override def commit(): PackedRowCommitMessage = { - val msg = PackedRowCommitMessage(data.toArray) - data.clear() - msg + PackedRowCommitMessage(data.toArray) } - override def abort(): Unit = data.clear() + override def abort(): 
Unit = {} + + override def close(): Unit = { + data.clear() + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala index 156ba95ab9733..eb6baf698a5b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala @@ -27,9 +27,9 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} +import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset} import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchStream, Offset} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.{ManualClock, SystemClock} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProvider.scala index f61e9dbecd4ea..a093bf54b2107 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProvider.scala @@ -23,11 +23,12 @@ import scala.collection.JavaConverters._ import org.apache.spark.network.util.JavaUtils import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability} +import org.apache.spark.sql.connector.read.{Scan, 
ScanBuilder} +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.execution.streaming.continuous.RateStreamContinuousStream +import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.DataSourceRegister -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.reader.{Scan, ScanBuilder} -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -45,7 +46,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap * generated rows. The source will try its best to reach `rowsPerSecond`, but the query may * be resource constrained, and `numPartitions` can be tweaked to help reach the desired speed. */ -class RateStreamProvider extends TableProvider with DataSourceRegister { +class RateStreamProvider extends SimpleTableProvider with DataSourceRegister { import RateStreamProvider._ override def getTable(options: CaseInsensitiveStringMap): Table = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala index 25e9af2bc2927..97a6576832515 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala @@ -28,9 +28,9 @@ import scala.collection.mutable.ListBuffer import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} +import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, 
Offset} import org.apache.spark.sql.execution.streaming.LongOffset -import org.apache.spark.sql.sources.v2.reader.{InputPartition, PartitionReader, PartitionReaderFactory} -import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchStream, Offset} import org.apache.spark.unsafe.types.UTF8String /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketSourceProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketSourceProvider.scala index 0f807e235661a..a4dcb2049eb87 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketSourceProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketSourceProvider.scala @@ -26,15 +26,16 @@ import scala.util.{Failure, Success, Try} import org.apache.spark.internal.Logging import org.apache.spark.sql._ +import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability} +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder} +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.execution.streaming.continuous.TextSocketContinuousStream +import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.DataSourceRegister -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.reader.{Scan, ScanBuilder} -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.types.{StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap -class TextSocketSourceProvider extends TableProvider with DataSourceRegister with Logging { +class TextSocketSourceProvider extends SimpleTableProvider with DataSourceRegister with Logging { private def checkParameters(params: CaseInsensitiveStringMap): Unit = { 
logWarning("The socket source should not be used for production applications! " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala index a3f58fa966fe8..ef1115e6d9e01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/WriteToMicroBatchDataSource.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.execution.streaming.sources import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2 -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite /** * The logical plan for writing data to a micro-batch stream. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala index de8d00d4ac348..2b674070a70ad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala @@ -32,12 +32,11 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics} import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, SupportsTruncate, WriteBuilder, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.execution.streaming.Sink -import org.apache.spark.sql.sources.v2.{SupportsWrite, Table, TableCapability} -import org.apache.spark.sql.sources.v2.writer._ -import org.apache.spark.sql.sources.v2.writer.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * A sink that stores the results in memory. 
This [[Sink]] is primarily intended for use in unit @@ -53,21 +52,16 @@ class MemorySink extends Table with SupportsWrite with Logging { Set(TableCapability.STREAMING_WRITE).asJava } - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = { + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder with SupportsTruncate { private var needTruncate: Boolean = false - private var inputSchema: StructType = _ + private val inputSchema: StructType = info.schema() override def truncate(): WriteBuilder = { this.needTruncate = true this } - override def withInputDataSchema(schema: StructType): WriteBuilder = { - this.inputSchema = schema - this - } - override def buildForStreaming(): StreamingWrite = { new MemoryStreamingWrite(MemorySink.this, inputSchema, needTruncate) } @@ -140,7 +134,7 @@ class MemoryStreamingWrite( val sink: MemorySink, schema: StructType, needTruncate: Boolean) extends StreamingWrite { - override def createStreamingWriterFactory: MemoryWriterFactory = { + override def createStreamingWriterFactory(info: PhysicalWriteInfo): MemoryWriterFactory = { MemoryWriterFactory(schema) } @@ -191,6 +185,8 @@ class MemoryDataWriter(partition: Int, schema: StructType) } override def abort(): Unit = {} + + override def close(): Unit = {} } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 6ee54b948a7d4..05c651f9951b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -43,7 +43,7 @@ import org.apache.spark.util.{SizeEstimator, Utils} /** * An implementation of [[StateStoreProvider]] and [[StateStore]] in which all the data is backed - * by files in a 
HDFS-compatible file system. All updates to the store has to be done in sets + * by files in an HDFS-compatible file system. All updates to the store has to be done in sets * transactionally, and each set of updates increments the store's version. These versions can * be used to re-execute the updates (by retries in RDD operations) on the correct version of * the store, and regenerate the store version. @@ -79,7 +79,7 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit // java.util.ConcurrentModificationException type MapType = java.util.concurrent.ConcurrentHashMap[UnsafeRow, UnsafeRow] - /** Implementation of [[StateStore]] API which is backed by a HDFS-compatible file system */ + /** Implementation of [[StateStore]] API which is backed by an HDFS-compatible file system */ class HDFSBackedStateStore(val version: Long, mapToUpdate: MapType) extends StateStore { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 43f22803e7685..1a0a43c083879 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -23,10 +23,12 @@ import org.apache.hadoop.conf.Configuration import org.apache.spark.TaskContext import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, Literal, SpecificInternalRow, UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, JoinedRow, Literal, SpecificInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.execution.streaming.{StatefulOperatorStateInfo, 
StreamingSymmetricHashJoinExec} import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper._ -import org.apache.spark.sql.types.{LongType, StructField, StructType} +import org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager.KeyToValuePair +import org.apache.spark.sql.types.{BooleanType, LongType, StructField, StructType} import org.apache.spark.util.NextIterator /** @@ -42,10 +44,15 @@ import org.apache.spark.util.NextIterator * @param stateInfo Information about how to retrieve the correct version of state * @param storeConf Configuration for the state store. * @param hadoopConf Hadoop configuration for reading state data from storage + * @param partitionId A partition ID of source RDD. + * @param stateFormatVersion The version of format for state. * * Internally, the key -> multiple values is stored in two [[StateStore]]s. * - Store 1 ([[KeyToNumValuesStore]]) maintains mapping between key -> number of values - * - Store 2 ([[KeyWithIndexToValueStore]]) maintains mapping between (key, index) -> value + * - Store 2 ([[KeyWithIndexToValueStore]]) maintains mapping; the mapping depends on the state + * format version: + * - version 1: [(key, index) -> value] + * - version 2: [(key, index) -> (value, matched)] * - Put: update count in KeyToNumValuesStore, * insert new (key, count) -> value in KeyWithIndexToValueStore * - Get: read count from KeyToNumValuesStore, @@ -54,7 +61,7 @@ import org.apache.spark.util.NextIterator * scan all keys in KeyToNumValuesStore to find keys that do match the predicate, * delete from key from KeyToNumValuesStore, delete values in KeyWithIndexToValueStore * - Remove state by condition on values: - * scan all [(key, index) -> value] in KeyWithIndexToValueStore to find values that match + * scan all elements in KeyWithIndexToValueStore to find values that match * the predicate, delete corresponding (key, indexToDelete) from KeyWithIndexToValueStore * by overwriting with the value of (key, 
maxIndex), and removing [(key, maxIndex), * decrement corresponding num values in KeyToNumValuesStore @@ -65,8 +72,9 @@ class SymmetricHashJoinStateManager( joinKeys: Seq[Expression], stateInfo: Option[StatefulOperatorStateInfo], storeConf: StateStoreConf, - hadoopConf: Configuration) extends Logging { - + hadoopConf: Configuration, + partitionId: Int, + stateFormatVersion: Int) extends Logging { import SymmetricHashJoinStateManager._ /* @@ -82,23 +90,46 @@ class SymmetricHashJoinStateManager( } /** Append a new value to the key */ - def append(key: UnsafeRow, value: UnsafeRow): Unit = { + def append(key: UnsafeRow, value: UnsafeRow, matched: Boolean): Unit = { val numExistingValues = keyToNumValues.get(key) - keyWithIndexToValue.put(key, numExistingValues, value) + keyWithIndexToValue.put(key, numExistingValues, value, matched) keyToNumValues.put(key, numExistingValues + 1) } + /** + * Get all the matched values for given join condition, with marking matched. + * This method is designed to mark joined rows properly without exposing internal index of row. + */ + def getJoinedRows( + key: UnsafeRow, + generateJoinedRow: InternalRow => JoinedRow, + predicate: JoinedRow => Boolean): Iterator[JoinedRow] = { + val numValues = keyToNumValues.get(key) + keyWithIndexToValue.getAll(key, numValues).map { keyIdxToValue => + val joinedRow = generateJoinedRow(keyIdxToValue.value) + if (predicate(joinedRow)) { + if (!keyIdxToValue.matched) { + keyWithIndexToValue.put(key, keyIdxToValue.valueIndex, keyIdxToValue.value, + matched = true) + } + joinedRow + } else { + null + } + }.filter(_ != null) + } + /** * Remove using a predicate on keys. * - * This produces an iterator over the (key, value) pairs satisfying condition(key), where the - * underlying store is updated as a side-effect of producing next. + * This produces an iterator over the (key, value, matched) tuples satisfying condition(key), + * where the underlying store is updated as a side-effect of producing next. 
* * This implies the iterator must be consumed fully without any other operations on this manager * or the underlying store being interleaved. */ - def removeByKeyCondition(removalCondition: UnsafeRow => Boolean): Iterator[UnsafeRowPair] = { - new NextIterator[UnsafeRowPair] { + def removeByKeyCondition(removalCondition: UnsafeRow => Boolean): Iterator[KeyToValuePair] = { + new NextIterator[KeyToValuePair] { private val allKeyToNumValues = keyToNumValues.iterator @@ -107,15 +138,15 @@ class SymmetricHashJoinStateManager( private def currentKey = currentKeyToNumValue.key - private val reusedPair = new UnsafeRowPair() + private val reusedRet = new KeyToValuePair() - private def getAndRemoveValue() = { + private def getAndRemoveValue(): KeyToValuePair = { val keyWithIndexAndValue = currentValues.next() keyWithIndexToValue.remove(currentKey, keyWithIndexAndValue.valueIndex) - reusedPair.withRows(currentKey, keyWithIndexAndValue.value) + reusedRet.withNew(currentKey, keyWithIndexAndValue.value, keyWithIndexAndValue.matched) } - override def getNext(): UnsafeRowPair = { + override def getNext(): KeyToValuePair = { // If there are more values for the current key, remove and return the next one. if (currentValues != null && currentValues.hasNext) { return getAndRemoveValue() @@ -126,8 +157,7 @@ class SymmetricHashJoinStateManager( while (allKeyToNumValues.hasNext) { currentKeyToNumValue = allKeyToNumValues.next() if (removalCondition(currentKey)) { - currentValues = keyWithIndexToValue.getAll( - currentKey, currentKeyToNumValue.numValue) + currentValues = keyWithIndexToValue.getAll(currentKey, currentKeyToNumValue.numValue) keyToNumValues.remove(currentKey) if (currentValues.hasNext) { @@ -148,18 +178,18 @@ class SymmetricHashJoinStateManager( /** * Remove using a predicate on values. 
* - * At a high level, this produces an iterator over the (key, value) pairs such that value - * satisfies the predicate, where producing an element removes the value from the state store - * and producing all elements with a given key updates it accordingly. + * At a high level, this produces an iterator over the (key, value, matched) tuples such that + * value satisfies the predicate, where producing an element removes the value from the + * state store and producing all elements with a given key updates it accordingly. * * This implies the iterator must be consumed fully without any other operations on this manager * or the underlying store being interleaved. */ - def removeByValueCondition(removalCondition: UnsafeRow => Boolean): Iterator[UnsafeRowPair] = { - new NextIterator[UnsafeRowPair] { + def removeByValueCondition(removalCondition: UnsafeRow => Boolean): Iterator[KeyToValuePair] = { + new NextIterator[KeyToValuePair] { // Reuse this object to avoid creation+GC overhead. - private val reusedPair = new UnsafeRowPair() + private val reusedRet = new KeyToValuePair() private val allKeyToNumValues = keyToNumValues.iterator @@ -187,7 +217,7 @@ class SymmetricHashJoinStateManager( // Find the next value satisfying the condition, updating `currentKey` and `numValues` if // needed. Returns null when no value can be found. - private def findNextValueForIndex(): UnsafeRow = { + private def findNextValueForIndex(): ValueAndMatchPair = { // Loop across all values for the current key, and then all other keys, until we find a // value satisfying the removal condition. def hasMoreValuesForCurrentKey = currentKey != null && index < numValues @@ -195,9 +225,9 @@ class SymmetricHashJoinStateManager( while (hasMoreValuesForCurrentKey || hasMoreKeys) { if (hasMoreValuesForCurrentKey) { // First search the values for the current key. 
- val currentValue = keyWithIndexToValue.get(currentKey, index) - if (removalCondition(currentValue)) { - return currentValue + val valuePair = keyWithIndexToValue.get(currentKey, index) + if (removalCondition(valuePair.value)) { + return valuePair } else { index += 1 } @@ -219,7 +249,7 @@ class SymmetricHashJoinStateManager( return null } - override def getNext(): UnsafeRowPair = { + override def getNext(): KeyToValuePair = { val currentValue = findNextValueForIndex() // If there's no value, clean up and finish. There aren't any more available. @@ -233,8 +263,13 @@ class SymmetricHashJoinStateManager( // any hole. So we swap the last element into the hole and decrement numValues to shorten. // clean if (numValues > 1) { - val valueAtMaxIndex = keyWithIndexToValue.get(currentKey, numValues - 1) - keyWithIndexToValue.put(currentKey, index, valueAtMaxIndex) + val valuePairAtMaxIndex = keyWithIndexToValue.get(currentKey, numValues - 1) + if (valuePairAtMaxIndex != null) { + keyWithIndexToValue.put(currentKey, index, valuePairAtMaxIndex.value, + valuePairAtMaxIndex.matched) + } else { + keyWithIndexToValue.put(currentKey, index, null, false) + } keyWithIndexToValue.remove(currentKey, numValues - 1) } else { keyWithIndexToValue.remove(currentKey, 0) @@ -242,7 +277,7 @@ class SymmetricHashJoinStateManager( numValues -= 1 valueRemoved = true - return reusedPair.withRows(currentKey, currentValue) + return reusedRet.withNew(currentKey, currentValue.value, currentValue.matched) } override def close: Unit = {} @@ -294,7 +329,7 @@ class SymmetricHashJoinStateManager( joinKeys.zipWithIndex.map { case (k, i) => StructField(s"field$i", k.dataType, k.nullable) }) private val keyAttributes = keySchema.toAttributes private val keyToNumValues = new KeyToNumValuesStore() - private val keyWithIndexToValue = new KeyWithIndexToValueStore() + private val keyWithIndexToValue = new KeyWithIndexToValueStore(stateFormatVersion) // Clean up any state store resources if necessary at the end of 
the task Option(TaskContext.get()).foreach { _.addTaskCompletionListener[Unit] { _ => abortIfNeeded() } } @@ -322,7 +357,7 @@ class SymmetricHashJoinStateManager( /** Get the StateStore with the given schema */ protected def getStateStore(keySchema: StructType, valueSchema: StructType): StateStore = { val storeProviderId = StateStoreProviderId( - stateInfo.get, TaskContext.getPartitionId(), getStateStoreName(joinSide, stateStoreType)) + stateInfo.get, partitionId, getStateStoreName(joinSide, stateStoreType)) val store = StateStore.get( storeProviderId, keySchema, valueSchema, None, stateInfo.get.storeVersion, storeConf, hadoopConf) @@ -335,7 +370,7 @@ class SymmetricHashJoinStateManager( * Helper class for representing data returned by [[KeyWithIndexToValueStore]]. * Designed for object reuse. */ - private case class KeyAndNumValues(var key: UnsafeRow = null, var numValue: Long = 0) { + private class KeyAndNumValues(var key: UnsafeRow = null, var numValue: Long = 0) { def withNew(newKey: UnsafeRow, newNumValues: Long): this.type = { this.key = newKey this.numValue = newNumValues @@ -380,18 +415,105 @@ class SymmetricHashJoinStateManager( * Helper class for representing data returned by [[KeyWithIndexToValueStore]]. * Designed for object reuse. 
*/ - private case class KeyWithIndexAndValue( - var key: UnsafeRow = null, var valueIndex: Long = -1, var value: UnsafeRow = null) { - def withNew(newKey: UnsafeRow, newIndex: Long, newValue: UnsafeRow): this.type = { + private class KeyWithIndexAndValue( + var key: UnsafeRow = null, + var valueIndex: Long = -1, + var value: UnsafeRow = null, + var matched: Boolean = false) { + + def withNew( + newKey: UnsafeRow, + newIndex: Long, + newValue: UnsafeRow, + newMatched: Boolean): this.type = { this.key = newKey this.valueIndex = newIndex this.value = newValue + this.matched = newMatched this } + + def withNew( + newKey: UnsafeRow, + newIndex: Long, + newValue: ValueAndMatchPair): this.type = { + this.key = newKey + this.valueIndex = newIndex + if (newValue != null) { + this.value = newValue.value + this.matched = newValue.matched + } else { + this.value = null + this.matched = false + } + this + } + } + + private trait KeyWithIndexToValueRowConverter { + def valueAttributes: Seq[Attribute] + + def convertValue(value: UnsafeRow): ValueAndMatchPair + + def convertToValueRow(value: UnsafeRow, matched: Boolean): UnsafeRow + } + + private object KeyWithIndexToValueRowConverter { + def create(version: Int): KeyWithIndexToValueRowConverter = version match { + case 1 => new KeyWithIndexToValueRowConverterFormatV1() + case 2 => new KeyWithIndexToValueRowConverterFormatV2() + case _ => throw new IllegalArgumentException("Incorrect state format version! 
" + + s"version $version") + } + } + + private class KeyWithIndexToValueRowConverterFormatV1 extends KeyWithIndexToValueRowConverter { + override val valueAttributes: Seq[Attribute] = inputValueAttributes + + override def convertValue(value: UnsafeRow): ValueAndMatchPair = { + if (value != null) ValueAndMatchPair(value, false) else null + } + + override def convertToValueRow(value: UnsafeRow, matched: Boolean): UnsafeRow = value + } + + private class KeyWithIndexToValueRowConverterFormatV2 extends KeyWithIndexToValueRowConverter { + private val valueWithMatchedExprs = inputValueAttributes :+ Literal(true) + private val indexOrdinalInValueWithMatchedRow = inputValueAttributes.size + + private val valueWithMatchedRowGenerator = UnsafeProjection.create(valueWithMatchedExprs, + inputValueAttributes) + + override val valueAttributes: Seq[Attribute] = inputValueAttributes :+ + AttributeReference("matched", BooleanType)() + + // Projection to generate key row from (value + matched) row + private val valueRowGenerator = UnsafeProjection.create( + inputValueAttributes, valueAttributes) + + override def convertValue(value: UnsafeRow): ValueAndMatchPair = { + if (value != null) { + ValueAndMatchPair(valueRowGenerator(value), + value.getBoolean(indexOrdinalInValueWithMatchedRow)) + } else { + null + } + } + + override def convertToValueRow(value: UnsafeRow, matched: Boolean): UnsafeRow = { + val row = valueWithMatchedRowGenerator(value) + row.setBoolean(indexOrdinalInValueWithMatchedRow, matched) + row + } } - /** A wrapper around a [[StateStore]] that stores [(key, index) -> value]. */ - private class KeyWithIndexToValueStore extends StateStoreHandler(KeyWithIndexToValueType) { + /** + * A wrapper around a [[StateStore]] that stores the mapping; the mapping depends on the + * state format version - please refer implementations of [[KeyWithIndexToValueRowConverter]]. 
+ */ + private class KeyWithIndexToValueStore(stateFormatVersion: Int) + extends StateStoreHandler(KeyWithIndexToValueType) { + private val keyWithIndexExprs = keyAttributes :+ Literal(1L) private val keyWithIndexSchema = keySchema.add("index", LongType) private val indexOrdinalInKeyWithIndexRow = keyAttributes.size @@ -403,10 +525,13 @@ class SymmetricHashJoinStateManager( private val keyRowGenerator = UnsafeProjection.create( keyAttributes, keyAttributes :+ AttributeReference("index", LongType)()) - protected val stateStore = getStateStore(keyWithIndexSchema, inputValueAttributes.toStructType) + private val valueRowConverter = KeyWithIndexToValueRowConverter.create(stateFormatVersion) + + protected val stateStore = getStateStore(keyWithIndexSchema, + valueRowConverter.valueAttributes.toStructType) - def get(key: UnsafeRow, valueIndex: Long): UnsafeRow = { - stateStore.get(keyWithIndexRow(key, valueIndex)) + def get(key: UnsafeRow, valueIndex: Long): ValueAndMatchPair = { + valueRowConverter.convertValue(stateStore.get(keyWithIndexRow(key, valueIndex))) } /** @@ -423,8 +548,8 @@ class SymmetricHashJoinStateManager( null } else { val keyWithIndex = keyWithIndexRow(key, index) - val value = stateStore.get(keyWithIndex) - keyWithIndexAndValue.withNew(key, index, value) + val valuePair = valueRowConverter.convertValue(stateStore.get(keyWithIndex)) + keyWithIndexAndValue.withNew(key, index, valuePair) index += 1 keyWithIndexAndValue } @@ -435,9 +560,10 @@ class SymmetricHashJoinStateManager( } /** Put new value for key at the given index */ - def put(key: UnsafeRow, valueIndex: Long, value: UnsafeRow): Unit = { + def put(key: UnsafeRow, valueIndex: Long, value: UnsafeRow, matched: Boolean): Unit = { val keyWithIndex = keyWithIndexRow(key, valueIndex) - stateStore.put(keyWithIndex, value) + val valueWithMatched = valueRowConverter.convertToValueRow(value, matched) + stateStore.put(keyWithIndex, valueWithMatched) } /** @@ -460,8 +586,9 @@ class 
SymmetricHashJoinStateManager( def iterator: Iterator[KeyWithIndexAndValue] = { val keyWithIndexAndValue = new KeyWithIndexAndValue() stateStore.getRange(None, None).map { pair => + val valuePair = valueRowConverter.convertValue(pair.value) keyWithIndexAndValue.withNew( - keyRowGenerator(pair.key), pair.key.getLong(indexOrdinalInKeyWithIndexRow), pair.value) + keyRowGenerator(pair.key), pair.key.getLong(indexOrdinalInKeyWithIndexRow), valuePair) keyWithIndexAndValue } } @@ -476,6 +603,8 @@ class SymmetricHashJoinStateManager( } object SymmetricHashJoinStateManager { + val supportedVersions = Seq(1, 2) + val legacyVersion = 1 def allStateStoreNames(joinSides: JoinSide*): Seq[String] = { val allStateStoreTypes: Seq[StateStoreType] = Seq(KeyToNumValuesType, KeyWithIndexToValueType) @@ -497,4 +626,35 @@ object SymmetricHashJoinStateManager { private def getStateStoreName(joinSide: JoinSide, storeType: StateStoreType): String = { s"$joinSide-$storeType" } + + /** Helper class for representing data (value, matched). */ + case class ValueAndMatchPair(value: UnsafeRow, matched: Boolean) + + /** + * Helper class for representing data key to (value, matched). + * Designed for object reuse. 
+ */ + case class KeyToValuePair( + var key: UnsafeRow = null, + var value: UnsafeRow = null, + var matched: Boolean = false) { + def withNew(newKey: UnsafeRow, newValue: UnsafeRow, newMatched: Boolean): this.type = { + this.key = newKey + this.value = newValue + this.matched = newMatched + this + } + + def withNew(newKey: UnsafeRow, newValue: ValueAndMatchPair): this.type = { + this.key = newKey + if (newValue != null) { + this.value = newValue.value + this.matched = newValue.matched + } else { + this.value = null + this.matched = false + } + this + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index d689a6f3c9819..1bec924ba219a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -26,7 +26,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateUnsafeProjection, Predicate} +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ @@ -156,17 +156,17 @@ trait WatermarkSupport extends UnaryExecNode { } /** Predicate based on keys that matches data older than the watermark */ - lazy val watermarkPredicateForKeys: Option[Predicate] = watermarkExpression.flatMap { e => + lazy val watermarkPredicateForKeys: Option[BasePredicate] = watermarkExpression.flatMap { e => if 
(keyExpressions.exists(_.metadata.contains(EventTimeWatermark.delayKey))) { - Some(newPredicate(e, keyExpressions)) + Some(Predicate.create(e, keyExpressions)) } else { None } } /** Predicate based on the child output that matches data older than the watermark. */ - lazy val watermarkPredicateForData: Option[Predicate] = - watermarkExpression.map(newPredicate(_, child.output)) + lazy val watermarkPredicateForData: Option[BasePredicate] = + watermarkExpression.map(Predicate.create(_, child.output)) protected def removeKeysOlderThanWatermark(store: StateStore): Unit = { if (watermarkPredicateForKeys.nonEmpty) { @@ -353,6 +353,7 @@ case class StateStoreSaveExec( finished = true null } else { + numOutputRows += 1 removedValueRow } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingGlobalLimitExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala similarity index 68% rename from sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingGlobalLimitExec.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala index bf4af60c8cf03..b19540253d7eb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingGlobalLimitExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala @@ -20,21 +20,21 @@ import java.util.concurrent.TimeUnit.NANOSECONDS import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.expressions.GenericInternalRow -import org.apache.spark.sql.catalyst.expressions.UnsafeProjection -import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, SortOrder, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, 
Distribution, Partitioning} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.{LimitExec, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.streaming.state.StateStoreOps import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{LongType, NullType, StructField, StructType} -import org.apache.spark.util.CompletionIterator +import org.apache.spark.util.{CompletionIterator, NextIterator} /** * A physical operator for executing a streaming limit, which makes sure no more than streamLimit - * rows are returned. This operator is meant for streams in Append mode only. + * rows are returned. This physical operator is only meant for logical limit operations that + * will get a input stream of rows that are effectively appends. For example, + * - limit on any query in append mode + * - limit before the aggregation in a streaming aggregation query complete mode */ case class StreamingGlobalLimitExec( streamLimit: Long, @@ -49,9 +49,6 @@ case class StreamingGlobalLimitExec( override protected def doExecute(): RDD[InternalRow] = { metrics // force lazy init at driver - assert(outputMode.isDefined && outputMode.get == InternalOutputModes.Append, - "StreamingGlobalLimitExec is only valid for streams in Append output mode") - child.execute().mapPartitionsWithStateStore( getStateInfo, keySchema, @@ -100,3 +97,41 @@ case class StreamingGlobalLimitExec( UnsafeProjection.create(valueSchema)(new GenericInternalRow(Array[Any](value))) } } + + +/** + * A physical operator for executing limits locally on each partition. The main difference from + * LocalLimitExec is that this will fully consume `child` plan's iterators to ensure that any + * stateful operation within `child` commits all the state changes (many stateful operations + * commit state changes only after the iterator is consumed). 
+ */ +case class StreamingLocalLimitExec(limit: Int, child: SparkPlan) + extends LimitExec { + + override def doExecute(): RDD[InternalRow] = child.execute().mapPartitions { iter => + + var generatedCount = 0 + + new NextIterator[InternalRow]() { + override protected def getNext(): InternalRow = { + if (generatedCount < limit && iter.hasNext) { + generatedCount += 1 + iter.next() + } else { + finished = true + null + } + } + + override protected def close(): Unit = { + while (iter.hasNext) iter.next() // consume the iterator completely + } + } + } + + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override def outputPartitioning: Partitioning = child.outputPartitioning + + override def output: Seq[Attribute] = child.output +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 22e3f8e035991..c2270c57eb941 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -172,13 +172,13 @@ case class InSubqueryExec( } /** - * Plans scalar subqueries from that are present in the given [[SparkPlan]]. + * Plans subqueries that are present in the given [[SparkPlan]]. 
*/ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { plan.transformAllExpressions { case subquery: expressions.ScalarSubquery => - val executedPlan = new QueryExecution(sparkSession, subquery.plan).executedPlan + val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, subquery.plan) ScalarSubquery( SubqueryExec(s"scalar-subquery#${subquery.exprId.id}", executedPlan), subquery.exprId) @@ -192,8 +192,8 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { } ) } - val executedPlan = new QueryExecution(sparkSession, query).executedPlan - InSubqueryExec(expr, SubqueryExec(s"subquery${exprId.id}", executedPlan), exprId) + val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, query) + InSubqueryExec(expr, SubqueryExec(s"subquery#${exprId.id}", executedPlan), exprId) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala index ec0577283265d..e1ff90a2c20e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala @@ -324,7 +324,15 @@ private[ui] class ExecutionPagedTable( - {header} + {if (header == "Duration") { + + {header} + + } else { + {header} + }} } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala index 875086cda258d..91360e0e50314 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala @@ -116,7 +116,7 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging {metadata}
    {planVisualizationResources(request)} - + } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala index 2c4a7eacdf10b..1454cc05ed4da 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala @@ -16,10 +16,12 @@ */ package org.apache.spark.sql.execution.ui -import java.util.{Date, NoSuchElementException} +import java.util.{Arrays, Date, NoSuchElementException} import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ +import scala.collection.mutable import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.internal.Logging @@ -29,6 +31,7 @@ import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.metric._ import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.status.{ElementTrackingStore, KVUtils, LiveEntity} +import org.apache.spark.util.collection.OpenHashMap class SQLAppStatusListener( conf: SparkConf, @@ -50,7 +53,7 @@ class SQLAppStatusListener( liveExecutions.isEmpty && stageMetrics.isEmpty } - kvstore.addTrigger(classOf[SQLExecutionUIData], conf.get(UI_RETAINED_EXECUTIONS)) { count => + kvstore.addTrigger(classOf[SQLExecutionUIData], conf.get[Int](UI_RETAINED_EXECUTIONS)) { count => cleanupExecutions(count) } @@ -92,7 +95,7 @@ class SQLAppStatusListener( executionData.jobs = sqlStoreData.jobs executionData.stages = sqlStoreData.stages executionData.metricsValues = sqlStoreData.metricValues - executionData.endEvents = sqlStoreData.jobs.size + 1 + executionData.endEvents.set(sqlStoreData.jobs.size + 1) liveExecutions.put(executionId, executionData) Some(executionData) } catch { @@ -100,11 +103,14 @@ class SQLAppStatusListener( } 
}.getOrElse(getOrCreateExecution(executionId)) - // Record the accumulator IDs for the stages of this job, so that the code that keeps - // track of the metrics knows which accumulators to look at. - val accumIds = exec.metrics.map(_.accumulatorId).toSet - event.stageIds.foreach { id => - stageMetrics.put(id, new LiveStageMetrics(id, 0, accumIds, new ConcurrentHashMap())) + // Record the accumulator IDs and metric types for the stages of this job, so that the code + // that keeps track of the metrics knows which accumulators to look at. + val accumIdsAndType = exec.metrics.map { m => (m.accumulatorId, m.metricType) }.toMap + if (accumIdsAndType.nonEmpty) { + event.stageInfos.foreach { stage => + stageMetrics.put(stage.stageId, new LiveStageMetrics(stage.stageId, 0, + stage.numTasks, accumIdsAndType)) + } } exec.jobs = exec.jobs + (jobId -> JobExecutionStatus.RUNNING) @@ -118,9 +124,12 @@ class SQLAppStatusListener( } // Reset the metrics tracking object for the new attempt. - Option(stageMetrics.get(event.stageInfo.stageId)).foreach { metrics => - metrics.taskMetrics.clear() - metrics.attemptId = event.stageInfo.attemptNumber + Option(stageMetrics.get(event.stageInfo.stageId)).foreach { stage => + if (stage.attemptId != event.stageInfo.attemptNumber) { + stageMetrics.put(event.stageInfo.stageId, + new LiveStageMetrics(event.stageInfo.stageId, event.stageInfo.attemptNumber, + stage.numTasks, stage.accumIdsToMetricType)) + } } } @@ -132,7 +141,7 @@ class SQLAppStatusListener( case _ => JobExecutionStatus.FAILED } exec.jobs = exec.jobs + (event.jobId -> result) - exec.endEvents += 1 + exec.endEvents.incrementAndGet() update(exec) } } @@ -140,7 +149,16 @@ class SQLAppStatusListener( override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate): Unit = { event.accumUpdates.foreach { case (taskId, stageId, attemptId, accumUpdates) => - updateStageMetrics(stageId, attemptId, taskId, accumUpdates, false) + updateStageMetrics(stageId, attemptId, taskId, 
SQLAppStatusListener.UNKNOWN_INDEX, + accumUpdates, false) + } + } + + override def onTaskStart(event: SparkListenerTaskStart): Unit = { + Option(stageMetrics.get(event.stageId)).foreach { stage => + if (stage.attemptId == event.stageAttemptId) { + stage.registerTask(event.taskInfo.taskId, event.taskInfo.index) + } } } @@ -165,7 +183,7 @@ class SQLAppStatusListener( } else { info.accumulables } - updateStageMetrics(event.stageId, event.stageAttemptId, info.taskId, accums, + updateStageMetrics(event.stageId, event.stageAttemptId, info.taskId, info.index, accums, info.successful) } @@ -181,17 +199,64 @@ class SQLAppStatusListener( private def aggregateMetrics(exec: LiveExecutionData): Map[Long, String] = { val metricTypes = exec.metrics.map { m => (m.accumulatorId, m.metricType) }.toMap - val metrics = exec.stages.toSeq + + val liveStageMetrics = exec.stages.toSeq .flatMap { stageId => Option(stageMetrics.get(stageId)) } - .flatMap(_.taskMetrics.values().asScala) - .flatMap { metrics => metrics.ids.zip(metrics.values) } - - val aggregatedMetrics = (metrics ++ exec.driverAccumUpdates.toSeq) - .filter { case (id, _) => metricTypes.contains(id) } - .groupBy(_._1) - .map { case (id, values) => - id -> SQLMetrics.stringValue(metricTypes(id), values.map(_._2)) + + val taskMetrics = liveStageMetrics.flatMap(_.metricValues()) + + val maxMetrics = liveStageMetrics.flatMap(_.maxMetricValues()) + + val allMetrics = new mutable.HashMap[Long, Array[Long]]() + + val maxMetricsFromAllStages = new mutable.HashMap[Long, Array[Long]]() + + taskMetrics.foreach { case (id, values) => + val prev = allMetrics.getOrElse(id, null) + val updated = if (prev != null) { + prev ++ values + } else { + values + } + allMetrics(id) = updated + } + + // Find the max for each metric id between all stages. 
+ maxMetrics.foreach { case (id, value, taskId, stageId, attemptId) => + val updated = maxMetricsFromAllStages.getOrElse(id, Array(value, stageId, attemptId, taskId)) + if (value > updated(0)) { + updated(0) = value + updated(1) = stageId + updated(2) = attemptId + updated(3) = taskId } + maxMetricsFromAllStages(id) = updated + } + + exec.driverAccumUpdates.foreach { case (id, value) => + if (metricTypes.contains(id)) { + val prev = allMetrics.getOrElse(id, null) + val updated = if (prev != null) { + // If the driver updates same metrics as tasks and has higher value then remove + // that entry from maxMetricsFromAllStage. This would make stringValue function default + // to "driver" that would be displayed on UI. + if (maxMetricsFromAllStages.contains(id) && value > maxMetricsFromAllStages(id)(0)) { + maxMetricsFromAllStages.remove(id) + } + val _copy = Arrays.copyOf(prev, prev.length + 1) + _copy(prev.length) = value + _copy + } else { + Array(value) + } + allMetrics(id) = updated + } + } + + val aggregatedMetrics = allMetrics.map { case (id, values) => + id -> SQLMetrics.stringValue(metricTypes(id), values, maxMetricsFromAllStages.getOrElse(id, + Array.empty[Long])) + }.toMap // Check the execution again for whether the aggregated metrics data has been calculated. 
// This can happen if the UI is requesting this data, and the onExecutionEnd handler is @@ -208,43 +273,13 @@ class SQLAppStatusListener( stageId: Int, attemptId: Int, taskId: Long, + taskIdx: Int, accumUpdates: Seq[AccumulableInfo], succeeded: Boolean): Unit = { Option(stageMetrics.get(stageId)).foreach { metrics => - if (metrics.attemptId != attemptId || metrics.accumulatorIds.isEmpty) { - return - } - - val oldTaskMetrics = metrics.taskMetrics.get(taskId) - if (oldTaskMetrics != null && oldTaskMetrics.succeeded) { - return - } - - val updates = accumUpdates - .filter { acc => acc.update.isDefined && metrics.accumulatorIds.contains(acc.id) } - .sortBy(_.id) - - if (updates.isEmpty) { - return + if (metrics.attemptId == attemptId) { + metrics.updateTaskMetrics(taskId, taskIdx, succeeded, accumUpdates) } - - val ids = new Array[Long](updates.size) - val values = new Array[Long](updates.size) - updates.zipWithIndex.foreach { case (acc, idx) => - ids(idx) = acc.id - // In a live application, accumulators have Long values, but when reading from event - // logs, they have String values. For now, assume all accumulators are Long and covert - // accordingly. - values(idx) = acc.update.get match { - case s: String => s.toLong - case l: Long => l - case o => throw new IllegalArgumentException(s"Unexpected: $o") - } - } - - // TODO: storing metrics by task ID can cause metrics for the same task index to be - // counted multiple times, for example due to speculation or re-attempts. 
- metrics.taskMetrics.put(taskId, new LiveTaskMetrics(ids, values, succeeded)) } } @@ -309,15 +344,29 @@ class SQLAppStatusListener( update(exec) } + private def onAdaptiveSQLMetricUpdate(event: SparkListenerSQLAdaptiveSQLMetricUpdates): Unit = { + val SparkListenerSQLAdaptiveSQLMetricUpdates(executionId, sqlPlanMetrics) = event + + val exec = getOrCreateExecution(executionId) + exec.metrics = exec.metrics ++ sqlPlanMetrics + update(exec) + } + private def onExecutionEnd(event: SparkListenerSQLExecutionEnd): Unit = { val SparkListenerSQLExecutionEnd(executionId, time) = event Option(liveExecutions.get(executionId)).foreach { exec => - exec.metricsValues = aggregateMetrics(exec) exec.completionTime = Some(new Date(time)) - exec.endEvents += 1 update(exec) - removeStaleMetricsData(exec) + // Aggregating metrics can be expensive for large queries, so do it asynchronously. The end + // event count is updated after the metrics have been aggregated, to prevent a job end event + // arriving during aggregation from cleaning up the metrics data. 
+ kvstore.doAsync { + exec.metricsValues = aggregateMetrics(exec) + removeStaleMetricsData(exec) + exec.endEvents.incrementAndGet() + update(exec, force = true) + } } } @@ -342,6 +391,7 @@ class SQLAppStatusListener( override def onOtherEvent(event: SparkListenerEvent): Unit = event match { case e: SparkListenerSQLExecutionStart => onExecutionStart(e) case e: SparkListenerSQLAdaptiveExecutionUpdate => onAdaptiveExecutionUpdate(e) + case e: SparkListenerSQLAdaptiveSQLMetricUpdates => onAdaptiveSQLMetricUpdate(e) case e: SparkListenerSQLExecutionEnd => onExecutionEnd(e) case e: SparkListenerDriverAccumUpdates => onDriverAccumUpdates(e) case _ => // Ignore @@ -354,7 +404,7 @@ class SQLAppStatusListener( private def update(exec: LiveExecutionData, force: Boolean = false): Unit = { val now = System.nanoTime() - if (exec.endEvents >= exec.jobs.size + 1) { + if (exec.endEvents.get() >= exec.jobs.size + 1) { exec.write(kvstore, now) removeStaleMetricsData(exec) liveExecutions.remove(exec.executionId) @@ -406,7 +456,7 @@ private class LiveExecutionData(val executionId: Long) extends LiveEntity { // Just in case job end and execution end arrive out of order, keep track of how many // end events arrived so that the listener can stop tracking the execution. - var endEvents = 0 + val endEvents = new AtomicInteger() override protected def doUpdate(): Any = { new SQLExecutionUIData( @@ -426,11 +476,94 @@ private class LiveExecutionData(val executionId: Long) extends LiveEntity { private class LiveStageMetrics( val stageId: Int, - var attemptId: Int, - val accumulatorIds: Set[Long], - val taskMetrics: ConcurrentHashMap[Long, LiveTaskMetrics]) - -private class LiveTaskMetrics( - val ids: Array[Long], - val values: Array[Long], - val succeeded: Boolean) + val attemptId: Int, + val numTasks: Int, + val accumIdsToMetricType: Map[Long, String]) { + + /** + * Mapping of task IDs to their respective index. 
Note this may contain more elements than the + * stage's number of tasks, if speculative execution is on. + */ + private val taskIndices = new OpenHashMap[Long, Int]() + + /** Bit set tracking which indices have been successfully computed. */ + private val completedIndices = new mutable.BitSet() + + /** + * Task metrics values for the stage. Maps the metric ID to the metric values for each + * index. For each metric ID, there will be the same number of values as the number + * of indices. This relies on `SQLMetrics.stringValue` treating 0 as a neutral value, + * independent of the actual metric type. + */ + private val taskMetrics = new ConcurrentHashMap[Long, Array[Long]]() + + private val metricsIdToMaxTaskValue = new ConcurrentHashMap[Long, Array[Long]]() + + def registerTask(taskId: Long, taskIdx: Int): Unit = { + taskIndices.update(taskId, taskIdx) + } + + def updateTaskMetrics( + taskId: Long, + eventIdx: Int, + finished: Boolean, + accumUpdates: Seq[AccumulableInfo]): Unit = { + val taskIdx = if (eventIdx == SQLAppStatusListener.UNKNOWN_INDEX) { + if (!taskIndices.contains(taskId)) { + // We probably missed the start event for the task, just ignore it. + return + } + taskIndices(taskId) + } else { + // Here we can recover from a missing task start event. Just register the task again. + registerTask(taskId, eventIdx) + eventIdx + } + + if (completedIndices.contains(taskIdx)) { + return + } + + accumUpdates + .filter { acc => acc.update.isDefined && accumIdsToMetricType.contains(acc.id) } + .foreach { acc => + // In a live application, accumulators have Long values, but when reading from event + // logs, they have String values. For now, assume all accumulators are Long and convert + // accordingly. 
+ val value = acc.update.get match { + case s: String => s.toLong + case l: Long => l + case o => throw new IllegalArgumentException(s"Unexpected: $o") + } + + val metricValues = taskMetrics.computeIfAbsent(acc.id, _ => new Array(numTasks)) + metricValues(taskIdx) = value + + if (SQLMetrics.metricNeedsMax(accumIdsToMetricType(acc.id))) { + val maxMetricsTaskId = metricsIdToMaxTaskValue.computeIfAbsent(acc.id, _ => Array(value, + taskId)) + + if (value > maxMetricsTaskId.head) { + maxMetricsTaskId(0) = value + maxMetricsTaskId(1) = taskId + } + } + } + if (finished) { + completedIndices += taskIdx + } + } + + def metricValues(): Seq[(Long, Array[Long])] = taskMetrics.asScala.toSeq + + // Return Seq of metric id, value, taskId, stageId, attemptId for this stage + def maxMetricValues(): Seq[(Long, Long, Long, Int, Int)] = { + metricsIdToMaxTaskValue.asScala.toSeq.map { case (id, maxMetrics) => (id, maxMetrics(0), + maxMetrics(1), stageId, attemptId) + } + } +} + +private object SQLAppStatusListener { + val UNKNOWN_INDEX = -1 +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala index 241001a857c8f..a90f37a80d525 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala @@ -42,6 +42,10 @@ class SQLAppStatusStore( store.view(classOf[SQLExecutionUIData]).asScala.toSeq } + def executionsList(offset: Int, length: Int): Seq[SQLExecutionUIData] = { + store.view(classOf[SQLExecutionUIData]).skip(offset).max(length).asScala.toSeq + } + def execution(executionId: Long): Option[SQLExecutionUIData] = { try { Some(store.read(classOf[SQLExecutionUIData], executionId)) @@ -133,7 +137,7 @@ class SparkPlanGraphNodeWrapper( val cluster: SparkPlanGraphClusterWrapper) { def toSparkPlanGraphNode(): SparkPlanGraphNode = { - assert(node == 
null ^ cluster == null, "One and only of of nore or cluster must be set.") + assert(node == null ^ cluster == null, "Exactly one of node, cluster values to be set.") if (node != null) node else cluster.toSparkPlanGraphCluster() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLHistoryServerPlugin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLHistoryServerPlugin.scala index 522d0cf79bffa..5bf1ce5eb8a90 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLHistoryServerPlugin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLHistoryServerPlugin.scala @@ -33,4 +33,7 @@ class SQLHistoryServerPlugin extends AppHistoryServerPlugin { new SQLTab(sqlStatusStore, ui) } } + + override def displayOrder: Int = 0 } + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala index 81cbc7f54c7eb..6a6a71c46f213 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala @@ -34,6 +34,12 @@ case class SparkListenerSQLAdaptiveExecutionUpdate( sparkPlanInfo: SparkPlanInfo) extends SparkListenerEvent +@DeveloperApi +case class SparkListenerSQLAdaptiveSQLMetricUpdates( + executionId: Long, + sqlPlanMetrics: Seq[SQLPlanMetric]) + extends SparkListenerEvent + @DeveloperApi case class SparkListenerSQLExecutionStart( executionId: Long, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala index f898236c537a8..d31d77840b802 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala @@ -78,7 +78,7 @@ object SparkPlanGraph { subgraph: 
SparkPlanGraphCluster, exchanges: mutable.HashMap[SparkPlanInfo, SparkPlanGraphNode]): Unit = { planInfo.nodeName match { - case "WholeStageCodegen" => + case name if name.startsWith("WholeStageCodegen") => val metrics = planInfo.metrics.map { metric => SQLPlanMetric(metric.name, metric.accumulatorId, metric.metricType) } @@ -175,9 +175,12 @@ private[ui] class SparkPlanGraphNode( // SparkPlan and metrics. If removing it, it won't display the empty line in UI. builder ++= "\n \n" builder ++= values.mkString("\n") + s""" $id [label="${StringEscapeUtils.escapeJava(builder.toString())}"];""" + } else { + // SPARK-30684: when there is no metrics, add empty lines to increase the height of the node, + // so that there won't be gaps between an edge and a small node. + s""" $id [labelType="html" label="
    $name

    "];""" } - - s""" $id [label="${StringEscapeUtils.escapeJava(builder.toString())}"];""" } } @@ -197,8 +200,8 @@ private[ui] class SparkPlanGraphCluster( val labelStr = if (duration.nonEmpty) { require(duration.length == 1) val id = duration(0).accumulatorId - if (metricsValue.contains(duration(0).accumulatorId)) { - name + "\n\n" + metricsValue(id) + if (metricsValue.contains(id)) { + name + "\n \n" + duration(0).name + ": " + metricsValue(id) } else { name } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index 89f6edda2ef57..d191f3790ffa8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -123,7 +123,7 @@ case class WindowExec( var nextRow: UnsafeRow = null var nextGroup: UnsafeRow = null var nextRowAvailable: Boolean = false - private[this] def fetchNextRow() { + private[this] def fetchNextRow(): Unit = { nextRowAvailable = stream.hasNext if (nextRowAvailable) { nextRow = stream.next().asInstanceOf[UnsafeRow] @@ -144,7 +144,7 @@ case class WindowExec( val windowFunctionResult = new SpecificInternalRow(expressions.map(_.dataType)) val frames = factories.map(_(windowFunctionResult)) val numFrames = frames.length - private[this] def fetchNextPartition() { + private[this] def fetchNextPartition(): Unit = { // Collect all the rows in the current partition. // Before we start to fetch new input rows, make a copy of nextGroup. 
val currentGroup = nextGroup.copy() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index dcb86f48bdf32..d5d11c45f8535 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -73,7 +73,7 @@ abstract class WindowExecBase( RowBoundOrdering(offset) case (RangeFrame, CurrentRow) => - val ordering = newOrdering(orderSpec, child.output) + val ordering = RowOrdering.create(orderSpec, child.output) RangeBoundOrdering(ordering, IdentityProjection, IdentityProjection) case (RangeFrame, offset: Expression) if orderSpec.size == 1 => @@ -82,7 +82,7 @@ abstract class WindowExecBase( val expr = sortExpr.child // Create the projection which returns the current 'value'. - val current = newMutableProjection(expr :: Nil, child.output) + val current = MutableProjection.create(expr :: Nil, child.output) // Flip the sign of the offset when processing the order is descending val boundOffset = sortExpr.direction match { @@ -97,13 +97,13 @@ abstract class WindowExecBase( TimeAdd(expr, boundOffset, Some(timeZone)) case (a, b) if a == b => Add(expr, boundOffset) } - val bound = newMutableProjection(boundExpr :: Nil, child.output) + val bound = MutableProjection.create(boundExpr :: Nil, child.output) // Construct the ordering. This is used to compare the result of current value projection // to the result of bound value projection. This is done manually because we want to use // Code Generation (if it is enabled). 
val boundSortExprs = sortExpr.copy(BoundReference(0, expr.dataType, expr.nullable)) :: Nil - val ordering = newOrdering(boundSortExprs, Nil) + val ordering = RowOrdering.create(boundSortExprs, Nil) RangeBoundOrdering(ordering, current, bound) case (RangeFrame, _) => @@ -136,7 +136,7 @@ abstract class WindowExecBase( case e @ WindowExpression(function, spec) => val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] function match { - case AggregateExpression(f, _, _, _) => collect("AGGREGATE", frame, e, f) + case AggregateExpression(f, _, _, _, _) => collect("AGGREGATE", frame, e, f) case f: AggregateWindowFunction => collect("AGGREGATE", frame, e, f) case f: OffsetWindowFunction => collect("OFFSET", frame, e, f) case f: PythonUDF => collect("AGGREGATE", frame, e, f) @@ -167,7 +167,7 @@ abstract class WindowExecBase( ordinal, child.output, (expressions, schema) => - newMutableProjection(expressions, schema, subexpressionEliminationEnabled)) + MutableProjection.create(expressions, schema)) } // Create the factory @@ -182,7 +182,7 @@ abstract class WindowExecBase( functions.map(_.asInstanceOf[OffsetWindowFunction]), child.output, (expressions, schema) => - newMutableProjection(expressions, schema, subexpressionEliminationEnabled), + MutableProjection.create(expressions, schema), offset) // Entire Partition Frame. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 0c956ecbf936e..85b2cd379ba24 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -17,10 +17,15 @@ package org.apache.spark.sql.expressions -import org.apache.spark.annotation.Stable -import org.apache.spark.sql.Column +import scala.reflect.runtime.universe.TypeTag + +import org.apache.spark.annotation.{Experimental, Stable} +import org.apache.spark.sql.{Column, Encoder} import org.apache.spark.sql.catalyst.ScalaReflection +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} +import org.apache.spark.sql.execution.aggregate.ScalaAggregator import org.apache.spark.sql.types.{AnyDataType, DataType} /** @@ -136,3 +141,42 @@ private[sql] case class SparkUserDefinedFunction( } } } + +private[sql] case class UserDefinedAggregator[IN, BUF, OUT]( + aggregator: Aggregator[IN, BUF, OUT], + inputEncoder: Encoder[IN], + name: Option[String] = None, + nullable: Boolean = true, + deterministic: Boolean = true) extends UserDefinedFunction { + + @scala.annotation.varargs + def apply(exprs: Column*): Column = { + Column(AggregateExpression(scalaAggregator(exprs.map(_.expr)), Complete, isDistinct = false)) + } + + // This is also used by udf.register(...) 
when it detects a UserDefinedAggregator + def scalaAggregator(exprs: Seq[Expression]): ScalaAggregator[IN, BUF, OUT] = { + val iEncoder = inputEncoder.asInstanceOf[ExpressionEncoder[IN]] + ScalaAggregator(exprs, aggregator, iEncoder, nullable, deterministic) + } + + override def withName(name: String): UserDefinedAggregator[IN, BUF, OUT] = { + copy(name = Option(name)) + } + + override def asNonNullable(): UserDefinedAggregator[IN, BUF, OUT] = { + if (!nullable) { + this + } else { + copy(nullable = false) + } + } + + override def asNondeterministic(): UserDefinedAggregator[IN, BUF, OUT] = { + if (!deterministic) { + this + } else { + copy(deterministic = false) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala index cd1c198ddebf0..d13baaedbaeff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.expressions import org.apache.spark.annotation.Stable import org.apache.spark.sql.Column -import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.{WindowSpec => _, _} /** * Utility functions for defining window in DataFrames. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala index 4e8cb3a6ddd66..8407b1419af62 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/udaf.scala @@ -27,8 +27,12 @@ import org.apache.spark.sql.types._ * The base class for implementing user-defined aggregate functions (UDAF). * * @since 1.5.0 + * @deprecated UserDefinedAggregateFunction is deprecated. + * Aggregator[IN, BUF, OUT] should now be registered as a UDF via the functions.udaf(agg) method. 
*/ @Stable +@deprecated("Aggregator[IN, BUF, OUT] should now be registered as a UDF" + + " via the functions.udaf(agg) method.", "3.0.0") abstract class UserDefinedAggregateFunction extends Serializable { /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 6b8127bab1cb4..2d5504ac00ffa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -31,13 +31,13 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, HintInfo, ResolvedHint} +import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.execution.SparkSqlParser -import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedFunction} +import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils - /** * Commonly used functions available for DataFrame operations. Using functions defined here provides * a little bit more compile-time safety to make sure the function exists. @@ -69,6 +69,7 @@ import org.apache.spark.util.Utils * @groupname window_funcs Window functions * @groupname string_funcs String functions * @groupname collection_funcs Collection functions + * @groupname partition_transforms Partition transform functions * @groupname Ungrouped Support functions for DataFrames * @since 1.3.0 */ @@ -272,7 +273,7 @@ object functions { * Aggregate function: returns a list of objects with duplicates. 
* * @note The function is non-deterministic because the order of collected results depends - * on order of rows which may be non-deterministic after a shuffle. + * on the order of the rows which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 1.6.0 @@ -283,7 +284,7 @@ object functions { * Aggregate function: returns a list of objects with duplicates. * * @note The function is non-deterministic because the order of collected results depends - * on order of rows which may be non-deterministic after a shuffle. + * on the order of the rows which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 1.6.0 @@ -294,7 +295,7 @@ object functions { * Aggregate function: returns a set of objects with duplicate elements eliminated. * * @note The function is non-deterministic because the order of collected results depends - * on order of rows which may be non-deterministic after a shuffle. + * on the order of the rows which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 1.6.0 @@ -305,7 +306,7 @@ object functions { * Aggregate function: returns a set of objects with duplicate elements eliminated. * * @note The function is non-deterministic because the order of collected results depends - * on order of rows which may be non-deterministic after a shuffle. + * on the order of the rows which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 1.6.0 @@ -423,8 +424,8 @@ object functions { * The function by default returns the first values it sees. It will return the first non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. * - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. 
* * @group agg_funcs * @since 2.0.0 @@ -439,8 +440,8 @@ object functions { * The function by default returns the first values it sees. It will return the first non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. * - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 2.0.0 @@ -455,8 +456,8 @@ object functions { * The function by default returns the first values it sees. It will return the first non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. * - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 1.3.0 @@ -469,8 +470,8 @@ object functions { * The function by default returns the first values it sees. It will return the first non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. * - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 1.3.0 @@ -548,8 +549,8 @@ object functions { * The function by default returns the last values it sees. It will return the last non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. 
* - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 2.0.0 @@ -564,8 +565,8 @@ object functions { * The function by default returns the last values it sees. It will return the last non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. * - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 2.0.0 @@ -580,8 +581,8 @@ object functions { * The function by default returns the last values it sees. It will return the last non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. * - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. * * @group agg_funcs * @since 1.3.0 @@ -594,8 +595,8 @@ object functions { * The function by default returns the last values it sees. It will return the last non-null * value it sees when ignoreNulls is set to true. If all values are null, then null is returned. * - * @note The function is non-deterministic because its results depends on order of rows which - * may be non-deterministic after a shuffle. + * @note The function is non-deterministic because its results depends on the order of the rows + * which may be non-deterministic after a shuffle. 
* * @group agg_funcs * @since 1.3.0 @@ -2521,25 +2522,25 @@ object functions { } /** - * Overlay the specified portion of `src` with `replaceString`, - * starting from byte position `pos` of `inputString` and proceeding for `len` bytes. + * Overlay the specified portion of `src` with `replace`, + * starting from byte position `pos` of `src` and proceeding for `len` bytes. * * @group string_funcs * @since 3.0.0 */ - def overlay(src: Column, replaceString: String, pos: Int, len: Int): Column = withExpr { - Overlay(src.expr, lit(replaceString).expr, lit(pos).expr, lit(len).expr) + def overlay(src: Column, replace: Column, pos: Column, len: Column): Column = withExpr { + Overlay(src.expr, replace.expr, pos.expr, len.expr) } /** - * Overlay the specified portion of `src` with `replaceString`, - * starting from byte position `pos` of `inputString`. + * Overlay the specified portion of `src` with `replace`, + * starting from byte position `pos` of `src`. * * @group string_funcs * @since 3.0.0 */ - def overlay(src: Column, replaceString: String, pos: Int): Column = withExpr { - new Overlay(src.expr, lit(replaceString).expr, lit(pos).expr) + def overlay(src: Column, replace: Column, pos: Column): Column = withExpr { + new Overlay(src.expr, replace.expr, pos.expr) } /** @@ -2634,8 +2635,8 @@ object functions { * See [[java.time.format.DateTimeFormatter]] for valid date and time format patterns * * @param dateExpr A date, timestamp or string. 
If a string, the data must be in a format that - * can be cast to a timestamp, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` - * @param format A pattern `dd.MM.yyyy` would return a string like `18.03.1993` + * can be cast to a timestamp, such as `uuuu-MM-dd` or `uuuu-MM-dd HH:mm:ss.SSSS` + * @param format A pattern `dd.MM.uuuu` would return a string like `18.03.1993` * @return A string, or null if `dateExpr` was a string that could not be cast to a timestamp * @note Use specialized functions like [[year]] whenever possible as they benefit from a * specialized implementation. @@ -2881,7 +2882,7 @@ object functions { * @since 1.5.0 */ def from_unixtime(ut: Column): Column = withExpr { - FromUnixTime(ut.expr, Literal("uuuu-MM-dd HH:mm:ss")) + FromUnixTime(ut.expr, Literal(TimestampFormatter.defaultPattern)) } /** @@ -2913,7 +2914,7 @@ object functions { * @since 1.5.0 */ def unix_timestamp(): Column = withExpr { - UnixTimestamp(CurrentTimestamp(), Literal("uuuu-MM-dd HH:mm:ss")) + UnixTimestamp(CurrentTimestamp(), Literal(TimestampFormatter.defaultPattern)) } /** @@ -2927,7 +2928,7 @@ object functions { * @since 1.5.0 */ def unix_timestamp(s: Column): Column = withExpr { - UnixTimestamp(s.expr, Literal("uuuu-MM-dd HH:mm:ss")) + UnixTimestamp(s.expr, Literal(TimestampFormatter.defaultPattern)) } /** @@ -3053,7 +3054,6 @@ object functions { * @group datetime_funcs * @since 1.5.0 */ - @deprecated("This function is deprecated and will be removed in future versions.", "3.0.0") def from_utc_timestamp(ts: Column, tz: String): Column = withExpr { FromUTCTimestamp(ts.expr, Literal(tz)) } @@ -3065,7 +3065,6 @@ object functions { * @group datetime_funcs * @since 2.4.0 */ - @deprecated("This function is deprecated and will be removed in future versions.", "3.0.0") def from_utc_timestamp(ts: Column, tz: Column): Column = withExpr { FromUTCTimestamp(ts.expr, tz.expr) } @@ -3084,7 +3083,6 @@ object functions { * @group datetime_funcs * @since 1.5.0 */ - @deprecated("This 
function is deprecated and will be removed in future versions.", "3.0.0") def to_utc_timestamp(ts: Column, tz: String): Column = withExpr { ToUTCTimestamp(ts.expr, Literal(tz)) } @@ -3096,7 +3094,6 @@ object functions { * @group datetime_funcs * @since 2.4.0 */ - @deprecated("This function is deprecated and will be removed in future versions.", "3.0.0") def to_utc_timestamp(ts: Column, tz: Column): Column = withExpr { ToUTCTimestamp(ts.expr, tz.expr) } @@ -3266,6 +3263,11 @@ object functions { /** * Returns an array containing all the elements in `x` from index `start` (or starting from the * end if `start` is negative) with the specified `length`. + * + * @param x the array column to be sliced + * @param start the starting index + * @param length the length of the slice + * * @group collection_funcs * @since 2.4.0 */ @@ -3334,7 +3336,7 @@ object functions { * @group collection_funcs * @since 2.4.0 */ - def array_sort(e: Column): Column = withExpr { ArraySort(e.expr) } + def array_sort(e: Column): Column = withExpr { new ArraySort(e.expr) } /** * Remove all elements that equal to element from the given array. 
@@ -3385,6 +3387,265 @@ object functions { ArrayExcept(col1.expr, col2.expr) } + private def createLambda(f: Column => Column) = { + val x = UnresolvedNamedLambdaVariable(Seq("x")) + val function = f(Column(x)).expr + LambdaFunction(function, Seq(x)) + } + + private def createLambda(f: (Column, Column) => Column) = { + val x = UnresolvedNamedLambdaVariable(Seq("x")) + val y = UnresolvedNamedLambdaVariable(Seq("y")) + val function = f(Column(x), Column(y)).expr + LambdaFunction(function, Seq(x, y)) + } + + private def createLambda(f: (Column, Column, Column) => Column) = { + val x = UnresolvedNamedLambdaVariable(Seq("x")) + val y = UnresolvedNamedLambdaVariable(Seq("y")) + val z = UnresolvedNamedLambdaVariable(Seq("z")) + val function = f(Column(x), Column(y), Column(z)).expr + LambdaFunction(function, Seq(x, y, z)) + } + + /** + * Returns an array of elements after applying a transformation to each element + * in the input array. + * {{{ + * df.select(transform(col("i"), x => x + 1)) + * }}} + * + * @param column the input array column + * @param f col => transformed_col, the lambda function to transform the input column + * + * @group collection_funcs + * @since 3.0.0 + */ + def transform(column: Column, f: Column => Column): Column = withExpr { + ArrayTransform(column.expr, createLambda(f)) + } + + /** + * Returns an array of elements after applying a transformation to each element + * in the input array. + * {{{ + * df.select(transform(col("i"), (x, i) => x + i)) + * }}} + * + * @param column the input array column + * @param f (col, index) => transformed_col, the lambda function to filter the input column + * given the index. Indices start at 0. + * + * @group collection_funcs + * @since 3.0.0 + */ + def transform(column: Column, f: (Column, Column) => Column): Column = withExpr { + ArrayTransform(column.expr, createLambda(f)) + } + + /** + * Returns whether a predicate holds for one or more elements in the array. 
+ * {{{ + * df.select(exists(col("i"), _ % 2 === 0)) + * }}} + * + * @param column the input array column + * @param f col => predicate, the Boolean predicate to check the input column + * + * @group collection_funcs + * @since 3.0.0 + */ + def exists(column: Column, f: Column => Column): Column = withExpr { + ArrayExists(column.expr, createLambda(f)) + } + + /** + * Returns whether a predicate holds for every element in the array. + * {{{ + * df.select(forall(col("i"), x => x % 2 === 0)) + * }}} + * + * @param column the input array column + * @param f col => predicate, the Boolean predicate to check the input column + * + * @group collection_funcs + * @since 3.0.0 + */ + def forall(column: Column, f: Column => Column): Column = withExpr { + ArrayForAll(column.expr, createLambda(f)) + } + + /** + * Returns an array of elements for which a predicate holds in a given array. + * {{{ + * df.select(filter(col("s"), x => x % 2 === 0)) + * }}} + * + * @param column the input array column + * @param f col => predicate, the Boolean predicate to filter the input column + * + * @group collection_funcs + * @since 3.0.0 + */ + def filter(column: Column, f: Column => Column): Column = withExpr { + ArrayFilter(column.expr, createLambda(f)) + } + + /** + * Returns an array of elements for which a predicate holds in a given array. + * {{{ + * df.select(filter(col("s"), (x, i) => i % 2 === 0)) + * }}} + * + * @param column the input array column + * @param f (col, index) => predicate, the Boolean predicate to filter the input column + * given the index. Indices start at 0. + * + * @group collection_funcs + * @since 3.0.0 + */ + def filter(column: Column, f: (Column, Column) => Column): Column = withExpr { + ArrayFilter(column.expr, createLambda(f)) + } + + /** + * Applies a binary operator to an initial state and all elements in the array, + * and reduces this to a single state. The final state is converted into the final result + * by applying a finish function. 
+ * {{{ + * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x, _ * 10)) + * }}} + * + * @param expr the input array column + * @param initialValue the initial value + * @param merge (combined_value, input_value) => combined_value, the merge function to merge + * an input value to the combined_value + * @param finish combined_value => final_value, the lambda function to convert the combined value + * of all inputs to final result + * + * @group collection_funcs + * @since 3.0.0 + */ + def aggregate( + expr: Column, + initialValue: Column, + merge: (Column, Column) => Column, + finish: Column => Column): Column = withExpr { + ArrayAggregate( + expr.expr, + initialValue.expr, + createLambda(merge), + createLambda(finish) + ) + } + + /** + * Applies a binary operator to an initial state and all elements in the array, + * and reduces this to a single state. + * {{{ + * df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x)) + * }}} + * + * @param expr the input array column + * @param initialValue the initial value + * @param merge (combined_value, input_value) => combined_value, the merge function to merge + * an input value to the combined_value + * @group collection_funcs + * @since 3.0.0 + */ + def aggregate(expr: Column, initialValue: Column, merge: (Column, Column) => Column): Column = + aggregate(expr, initialValue, merge, c => c) + + /** + * Merge two given arrays, element-wise, into a single array using a function. + * If one array is shorter, nulls are appended at the end to match the length of the longer + * array, before applying the function. 
+ * {{{ + * df.select(zip_with(df1("val1"), df1("val2"), (x, y) => x + y)) + * }}} + * + * @param left the left input array column + * @param right the right input array column + * @param f (lCol, rCol) => col, the lambda function to merge two input columns into one column + * + * @group collection_funcs + * @since 3.0.0 + */ + def zip_with(left: Column, right: Column, f: (Column, Column) => Column): Column = withExpr { + ZipWith(left.expr, right.expr, createLambda(f)) + } + + /** + * Applies a function to every key-value pair in a map and returns + * a map with the results of those applications as the new keys for the pairs. + * {{{ + * df.select(transform_keys(col("i"), (k, v) => k + v)) + * }}} + * + * @param expr the input map column + * @param f (key, value) => new_key, the lambda function to transform the key of input map column + * + * @group collection_funcs + * @since 3.0.0 + */ + def transform_keys(expr: Column, f: (Column, Column) => Column): Column = withExpr { + TransformKeys(expr.expr, createLambda(f)) + } + + /** + * Applies a function to every key-value pair in a map and returns + * a map with the results of those applications as the new values for the pairs. + * {{{ + * df.select(transform_values(col("i"), (k, v) => k + v)) + * }}} + * + * @param expr the input map column + * @param f (key, value) => new_value, the lambda function to transform the value of input map + * column + * + * @group collection_funcs + * @since 3.0.0 + */ + def transform_values(expr: Column, f: (Column, Column) => Column): Column = withExpr { + TransformValues(expr.expr, createLambda(f)) + } + + /** + * Returns a map whose key-value pairs satisfy a predicate. 
+ * {{{ + * df.select(map_filter(col("m"), (k, v) => k * 10 === v)) + * }}} + * + * @param expr the input map column + * @param f (key, value) => predicate, the Boolean predicate to filter the input map column + * + * @group collection_funcs + * @since 3.0.0 + */ + def map_filter(expr: Column, f: (Column, Column) => Column): Column = withExpr { + MapFilter(expr.expr, createLambda(f)) + } + + /** + * Merge two given maps, key-wise into a single map using a function. + * {{{ + * df.select(map_zip_with(df("m1"), df("m2"), (k, v1, v2) => k === v1 + v2)) + * }}} + * + * @param left the left input map column + * @param right the right input map column + * @param f (key, value1, value2) => new_value, the lambda function to merge the map values + * + * @group collection_funcs + * @since 3.0.0 + */ + def map_zip_with( + left: Column, + right: Column, + f: (Column, Column, Column) => Column): Column = withExpr { + MapZipWith(left.expr, right.expr, createLambda(f)) + } + /** * Creates a new row for each element in the given array or map column. * Uses the default column name `col` for elements in the array and @@ -3942,6 +4203,63 @@ object functions { */ def to_csv(e: Column): Column = to_csv(e, Map.empty[String, String].asJava) + /** + * A transform for timestamps and dates to partition data into years. + * + * @group partition_transforms + * @since 3.0.0 + */ + def years(e: Column): Column = withExpr { Years(e.expr) } + + /** + * A transform for timestamps and dates to partition data into months. + * + * @group partition_transforms + * @since 3.0.0 + */ + def months(e: Column): Column = withExpr { Months(e.expr) } + + /** + * A transform for timestamps and dates to partition data into days. + * + * @group partition_transforms + * @since 3.0.0 + */ + def days(e: Column): Column = withExpr { Days(e.expr) } + + /** + * A transform for timestamps to partition data into hours. 
+ * + * @group partition_transforms + * @since 3.0.0 + */ + def hours(e: Column): Column = withExpr { Hours(e.expr) } + + /** + * A transform for any type that partitions by a hash of the input column. + * + * @group partition_transforms + * @since 3.0.0 + */ + def bucket(numBuckets: Column, e: Column): Column = withExpr { + numBuckets.expr match { + case lit @ Literal(_, IntegerType) => + Bucket(lit, e.expr) + case _ => + throw new AnalysisException(s"Invalid number of buckets: bucket($numBuckets, $e)") + } + } + + /** + * A transform for any type that partitions by a hash of the input column. + * + * @group partition_transforms + * @since 3.0.0 + */ + def bucket(numBuckets: Int, e: Column): Column = withExpr { + Bucket(Literal(numBuckets), e.expr) + } + // scalastyle:off line.size.limit // scalastyle:off parameter.number @@ -3997,6 +4315,67 @@ object functions { // Scala UDF functions ////////////////////////////////////////////////////////////////////////////////////////////// + /** + * Obtains a `UserDefinedFunction` that wraps the given `Aggregator` + * so that it may be used with untyped Data Frames. + * {{{ + * val agg = // Aggregator[IN, BUF, OUT] + * + * // declare a UDF based on agg + * val aggUDF = udaf(agg) + * val aggData = df.agg(aggUDF($"colname")) + * + * // register agg as a named function + * spark.udf.register("myAggName", udaf(agg)) + * }}} + * + * @tparam IN the aggregator input type + * @tparam BUF the aggregating buffer type + * @tparam OUT the finalized output type + * + * @param agg the typed Aggregator + * + * @return a UserDefinedFunction that can be used as an aggregating expression. + * + * @note The input encoder is inferred from the input type IN. + */ + def udaf[IN: TypeTag, BUF, OUT](agg: Aggregator[IN, BUF, OUT]): UserDefinedFunction = { + udaf(agg, ExpressionEncoder[IN]()) + } + + /** + * Obtains a `UserDefinedFunction` that wraps the given `Aggregator` + * so that it may be used with untyped Data Frames. 
+ * {{{ + * Aggregator agg = // custom Aggregator + * Encoder enc = // input encoder + * + * // declare a UDF based on agg + * UserDefinedFunction aggUDF = udaf(agg, enc) + * DataFrame aggData = df.agg(aggUDF($"colname")) + * + * // register agg as a named function + * spark.udf.register("myAggName", udaf(agg, enc)) + * }}} + * + * @tparam IN the aggregator input type + * @tparam BUF the aggregating buffer type + * @tparam OUT the finalized output type + * + * @param agg the typed Aggregator + * @param inputEncoder a specific input encoder to use + * + * @return a UserDefinedFunction that can be used as an aggregating expression + * + * @note This overloading takes an explicit input encoder, to support UDAF + * declarations in Java. + */ + def udaf[IN, BUF, OUT]( + agg: Aggregator[IN, BUF, OUT], + inputEncoder: Encoder[IN]): UserDefinedFunction = { + UserDefinedAggregator(agg, inputEncoder) + } + /** * Defines a Scala closure of 0 arguments as user-defined function (UDF). * The data types are automatically inferred based on the Scala closure's diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index db4885aa01bad..eb658e2d8850e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -19,16 +19,17 @@ package org.apache.spark.sql.internal import org.apache.spark.SparkConf import org.apache.spark.annotation.Unstable import org.apache.spark.sql.{ExperimentalMethods, SparkSession, UDFRegistration, _} -import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry} +import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, ResolveSessionCatalog} import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.optimizer.Optimizer import 
org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.{ColumnarRule, QueryExecution, SparkOptimizer, SparkPlanner, SparkSqlParser} import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck +import org.apache.spark.sql.execution.datasources.v2.{TableCapabilityCheck, V2SessionCatalog} import org.apache.spark.sql.streaming.StreamingQueryManager import org.apache.spark.sql.util.ExecutionListenerManager @@ -151,6 +152,10 @@ abstract class BaseSessionStateBuilder( catalog } + protected lazy val v2SessionCatalog = new V2SessionCatalog(catalog, conf) + + protected lazy val catalogManager = new CatalogManager(conf, v2SessionCatalog, catalog) + /** * Interface exposed to the user for registering user-defined functions. * @@ -164,12 +169,12 @@ abstract class BaseSessionStateBuilder( * * Note: this depends on the `conf` and `catalog` fields. */ - protected def analyzer: Analyzer = new Analyzer(catalog, conf) { + protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = new FindDataSourceTable(session) +: new ResolveSQLOnFile(session) +: new FallBackFileSourceV2(session) +: - DataSourceResolution(conf, this.catalogManager) +: + new ResolveSessionCatalog(catalogManager, conf, catalog.isView) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = @@ -223,7 +228,7 @@ abstract class BaseSessionStateBuilder( * Note: this depends on `catalog` and `experimentalMethods` fields. 
*/ protected def optimizer: Optimizer = { - new SparkOptimizer(catalog, experimentalMethods) { + new SparkOptimizer(catalogManager, catalog, experimentalMethods) { override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules } @@ -245,7 +250,7 @@ abstract class BaseSessionStateBuilder( * Note: this depends on the `conf` and `experimentalMethods` fields. */ protected def planner: SparkPlanner = { - new SparkPlanner(session.sparkContext, conf, experimentalMethods) { + new SparkPlanner(session, conf, experimentalMethods) { override def extraPlanningStrategies: Seq[Strategy] = super.extraPlanningStrategies ++ customPlanningStrategies } @@ -311,7 +316,7 @@ abstract class BaseSessionStateBuilder( () => analyzer, () => optimizer, planner, - streamingQueryManager, + () => streamingQueryManager, listenerManager, () => resourceLoader, createQueryExecution, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 3740b56cb9cbb..d3ef03e9b3b74 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -520,7 +520,7 @@ private[sql] object CatalogImpl { val encoded = data.map(d => enc.toRow(d).copy()) val plan = new LocalRelation(enc.schema.toAttributes, encoded) val queryExecution = sparkSession.sessionState.executePlan(plan) - new Dataset[T](sparkSession, queryExecution, enc) + new Dataset[T](queryExecution, enc) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala index 4921e3ca903c4..64b7e7fe7923a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala @@ -65,6 +65,14 @@ 
object HiveSerDe { outputFormat = Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"), serde = Option("org.apache.hadoop.hive.serde2.avro.AvroSerDe"))) + // `HiveSerDe` in `serdeMap` should be distinct. + val serdeInverseMap: Map[HiveSerDe, String] = serdeMap.flatMap { + case ("sequencefile", _) => None + case ("rcfile", _) => None + case ("textfile", serde) => Some((serde, "text")) + case pair => Some(pair.swap) + } + /** * Get the Hive SerDe information from the data source abbreviation string or classname. * @@ -88,6 +96,14 @@ object HiveSerDe { serdeMap.get(key) } + /** + * Get the Spark data source name from the Hive SerDe information. + * + * @param serde Hive SerDe information. + * @return Spark data source name associated with the specified Hive SerDe. + */ + def serdeToSource(serde: HiveSerDe): Option[String] = serdeInverseMap.get(serde) + def getDefaultStorage(conf: SQLConf): CatalogStorageFormat = { // To respect hive-site.xml, it peeks Hadoop configuration from existing Spark session, // as an easy workaround. See SPARK-27555. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index a83a0f51ecf11..abd1250628539 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -24,12 +24,12 @@ import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ -import org.apache.spark.sql.catalog.v2.CatalogManager import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.optimizer.Optimizer import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution._ import org.apache.spark.sql.streaming.StreamingQueryManager import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListener} @@ -49,7 +49,8 @@ import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListen * unresolved attributes and relations. * @param optimizerBuilder a function to create the logical query plan optimizer. * @param planner Planner that converts optimized logical plans to physical plans. - * @param streamingQueryManager Interface to start and stop streaming queries. + * @param streamingQueryManagerBuilder A function to create a streaming query manager to + * start and stop streaming queries. * @param listenerManager Interface to register custom [[QueryExecutionListener]]s. * @param resourceLoaderBuilder a function to create a session shared resource loader to load JARs, * files, etc. 
@@ -67,7 +68,7 @@ private[sql] class SessionState( analyzerBuilder: () => Analyzer, optimizerBuilder: () => Optimizer, val planner: SparkPlanner, - val streamingQueryManager: StreamingQueryManager, + val streamingQueryManagerBuilder: () => StreamingQueryManager, val listenerManager: ExecutionListenerManager, resourceLoaderBuilder: () => SessionResourceLoader, createQueryExecution: LogicalPlan => QueryExecution, @@ -83,6 +84,10 @@ private[sql] class SessionState( lazy val resourceLoader: SessionResourceLoader = resourceLoaderBuilder() + // The streamingQueryManager is lazy to avoid creating a StreamingQueryManager for each session + // when connecting to ThriftServer. + lazy val streamingQueryManager: StreamingQueryManager = streamingQueryManagerBuilder() + def catalogManager: CatalogManager = analyzer.catalogManager def newHadoopConf(): Configuration = SessionState.newHadoopConf( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index f1a648176c3b3..5347264d7c50a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.internal import java.net.URL -import java.util.Locale +import java.util.UUID +import java.util.concurrent.ConcurrentHashMap +import javax.annotation.concurrent.GuardedBy import scala.reflect.ClassTag import scala.util.control.NonFatal @@ -31,8 +33,11 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.CacheManager +import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab} import org.apache.spark.sql.internal.StaticSQLConf._ +import org.apache.spark.sql.streaming.StreamingQueryListener 
+import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} import org.apache.spark.status.ElementTrackingStore import org.apache.spark.util.Utils @@ -48,6 +53,8 @@ private[sql] class SharedState( initialConfigs: scala.collection.Map[String, String]) extends Logging { + SharedState.setFsUrlStreamHandlerFactory(sparkContext.conf) + // Load hive-site.xml into hadoopConf and determine the warehouse path we want to use, based on // the config from both hive and Spark SQL. Finally set the warehouse config value to sparkConf. val warehousePath: String = { @@ -110,6 +117,16 @@ private[sql] class SharedState( */ val cacheManager: CacheManager = new CacheManager + /** A global lock for all streaming query lifecycle tracking and management. */ + private[sql] val activeQueriesLock = new Object + + /** + * A map of active streaming queries to the session specific StreamingQueryManager that manages + * the lifecycle of that stream. + */ + @GuardedBy("activeQueriesLock") + private[sql] val activeStreamingQueries = new ConcurrentHashMap[UUID, StreamExecution]() + /** * A status store to query SQL status/metrics of this Spark application, based on SQL-specific * [[org.apache.spark.scheduler.SparkListenerEvent]]s. @@ -123,6 +140,22 @@ private[sql] class SharedState( statusStore } + /** + * A [[StreamingQueryListener]] for structured streaming ui, it contains all streaming query ui + * data to show. + */ + lazy val streamingQueryStatusListener: Option[StreamingQueryStatusListener] = { + sparkContext.ui.flatMap { ui => + if (conf.get(STREAMING_UI_ENABLED)) { + val statusListener = new StreamingQueryStatusListener(conf) + new StreamingQueryTab(statusListener, ui) + Some(statusListener) + } else { + None + } + } + } + /** * A catalog that interacts with external systems. 
*/ @@ -177,11 +210,23 @@ private[sql] class SharedState( } object SharedState extends Logging { - try { - URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory()) - } catch { - case e: Error => - logWarning("URL.setURLStreamHandlerFactory failed to set FsUrlStreamHandlerFactory") + @volatile private var fsUrlStreamHandlerFactoryInitialized = false + + private def setFsUrlStreamHandlerFactory(conf: SparkConf): Unit = { + if (!fsUrlStreamHandlerFactoryInitialized && + conf.get(DEFAULT_URL_STREAM_HANDLER_FACTORY_ENABLED)) { + synchronized { + if (!fsUrlStreamHandlerFactoryInitialized) { + try { + URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory()) + fsUrlStreamHandlerFactoryInitialized = true + } catch { + case NonFatal(_) => + logWarning("URL.setURLStreamHandlerFactory failed to set FsUrlStreamHandlerFactory") + } + } + } + } } private val HIVE_EXTERNAL_CATALOG_CLASS_NAME = "org.apache.spark.sql.hive.HiveExternalCatalog" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index d160ad82888a2..ab574df4557a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -18,12 +18,14 @@ package org.apache.spark.sql.jdbc import java.sql.Types +import java.util.Locale import org.apache.spark.sql.types._ private object DB2Dialect extends JdbcDialect { - override def canHandle(url: String): Boolean = url.startsWith("jdbc:db2") + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:db2") override def getCatalystType( sqlType: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index d13c29ed46bd5..d528d5a9fef5a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -18,13 +18,15 @@ package org.apache.spark.sql.jdbc import java.sql.Types +import java.util.Locale import org.apache.spark.sql.types._ private object DerbyDialect extends JdbcDialect { - override def canHandle(url: String): Boolean = url.startsWith("jdbc:derby") + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:derby") override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 805f73dee141b..72284b5996201 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -17,12 +17,16 @@ package org.apache.spark.sql.jdbc +import java.util.Locale + +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ private object MsSqlServerDialect extends JdbcDialect { - override def canHandle(url: String): Boolean = url.startsWith("jdbc:sqlserver") + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:sqlserver") override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { @@ -30,10 +34,14 @@ private object MsSqlServerDialect extends JdbcDialect { // String is recommend by Microsoft SQL Server for datetimeoffset types in non-MS clients Option(StringType) } else { - sqlType match { - case java.sql.Types.SMALLINT => Some(ShortType) - case java.sql.Types.REAL => Some(FloatType) - case _ => None + if (SQLConf.get.legacyMsSqlServerNumericMappingEnabled) { + None + } else { + sqlType match { + case java.sql.Types.SMALLINT => Some(ShortType) + case java.sql.Types.REAL => Some(FloatType) + case _ => None + } } } } @@ 
-43,7 +51,8 @@ private object MsSqlServerDialect extends JdbcDialect { case StringType => Some(JdbcType("NVARCHAR(MAX)", java.sql.Types.NVARCHAR)) case BooleanType => Some(JdbcType("BIT", java.sql.Types.BIT)) case BinaryType => Some(JdbcType("VARBINARY(MAX)", java.sql.Types.VARBINARY)) - case ShortType => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) + case ShortType if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => + Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index b2cff7877d8b5..24b31b14d9427 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -18,12 +18,14 @@ package org.apache.spark.sql.jdbc import java.sql.Types +import java.util.Locale import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder} private case object MySQLDialect extends JdbcDialect { - override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql") + override def canHandle(url : String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:mysql") override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index f4a6d0a4d2e44..4c0623729e00d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Date, Timestamp, Types} -import java.util.TimeZone +import java.util.{Locale, TimeZone} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf 
@@ -30,7 +30,8 @@ private case object OracleDialect extends JdbcDialect { private[jdbc] val BINARY_DOUBLE = 101 private[jdbc] val TIMESTAMPTZ = -101 - override def canHandle(url: String): Boolean = url.startsWith("jdbc:oracle") + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:oracle") private def supportTimeZoneTypes: Boolean = { val timeZone = DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index 2645e4c9d528b..c8d8a3392128e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.jdbc import java.sql.{Connection, Types} +import java.util.Locale import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.types._ @@ -25,7 +26,8 @@ import org.apache.spark.sql.types._ private object PostgresDialect extends JdbcDialect { - override def canHandle(url: String): Boolean = url.startsWith("jdbc:postgresql") + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:postgresql") override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala index 6c17bd7ed9ec4..552d7a484f3fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala @@ -17,14 +17,15 @@ package org.apache.spark.sql.jdbc -import java.sql.Types +import java.util.Locale import org.apache.spark.sql.types._ private case object TeradataDialect extends 
JdbcDialect { - override def canHandle(url: String): Boolean = { url.startsWith("jdbc:teradata") } + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:teradata") override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { case StringType => Some(JdbcType("VARCHAR(255)", java.sql.Types.VARCHAR)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 23a84cbd0dc02..0eb4776988d9f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -24,13 +24,13 @@ import scala.collection.JavaConverters._ import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.connector.catalog.{SupportsRead, TableProvider} +import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Utils, FileDataSourceV2} import org.apache.spark.sql.execution.streaming.{StreamingRelation, StreamingRelationV2} import org.apache.spark.sql.sources.StreamSourceProvider -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.TableCapability._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -83,9 +83,6 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo *
      *
    • `timeZone` (default session local timezone): sets the string that indicates a timezone * to be used to parse timestamps in the JSON/CSV datasources or partition values.
    • - *
    • `pathGlobFilter`: an optional glob pattern to only include files with paths matching - * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. - * It does not change the behavior of partition discovery.
    • *
    * * @since 2.0.0 @@ -123,9 +120,6 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo *
      *
    • `timeZone` (default session local timezone): sets the string that indicates a timezone * to be used to parse timestamps in the JSON/CSV data sources or partition values.
    • - *
    • `pathGlobFilter`: an optional glob pattern to only include files with paths matching - * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. - * It does not change the behavior of partition discovery.
    • *
    * * @since 2.0.0 @@ -142,9 +136,6 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo *
      *
    • `timeZone` (default session local timezone): sets the string that indicates a timezone * to be used to parse timestamps in the JSON/CSV data sources or partition values.
    • - *
    • `pathGlobFilter`: an optional glob pattern to only include files with paths matching - * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. - * It does not change the behavior of partition discovery.
    • *
    * * @since 2.0.0 @@ -182,15 +173,13 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo case _ => None } ds match { - case provider: TableProvider => + // file source v2 does not support streaming yet. + case provider: TableProvider if !provider.isInstanceOf[FileDataSourceV2] => val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = sparkSession.sessionState.conf) val options = sessionOptions ++ extraOptions val dsOptions = new CaseInsensitiveStringMap(options.asJava) - val table = userSpecifiedSchema match { - case Some(schema) => provider.getTable(dsOptions, schema) - case _ => provider.getTable(dsOptions) - } + val table = DataSourceV2Utils.getTableFromProvider(provider, dsOptions, userSpecifiedSchema) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ table match { case _: SupportsRead if table.supportsAny(MICRO_BATCH_READ, CONTINUOUS_READ) => @@ -277,6 +266,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * empty array/struct during schema inference. *
  • `locale` (default is `en-US`): sets a locale as language tag in IETF BCP 47 format. * For instance, this is used while parsing dates and timestamps.
  • + *
  • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
  • + *
  • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
  • * * * @since 2.0.0 @@ -357,6 +351,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * For instance, this is used while parsing dates and timestamps. *
  • `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator * that should be used for parsing. Maximum length is 1 character.
  • + *
  • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
  • + *
  • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
  • * * * @since 2.0.0 @@ -370,6 +369,14 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo *
      *
    • `maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be * considered in every trigger.
    • + *
    • `mergeSchema` (default is the value specified in `spark.sql.orc.mergeSchema`): sets whether + * we should merge schemas collected from all ORC part-files. This will override + * `spark.sql.orc.mergeSchema`.
    • + *
    • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
    • + *
    • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
    • *
    * * @since 2.3.0 @@ -389,6 +396,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * whether we should merge schemas collected from all * Parquet part-files. This will override * `spark.sql.parquet.mergeSchema`. + *
  • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
  • + *
  • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
  • * * * @since 2.0.0 @@ -419,6 +431,11 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * *
  • `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator * that should be used for parsing.
  • + *
  • `pathGlobFilter`: an optional glob pattern to only include files with paths matching + * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. + * It does not change the behavior of partition discovery.
  • + *
  • `recursiveFileLookup`: recursively scan a directory for files. Using this option + * disables partition discovery
  • * * * @since 2.0.0 @@ -442,15 +459,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * spark.readStream().textFile("/path/to/spark/README.md") * }}} * - * You can set the following text-specific options to deal with text files: - *
      - *
    • `maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be - * considered in every trigger.
    • - *
    • `wholetext` (default `false`): If true, read a file as a single row and not split by "\n". - *
    • - *
    • `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator - * that should be used for parsing.
    • - *
    + * You can set the text-specific options as specified in `DataStreamReader.text`. * * @param path input path * @since 2.1.0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 36104d7a70443..1c21a30dd5bd6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.streaming import java.util.Locale +import java.util.concurrent.TimeoutException import scala.collection.JavaConverters._ @@ -25,13 +26,13 @@ import org.apache.spark.annotation.Evolving import org.apache.spark.api.java.function.VoidFunction2 import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes +import org.apache.spark.sql.connector.catalog.{SupportsWrite, TableProvider} +import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Utils, FileDataSourceV2} import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources._ -import org.apache.spark.sql.sources.v2.{SupportsWrite, TableProvider} -import org.apache.spark.sql.sources.v2.TableCapability._ import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -238,10 +239,18 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { /** * Starts the execution of the streaming query, which will continually output results to the given * path as new data arrives. The returned [[StreamingQuery]] object can be used to interact with - * the stream. + * the stream. 
Throws a `TimeoutException` if the following conditions are met: + * - Another run of the same streaming query, that is a streaming query + * sharing the same checkpoint location, is already active on the same + * Spark Driver + * - The SQL configuration `spark.sql.streaming.stopActiveRunOnRestart` + * is enabled + * - The active run cannot be stopped within the timeout controlled by + * the SQL configuration `spark.sql.streaming.stopTimeout` * * @since 2.0.0 */ + @throws[TimeoutException] def start(): StreamingQuery = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + @@ -299,7 +308,9 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { } else { val cls = DataSource.lookupDataSource(source, df.sparkSession.sessionState.conf) val disabledSources = df.sparkSession.sqlContext.conf.disabledV2StreamingWriters.split(",") - val useV1Source = disabledSources.contains(cls.getCanonicalName) + val useV1Source = disabledSources.contains(cls.getCanonicalName) || + // file source v2 does not support streaming yet. 
+ classOf[FileDataSourceV2].isAssignableFrom(cls) val sink = if (classOf[TableProvider].isAssignableFrom(cls) && !useV1Source) { val provider = cls.getConstructor().newInstance().asInstanceOf[TableProvider] @@ -307,8 +318,10 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { source = provider, conf = df.sparkSession.sessionState.conf) val options = sessionOptions ++ extraOptions val dsOptions = new CaseInsensitiveStringMap(options.asJava) + val table = DataSourceV2Utils.getTableFromProvider( + provider, dsOptions, userSpecifiedSchema = None) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ - provider.getTable(dsOptions) match { + table match { case table: SupportsWrite if table.supports(STREAMING_WRITE) => table case _ => createV1Sink() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/GroupState.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/GroupState.scala index ab68eba81b843..af08a53e465b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/GroupState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/GroupState.scala @@ -93,7 +93,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalGroupState * any trigger and timeout function call will not occur until there is data. * - Since the processing time timeout is based on the clock time, it is affected by the * variations in the system clock (i.e. time zone changes, clock skew, etc.). - * - With `EventTimeTimeout`, the user also has to specify the the the event time watermark in + * - With `EventTimeTimeout`, the user also has to specify the event time watermark in * the query using `Dataset.withWatermark()`. With this setting, data that is older than the * watermark are filtered out. 
The timeout can be set for a group by setting a timeout timestamp * using`GroupState.setTimeoutTimestamp()`, and the timeout would occur when the watermark diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala index 47ddc88e964e8..85d980e5d6733 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.streaming import java.util.UUID +import java.util.concurrent.TimeoutException import org.apache.spark.annotation.Evolving import org.apache.spark.sql.SparkSession @@ -142,10 +143,17 @@ trait StreamingQuery { def processAllAvailable(): Unit /** - * Stops the execution of this query if it is running. This method blocks until the threads - * performing execution has stopped. + * Stops the execution of this query if it is running. This waits until the termination of the + * query execution threads or until a timeout is hit. + * + * By default stop will block indefinitely. You can configure a timeout by the configuration + * `spark.sql.streaming.stopTimeout`. A timeout of 0 (or negative) milliseconds will block + * indefinitely. If a `TimeoutException` is thrown, users can retry stopping the stream. If the + * issue persists, it is advisable to kill the Spark application. 
+ * * @since 2.0.0 */ + @throws[TimeoutException] def stop(): Unit /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala index 916d6a0365965..dd842cd1a3e99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala @@ -79,16 +79,18 @@ object StreamingQueryListener { /** * Event representing the start of a query - * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`. + * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`. * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`. * @param name User-specified name of the query, null if not specified. + * @param submissionTime The timestamp to start a query. * @since 2.1.0 */ @Evolving class QueryStartedEvent private[sql]( val id: UUID, val runId: UUID, - val name: String) extends Event + val name: String, + val submissionTime: Long) extends Event /** * Event representing any progress updates in a query. @@ -101,7 +103,7 @@ object StreamingQueryListener { /** * Event representing that termination of a query. * - * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`. + * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`. * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`. * @param exception The exception message of the query if the query was terminated * with an exception. Otherwise, it will be `None`. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index 976595616bd28..4d0d8ffd959c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.streaming -import java.util.UUID -import java.util.concurrent.TimeUnit +import java.util.{ConcurrentModificationException, UUID} +import java.util.concurrent.{TimeoutException, TimeUnit} import javax.annotation.concurrent.GuardedBy import scala.collection.JavaConverters._ @@ -29,14 +29,15 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.STREAMING_QUERY_LISTENERS -import org.apache.spark.sql.sources.v2.{SupportsWrite, Table} import org.apache.spark.util.{Clock, SystemClock, Utils} /** @@ -51,9 +52,10 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo StateStoreCoordinatorRef.forDriver(sparkSession.sparkContext.env) private val listenerBus = new StreamingQueryListenerBus(sparkSession.sparkContext.listenerBus) - @GuardedBy("activeQueriesLock") + @GuardedBy("activeQueriesSharedLock") private val activeQueries = new 
mutable.HashMap[UUID, StreamingQuery] - private val activeQueriesLock = new Object + // A global lock to keep track of active streaming queries across Spark sessions + private val activeQueriesSharedLock = sparkSession.sharedState.activeQueriesLock private val awaitTerminationLock = new Object @GuardedBy("awaitTerminationLock") @@ -67,6 +69,9 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo logInfo(s"Registered listener ${listener.getClass.getName}") }) } + sparkSession.sharedState.streamingQueryStatusListener.foreach { listener => + addListener(listener) + } } catch { case e: Exception => throw new SparkException("Exception when registering StreamingQueryListener", e) @@ -77,7 +82,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo * * @since 2.0.0 */ - def active: Array[StreamingQuery] = activeQueriesLock.synchronized { + def active: Array[StreamingQuery] = activeQueriesSharedLock.synchronized { activeQueries.values.toArray } @@ -86,7 +91,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo * * @since 2.1.0 */ - def get(id: UUID): StreamingQuery = activeQueriesLock.synchronized { + def get(id: UUID): StreamingQuery = activeQueriesSharedLock.synchronized { activeQueries.get(id).orNull } @@ -320,6 +325,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo * @param trigger [[Trigger]] for the query. * @param triggerClock [[Clock]] to use for the triggering. */ + @throws[TimeoutException] private[sql] def startQuery( userSpecifiedName: Option[String], userSpecifiedCheckpointLocation: Option[String], @@ -343,25 +349,61 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo trigger, triggerClock) - activeQueriesLock.synchronized { + // The following code block checks if a stream with the same name or id is running. 
Then it + // returns an Option of an already active stream to stop outside of the lock + // to avoid a deadlock. + val activeRunOpt = activeQueriesSharedLock.synchronized { // Make sure no other query with same name is active userSpecifiedName.foreach { name => if (activeQueries.values.exists(_.name == name)) { - throw new IllegalArgumentException( - s"Cannot start query with name $name as a query with that name is already active") + throw new IllegalArgumentException(s"Cannot start query with name $name as a query " + + s"with that name is already active in this SparkSession") } } - // Make sure no other query with same id is active - if (activeQueries.values.exists(_.id == query.id)) { - throw new IllegalStateException( - s"Cannot start query with id ${query.id} as another query with same id is " + - s"already active. Perhaps you are attempting to restart a query from checkpoint " + - s"that is already active.") + // Make sure no other query with same id is active across all sessions + val activeOption = Option(sparkSession.sharedState.activeStreamingQueries.get(query.id)) + .orElse(activeQueries.get(query.id)) // shouldn't be needed but paranoia ... + + val shouldStopActiveRun = + sparkSession.sessionState.conf.getConf(SQLConf.STREAMING_STOP_ACTIVE_RUN_ON_RESTART) + if (activeOption.isDefined) { + if (shouldStopActiveRun) { + val oldQuery = activeOption.get + logWarning(s"Stopping existing streaming query [id=${query.id}, " + + s"runId=${oldQuery.runId}], as a new run is being started.") + Some(oldQuery) + } else { + throw new IllegalStateException( + s"Cannot start query with id ${query.id} as another query with same id is " + + s"already active. Perhaps you are attempting to restart a query from checkpoint " + + s"that is already active. 
You may stop the old query by setting the SQL " + + "configuration: " + + s"""spark.conf.set("${SQLConf.STREAMING_STOP_ACTIVE_RUN_ON_RESTART.key}", true) """ + + "and retry.") + } + } else { + // nothing to stop so, no-op + None } + } + // stop() will clear the queryId from activeStreamingQueries as well as activeQueries + activeRunOpt.foreach(_.stop()) + + activeQueriesSharedLock.synchronized { + // We still can have a race condition when two concurrent instances try to start the same + // stream, while a third one was already active and stopped above. In this case, we throw a + // ConcurrentModificationException. + val oldActiveQuery = sparkSession.sharedState.activeStreamingQueries.put( + query.id, query.streamingQuery) // we need to put the StreamExecution, not the wrapper + if (oldActiveQuery != null) { + throw new ConcurrentModificationException( + "Another instance of this query was just started by a concurrent session.") + } activeQueries.put(query.id, query) } + try { // When starting a query, it will call `StreamingQueryListener.onQueryStarted` synchronously. // As it's provided by the user and can run arbitrary codes, we must not hold any lock here. 
@@ -370,9 +412,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo query.streamingQuery.start() } catch { case e: Throwable => - activeQueriesLock.synchronized { - activeQueries -= query.id - } + unregisterTerminatedStream(query) throw e } query @@ -380,9 +420,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo /** Notify (by the StreamingQuery) that the query has been terminated */ private[sql] def notifyQueryTermination(terminatedQuery: StreamingQuery): Unit = { - activeQueriesLock.synchronized { - activeQueries -= terminatedQuery.id - } + unregisterTerminatedStream(terminatedQuery) awaitTerminationLock.synchronized { if (lastTerminatedQuery == null || terminatedQuery.exception.nonEmpty) { lastTerminatedQuery = terminatedQuery @@ -391,4 +429,13 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo } stateStoreCoordinator.deactivateInstances(terminatedQuery.runId) } + + private def unregisterTerminatedStream(terminatedQuery: StreamingQuery): Unit = { + activeQueriesSharedLock.synchronized { + // remove from shared state only if the streaming execution also matches + sparkSession.sharedState.activeStreamingQueries.remove( + terminatedQuery.id, terminatedQuery) + activeQueries -= terminatedQuery.id + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index 0b3945cbd1323..13b506b60a126 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -24,12 +24,15 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.util.control.NonFatal +import com.fasterxml.jackson.databind.annotation.JsonDeserialize import org.json4s._ import org.json4s.JsonAST.JValue import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import 
org.apache.spark.annotation.Evolving +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.streaming.SinkProgress.DEFAULT_NUM_OUTPUT_ROWS /** @@ -74,7 +77,7 @@ class StateOperatorProgress private[sql]( * a trigger. Each event relates to processing done for a single trigger of the streaming * query. Events are emitted even when no new data is available to be processed. * - * @param id An unique query id that persists across restarts. See `StreamingQuery.id()`. + * @param id A unique query id that persists across restarts. See `StreamingQuery.id()`. * @param runId A query id that is unique for every start/restart. See `StreamingQuery.runId()`. * @param name User-specified name of the query, null if not specified. * @param timestamp Beginning time of the trigger in ISO8601 format, i.e. UTC timestamps. @@ -82,6 +85,7 @@ class StateOperatorProgress private[sql]( * case of retries after a failure a given batchId my be executed more than once. * Similarly, when there is no data to be processed, the batchId will not be * incremented. + * @param batchDuration The process duration of each batch. * @param durationMs The amount of time taken to perform various operations in milliseconds. * @param eventTime Statistics of event time seen in this batch. It may contain the following keys: * {{{ @@ -102,11 +106,14 @@ class StreamingQueryProgress private[sql]( val name: String, val timestamp: String, val batchId: Long, + val batchDuration: Long, val durationMs: ju.Map[String, JLong], val eventTime: ju.Map[String, String], val stateOperators: Array[StateOperatorProgress], val sources: Array[SourceProgress], - val sink: SinkProgress) extends Serializable { + val sink: SinkProgress, + @JsonDeserialize(contentAs = classOf[GenericRowWithSchema]) + val observedMetrics: ju.Map[String, Row]) extends Serializable { /** The aggregate (across all sources) number of records processed in a trigger. 
*/ def numInputRows: Long = sources.map(_.numInputRows).sum @@ -149,7 +156,8 @@ class StreamingQueryProgress private[sql]( ("eventTime" -> safeMapToJValue[String](eventTime, s => JString(s))) ~ ("stateOperators" -> JArray(stateOperators.map(_.jsonValue).toList)) ~ ("sources" -> JArray(sources.map(_.jsonValue).toList)) ~ - ("sink" -> sink.jsonValue) + ("sink" -> sink.jsonValue) ~ + ("observedMetrics" -> safeMapToJValue[Row](observedMetrics, row => row.jsonValue)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala new file mode 100644 index 0000000000000..650f64fe1688c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming.ui + +import java.text.SimpleDateFormat +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node + +import org.apache.commons.lang3.StringEscapeUtils + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone +import org.apache.spark.sql.streaming.ui.UIUtils._ +import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage} + +private[ui] class StreamingQueryPage(parent: StreamingQueryTab) + extends WebUIPage("") with Logging { + val df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") + df.setTimeZone(getTimeZone("UTC")) + + override def render(request: HttpServletRequest): Seq[Node] = { + val content = generateStreamingQueryTable(request) + SparkUIUtils.headerSparkPage(request, "Streaming Query", content, parent) + } + + def generateDataRow(request: HttpServletRequest, queryActive: Boolean) + (query: StreamingQueryUIData): Seq[Node] = { + + def details(detail: Any): Seq[Node] = { + if (queryActive) { + return Seq.empty[Node] + } + val detailString = detail.asInstanceOf[String] + val isMultiline = detailString.indexOf('\n') >= 0 + val summary = StringEscapeUtils.escapeHtml4( + if (isMultiline) detailString.substring(0, detailString.indexOf('\n')) else detailString + ) + val details = SparkUIUtils.detailsUINode(isMultiline, detailString) + {summary}{details} + } + + val statisticsLink = "%s/%s/statistics?id=%s" + .format(SparkUIUtils.prependBaseUri(request, parent.basePath), parent.prefix, query.runId) + + val name = UIUtils.getQueryName(query) + val status = UIUtils.getQueryStatus(query) + val duration = if (queryActive) { + SparkUIUtils.formatDurationVerbose(System.currentTimeMillis() - query.submissionTime) + } else { + withNoProgress(query, { + val endTimeMs = query.lastProgress.timestamp + SparkUIUtils.formatDurationVerbose(df.parse(endTimeMs).getTime - query.submissionTime) + }, "-") + } + + + {name} + {status} + {query.id} + {query.runId} + 
{SparkUIUtils.formatDate(query.submissionTime)} + {duration} + {withNoProgress(query, { + (query.recentProgress.map(p => withNumberInvalid(p.inputRowsPerSecond)).sum / + query.recentProgress.length).formatted("%.2f") }, "NaN")} + + {withNoProgress(query, { + (query.recentProgress.map(p => withNumberInvalid(p.processedRowsPerSecond)).sum / + query.recentProgress.length).formatted("%.2f") }, "NaN")} + + {withNoProgress(query, { query.lastProgress.batchId }, "NaN")} + {details(query.exception.getOrElse("-"))} + + } + + private def generateStreamingQueryTable(request: HttpServletRequest): Seq[Node] = { + val (activeQueries, inactiveQueries) = parent.statusListener.allQueryStatus + .partition(_.isActive) + val activeQueryTables = if (activeQueries.nonEmpty) { + val headerRow = Seq( + "Name", "Status", "Id", "Run ID", "Submitted Time", "Duration", "Avg Input /sec", + "Avg Process /sec", "Latest Batch") + + Some(SparkUIUtils.listingTable(headerRow, generateDataRow(request, queryActive = true), + activeQueries, true, None, Seq(null), false)) + } else { + None + } + + val inactiveQueryTables = if (inactiveQueries.nonEmpty) { + val headerRow = Seq( + "Name", "Status", "Id", "Run ID", "Submitted Time", "Duration", "Avg Input /sec", + "Avg Process /sec", "Latest Batch", "Error") + + Some(SparkUIUtils.listingTable(headerRow, generateDataRow(request, queryActive = false), + inactiveQueries, true, None, Seq(null), false)) + } else { + None + } + + // scalastyle:off + val content = + +
    + + Active Streaming Queries ({activeQueries.length}) +
    +
    ++ +
    +
      + {activeQueryTables.getOrElse(Seq.empty[Node])} +
    +
    ++ + +
    + + Completed Streaming Queries ({inactiveQueries.length}) +
    +
    ++ +
    +
      + {inactiveQueryTables.getOrElse(Seq.empty[Node])} +
    +
    + // scalastyle:on + + content + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala new file mode 100644 index 0000000000000..56672ce328bff --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming.ui + +import java.{util => ju} +import java.lang.{Long => JLong} +import java.text.SimpleDateFormat +import java.util.UUID +import javax.servlet.http.HttpServletRequest + +import scala.xml.{Node, Unparsed} + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone +import org.apache.spark.sql.streaming.ui.UIUtils._ +import org.apache.spark.ui.{GraphUIData, JsCollector, UIUtils => SparkUIUtils, WebUIPage} + +private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) + extends WebUIPage("statistics") with Logging { + val df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") + df.setTimeZone(getTimeZone("UTC")) + + def generateLoadResources(request: HttpServletRequest): Seq[Node] = { + // scalastyle:off + + + + + // scalastyle:on + } + + override def render(request: HttpServletRequest): Seq[Node] = { + val parameterId = request.getParameter("id") + require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") + + val query = parent.statusListener.allQueryStatus.find { case q => + q.runId.equals(UUID.fromString(parameterId)) + }.getOrElse(throw new IllegalArgumentException(s"Failed to find streaming query $parameterId")) + + val resources = generateLoadResources(request) + val basicInfo = generateBasicInfo(query) + val content = + resources ++ + basicInfo ++ + generateStatTable(query) + SparkUIUtils.headerSparkPage(request, "Streaming Query Statistics", content, parent) + } + + def generateTimeMap(times: Seq[Long]): Seq[Node] = { + val js = "var timeFormat = {};\n" + times.map { time => + val formattedTime = SparkUIUtils.formatBatchTime(time, 1, showYYYYMMSS = false) + s"timeFormat[$time] = '$formattedTime';" + }.mkString("\n") + + + } + + def generateVar(values: Array[(Long, ju.Map[String, JLong])]): Seq[Node] = { + val durationDataPadding = SparkUIUtils.durationDataPadding(values) + val js = "var timeToValues = {};\n" + 
durationDataPadding.map { case (x, y) => + val s = y.toSeq.sortBy(_._1).map(e => s""""${e._2}"""").mkString("[", ",", "]") + s"""timeToValues["${SparkUIUtils.formatBatchTime(x, 1, showYYYYMMSS = false)}"] = $s;""" + }.mkString("\n") + + + } + + def generateBasicInfo(query: StreamingQueryUIData): Seq[Node] = { + val duration = if (query.isActive) { + SparkUIUtils.formatDurationVerbose(System.currentTimeMillis() - query.submissionTime) + } else { + withNoProgress(query, { + val end = query.lastProgress.timestamp + val start = query.recentProgress.head.timestamp + SparkUIUtils.formatDurationVerbose( + df.parse(end).getTime - df.parse(start).getTime) + }, "-") + } + + val name = UIUtils.getQueryName(query) + val numBatches = withNoProgress(query, { query.lastProgress.batchId + 1L }, 0) +
    Running batches for + + {duration} + + since + + {SparkUIUtils.formatDate(query.submissionTime)} + + ({numBatches} completed batches) +
    +
    +
    Name: {name}
    +
    Id: {query.id}
    +
    RunId: {query.runId}
    +
    + } + + def generateStatTable(query: StreamingQueryUIData): Seq[Node] = { + val batchTimes = withNoProgress(query, + query.recentProgress.map(p => df.parse(p.timestamp).getTime), Array.empty[Long]) + val minBatchTime = + withNoProgress(query, df.parse(query.recentProgress.head.timestamp).getTime, 0L) + val maxBatchTime = + withNoProgress(query, df.parse(query.lastProgress.timestamp).getTime, 0L) + val maxRecordRate = + withNoProgress(query, query.recentProgress.map(_.inputRowsPerSecond).max, 0L) + val minRecordRate = 0L + val maxProcessRate = + withNoProgress(query, query.recentProgress.map(_.processedRowsPerSecond).max, 0L) + + val minProcessRate = 0L + val maxRows = withNoProgress(query, query.recentProgress.map(_.numInputRows).max, 0L) + val minRows = 0L + val maxBatchDuration = withNoProgress(query, query.recentProgress.map(_.batchDuration).max, 0L) + val minBatchDuration = 0L + + val inputRateData = withNoProgress(query, + query.recentProgress.map(p => (df.parse(p.timestamp).getTime, + withNumberInvalid { p.inputRowsPerSecond })), Array.empty[(Long, Double)]) + val processRateData = withNoProgress(query, + query.recentProgress.map(p => (df.parse(p.timestamp).getTime, + withNumberInvalid { p.processedRowsPerSecond })), Array.empty[(Long, Double)]) + val inputRowsData = withNoProgress(query, + query.recentProgress.map(p => (df.parse(p.timestamp).getTime, + withNumberInvalid { p.numInputRows })), Array.empty[(Long, Double)]) + val batchDurations = withNoProgress(query, + query.recentProgress.map(p => (df.parse(p.timestamp).getTime, + withNumberInvalid { p.batchDuration })), Array.empty[(Long, Double)]) + val operationDurationData = withNoProgress(query, query.recentProgress.map { p => + val durationMs = p.durationMs + // remove "triggerExecution" as it count the other operation duration. 
+ durationMs.remove("triggerExecution") + (df.parse(p.timestamp).getTime, durationMs)}, Array.empty[(Long, ju.Map[String, JLong])]) + + val jsCollector = new JsCollector + val graphUIDataForInputRate = + new GraphUIData( + "input-rate-timeline", + "input-rate-histogram", + inputRateData, + minBatchTime, + maxBatchTime, + minRecordRate, + maxRecordRate, + "records/sec") + graphUIDataForInputRate.generateDataJs(jsCollector) + + val graphUIDataForProcessRate = + new GraphUIData( + "process-rate-timeline", + "process-rate-histogram", + processRateData, + minBatchTime, + maxBatchTime, + minProcessRate, + maxProcessRate, + "records/sec") + graphUIDataForProcessRate.generateDataJs(jsCollector) + + val graphUIDataForInputRows = + new GraphUIData( + "input-rows-timeline", + "input-rows-histogram", + inputRowsData, + minBatchTime, + maxBatchTime, + minRows, + maxRows, + "records") + graphUIDataForInputRows.generateDataJs(jsCollector) + + val graphUIDataForBatchDuration = + new GraphUIData( + "batch-duration-timeline", + "batch-duration-histogram", + batchDurations, + minBatchTime, + maxBatchTime, + minBatchDuration, + maxBatchDuration, + "ms") + graphUIDataForBatchDuration.generateDataJs(jsCollector) + + val graphUIDataForDuration = + new GraphUIData( + "duration-area-stack", + "", + Seq.empty[(Long, Double)], + 0L, + 0L, + 0L, + 0L, + "ms") + + val table = + // scalastyle:off + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    TimelinesHistograms
    +
    +
    Input Rate {SparkUIUtils.tooltip("The aggregate (across all sources) rate of data arriving.", "right")}
    +
    +
    {graphUIDataForInputRate.generateTimelineHtml(jsCollector)}{graphUIDataForInputRate.generateHistogramHtml(jsCollector)}
    +
    +
    Process Rate {SparkUIUtils.tooltip("The aggregate (across all sources) rate at which Spark is processing data.", "right")}
    +
    +
    {graphUIDataForProcessRate.generateTimelineHtml(jsCollector)}{graphUIDataForProcessRate.generateHistogramHtml(jsCollector)}
    +
    +
    Input Rows {SparkUIUtils.tooltip("The aggregate (across all sources) number of records processed in a trigger.", "right")}
    +
    +
    {graphUIDataForInputRows.generateTimelineHtml(jsCollector)}{graphUIDataForInputRows.generateHistogramHtml(jsCollector)}
    +
    +
    Batch Duration {SparkUIUtils.tooltip("The process duration of each batch.", "right")}
    +
    +
    {graphUIDataForBatchDuration.generateTimelineHtml(jsCollector)}{graphUIDataForBatchDuration.generateHistogramHtml(jsCollector)}
    +
    +
    Operation Duration {SparkUIUtils.tooltip("The amount of time taken to perform various operations in milliseconds.", "right")}
    +
    +
    {graphUIDataForDuration.generateAreaStackHtmlWithData(jsCollector, operationDurationData)}
    + // scalastyle:on + + generateVar(operationDurationData) ++ generateTimeMap(batchTimes) ++ table ++ jsCollector.toHtml + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala new file mode 100644 index 0000000000000..91815110e0d39 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.ui + +import java.text.SimpleDateFormat +import java.util.UUID +import java.util.concurrent.ConcurrentHashMap + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.SparkConf +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.StaticSQLConf +import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryProgress} + +/** + * A customized StreamingQueryListener used in structured streaming UI, which contains all + * UI data for both active and inactive query. + * TODO: Add support for history server. 
+ */ +private[sql] class StreamingQueryStatusListener(conf: SparkConf) extends StreamingQueryListener { + + private val timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 + timestampFormat.setTimeZone(DateTimeUtils.getTimeZone("UTC")) + + /** + * We use runId as the key here instead of id in active query status map, + * because the runId is unique for every started query, even if it is a restart. + */ + private[ui] val activeQueryStatus = new ConcurrentHashMap[UUID, StreamingQueryUIData]() + private[ui] val inactiveQueryStatus = new mutable.Queue[StreamingQueryUIData]() + + private val streamingProgressRetention = + conf.get(StaticSQLConf.STREAMING_UI_RETAINED_PROGRESS_UPDATES) + private val inactiveQueryStatusRetention = conf.get(StaticSQLConf.STREAMING_UI_RETAINED_QUERIES) + + override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = { + activeQueryStatus.putIfAbsent(event.runId, + new StreamingQueryUIData(event.name, event.id, event.runId, event.submissionTime)) + } + + override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = { + val batchTimestamp = timestampFormat.parse(event.progress.timestamp).getTime + val queryStatus = activeQueryStatus.getOrDefault( + event.progress.runId, + new StreamingQueryUIData(event.progress.name, event.progress.id, event.progress.runId, + batchTimestamp)) + queryStatus.updateProcess(event.progress, streamingProgressRetention) + } + + override def onQueryTerminated( + event: StreamingQueryListener.QueryTerminatedEvent): Unit = synchronized { + val queryStatus = activeQueryStatus.remove(event.runId) + if (queryStatus != null) { + queryStatus.queryTerminated(event) + inactiveQueryStatus += queryStatus + while (inactiveQueryStatus.length >= inactiveQueryStatusRetention) { + inactiveQueryStatus.dequeue() + } + } + } + + def allQueryStatus: Seq[StreamingQueryUIData] = synchronized { + activeQueryStatus.values().asScala.toSeq ++ inactiveQueryStatus + } 
+} + +/** + * This class contains all message related to UI display, each instance corresponds to a single + * [[org.apache.spark.sql.streaming.StreamingQuery]]. + */ +private[ui] class StreamingQueryUIData( + val name: String, + val id: UUID, + val runId: UUID, + val submissionTime: Long) { + + /** Holds the most recent query progress updates. */ + private val progressBuffer = new mutable.Queue[StreamingQueryProgress]() + + private var _isActive = true + private var _exception: Option[String] = None + + def isActive: Boolean = synchronized { _isActive } + + def exception: Option[String] = synchronized { _exception } + + def queryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = synchronized { + _isActive = false + _exception = event.exception + } + + def updateProcess( + newProgress: StreamingQueryProgress, retentionNum: Int): Unit = progressBuffer.synchronized { + progressBuffer += newProgress + while (progressBuffer.length >= retentionNum) { + progressBuffer.dequeue() + } + } + + def recentProgress: Array[StreamingQueryProgress] = progressBuffer.synchronized { + progressBuffer.toArray + } + + def lastProgress: StreamingQueryProgress = progressBuffer.synchronized { + progressBuffer.lastOption.orNull + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala new file mode 100644 index 0000000000000..bb097ffc06912 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.streaming.ui + +import org.apache.spark.internal.Logging +import org.apache.spark.ui.{SparkUI, SparkUITab} + +private[sql] class StreamingQueryTab( + val statusListener: StreamingQueryStatusListener, + sparkUI: SparkUI) extends SparkUITab(sparkUI, "StreamingQuery") with Logging { + + override val name = "Structured Streaming" + + val parent = sparkUI + + attachPage(new StreamingQueryPage(this)) + attachPage(new StreamingQueryStatisticsPage(this)) + parent.attachTab(this) + + parent.addStaticHandler(StreamingQueryTab.STATIC_RESOURCE_DIR, "/static/sql") +} + +private[sql] object StreamingQueryTab { + private val STATIC_RESOURCE_DIR = "org/apache/spark/sql/execution/ui/static" +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala new file mode 100644 index 0000000000000..57b9dec81f28a --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.ui + +private[ui] object UIUtils { + + /** + * Check whether `number` is valid, if not return 0.0d + */ + def withNumberInvalid(number: => Double): Double = { + if (number.isNaN || number.isInfinite) { + 0.0d + } else { + number + } + } + + /** + * Execute a block of code when there is already one completed batch in streaming query, + * otherwise return `default` value. + */ + def withNoProgress[T](query: StreamingQueryUIData, body: => T, default: T): T = { + if (query.lastProgress != null) { + body + } else { + default + } + } + + def getQueryName(query: StreamingQueryUIData): String = { + if (query.name == null || query.name.isEmpty) { + "" + } else { + query.name + } + } + + def getQueryStatus(query: StreamingQueryUIData): String = { + if (query.isActive) { + "RUNNING" + } else { + query.exception.map(_ => "FAILED").getOrElse("FINISHED") + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala index f1fe472afdc2a..01f81825f6bfd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala @@ -108,6 +108,11 @@ class ExecutionListenerManager private[sql](session: SparkSession, loadExtension listenerBus.removeAllListeners() } + /** Only exposed for testing. 
*/ + private[sql] def listListeners(): Array[QueryExecutionListener] = { + listenerBus.listeners.asScala.toArray + } + /** * Get an identical copy of this listener manager. */ diff --git a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/ApiSqlRootResource.scala b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/ApiSqlRootResource.scala new file mode 100644 index 0000000000000..5fc7123c9097b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/ApiSqlRootResource.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.status.api.v1.sql + +import javax.ws.rs.Path + +import org.apache.spark.status.api.v1.ApiRequestContext + +@Path("/v1") +private[v1] class ApiSqlRootResource extends ApiRequestContext { + + @Path("applications/{appId}/sql") + def sqlList(): Class[SqlResource] = classOf[SqlResource] +} diff --git a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala new file mode 100644 index 0000000000000..346e07f2bef15 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.status.api.v1.sql + +import java.util.Date +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.JobExecutionStatus +import org.apache.spark.sql.execution.ui.{SQLAppStatusStore, SQLExecutionUIData, SQLPlanMetric} +import org.apache.spark.status.api.v1.{BaseAppResource, NotFoundException} + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class SqlResource extends BaseAppResource { + + @GET + def sqlList( + @DefaultValue("false") @QueryParam("details") details: Boolean, + @DefaultValue("0") @QueryParam("offset") offset: Int, + @DefaultValue("20") @QueryParam("length") length: Int): Seq[ExecutionData] = { + withUI { ui => + val sqlStore = new SQLAppStatusStore(ui.store.store) + sqlStore.executionsList(offset, length).map(prepareExecutionData(_, details)) + } + } + + @GET + @Path("{executionId:\\d+}") + def sql( + @PathParam("executionId") execId: Long, + @DefaultValue("false") @QueryParam("details") details: Boolean): ExecutionData = { + withUI { ui => + val sqlStore = new SQLAppStatusStore(ui.store.store) + sqlStore + .execution(execId) + .map(prepareExecutionData(_, details)) + .getOrElse(throw new NotFoundException("unknown id: " + execId)) + } + } + + private def printableMetrics( + metrics: Seq[SQLPlanMetric], + metricValues: Map[Long, String]): Seq[Metrics] = { + metrics.map(metric => + Metrics(metric.name, metricValues.get(metric.accumulatorId).getOrElse(""))) + } + + private def prepareExecutionData(exec: SQLExecutionUIData, details: Boolean): ExecutionData = { + var running = Seq[Int]() + var completed = Seq[Int]() + var failed = Seq[Int]() + + exec.jobs.foreach { + case (id, JobExecutionStatus.RUNNING) => + running = running :+ id + case (id, JobExecutionStatus.SUCCEEDED) => + completed = completed :+ id + case (id, JobExecutionStatus.FAILED) => + failed = failed :+ id + case _ => + } + + val status = if (exec.jobs.size == completed.size) { + "COMPLETED" + } else if (failed.nonEmpty) { 
+ "FAILED" + } else { + "RUNNING" + } + + val duration = exec.completionTime.getOrElse(new Date()).getTime - exec.submissionTime + val planDetails = if (details) exec.physicalPlanDescription else "" + val metrics = if (details) printableMetrics(exec.metrics, exec.metricValues) else Seq.empty + new ExecutionData( + exec.executionId, + status, + exec.description, + planDetails, + metrics, + new Date(exec.submissionTime), + duration, + running, + completed, + failed) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/api.scala b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/api.scala new file mode 100644 index 0000000000000..7ace66ffb06e1 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/api.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1.sql + +import java.util.Date + +class ExecutionData private[spark] ( + val id: Long, + val status: String, + val description: String, + val planDescription: String, + val metrics: Seq[Metrics], + val submissionTime: Date, + val duration: Long, + val runningJobIds: Seq[Int], + val successJobIds: Seq[Int], + val failedJobIds: Seq[Int]) + +case class Metrics private[spark] (metricName: String, metricValue: String) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java index 6ffccee52c0fe..dd3755d3f904e 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java @@ -25,43 +25,50 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.KeyValueGroupedDataset; -import org.apache.spark.sql.expressions.javalang.typed; /** * Suite that replicates tests in JavaDatasetAggregatorSuite using lambda syntax. 
*/ public class Java8DatasetAggregatorSuite extends JavaDatasetAggregatorSuiteBase { + @SuppressWarnings("deprecation") @Test public void testTypedAggregationAverage() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.avg(v -> (double)(v._2() * 2))); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.avg(v -> (double)(v._2() * 2))); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)), agged.collectAsList()); } + @SuppressWarnings("deprecation") @Test public void testTypedAggregationCount() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.count(v -> v)); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.count(v -> v)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)), agged.collectAsList()); } + @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumDouble() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.sum(v -> (double)v._2())); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.sum(v -> (double)v._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)), agged.collectAsList()); } + @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumLong() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.sumLong(v -> (long)v._2())); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.sumLong(v -> (long)v._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)), agged.collectAsList()); diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java index 
7bf0789b43d63..5603cb988b8e7 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java @@ -22,6 +22,10 @@ import java.time.LocalDate; import java.util.*; +import org.apache.commons.lang3.builder.ToStringBuilder; +import org.apache.commons.lang3.builder.ToStringStyle; +import org.junit.*; + import org.apache.spark.sql.*; import org.apache.spark.sql.catalyst.expressions.GenericRow; import org.apache.spark.sql.catalyst.util.DateTimeUtils; @@ -29,7 +33,6 @@ import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructType; -import org.junit.*; import org.apache.spark.sql.test.TestSparkSession; @@ -78,7 +81,7 @@ public void testBeanWithArrayFieldDeserialization() { .as(encoder); List records = dataset.collectAsList(); - Assert.assertEquals(records, ARRAY_RECORDS); + Assert.assertEquals(ARRAY_RECORDS, records); } private static final List MAP_RECORDS = new ArrayList<>(); @@ -121,7 +124,7 @@ public void testBeanWithMapFieldsDeserialization() { List records = dataset.collectAsList(); - Assert.assertEquals(records, MAP_RECORDS); + Assert.assertEquals(MAP_RECORDS, records); } @Test @@ -486,17 +489,17 @@ public int hashCode() { @Override public String toString() { - return com.google.common.base.Objects.toStringHelper(this) - .add("shortField", shortField) - .add("intField", intField) - .add("longField", longField) - .add("floatField", floatField) - .add("doubleField", doubleField) - .add("stringField", stringField) - .add("booleanField", booleanField) - .add("timestampField", timestampField) - .add("nullIntField", nullIntField) - .toString(); + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("shortField", shortField) + .append("intField", intField) + .append("longField", longField) + .append("floatField", floatField) + .append("doubleField", 
doubleField) + .append("stringField", stringField) + .append("booleanField", booleanField) + .append("timestampField", timestampField) + .append("nullIntField", nullIntField) + .toString(); } } @@ -584,11 +587,12 @@ public int hashCode() { @Override public String toString() { - return com.google.common.base.Objects.toStringHelper(this) - .add("localDateField", localDateField) - .add("instantField", instantField) - .toString(); + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) + .append("localDateField", localDateField) + .append("instantField", instantField) + .toString(); } + } private static Row createLocalDateInstantRow(Long index) { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java index a05afa4f6ba30..f4bffd9d79828 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java @@ -467,10 +467,11 @@ public void testBeanWithoutGetter() { BeanWithoutGetter bean = new BeanWithoutGetter(); List data = Arrays.asList(bean); Dataset df = spark.createDataFrame(data, BeanWithoutGetter.class); - Assert.assertEquals(df.schema().length(), 0); - Assert.assertEquals(df.collectAsList().size(), 1); + Assert.assertEquals(0, df.schema().length()); + Assert.assertEquals(1, df.collectAsList().size()); } + @SuppressWarnings("deprecation") @Test public void testJsonRDDToDataFrame() { // This is a test for the deprecated API in SPARK-15615. 
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameWriterV2Suite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameWriterV2Suite.java new file mode 100644 index 0000000000000..e418958bef94d --- /dev/null +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameWriterV2Suite.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test.org.apache.spark.sql; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; +import org.apache.spark.sql.connector.InMemoryTableCatalog; +import org.apache.spark.sql.test.TestSparkSession; +import org.apache.spark.sql.types.StructType; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.apache.spark.sql.functions.*; + +public class JavaDataFrameWriterV2Suite { + private static StructType schema = new StructType().add("s", "string"); + private SparkSession spark = null; + + public Dataset df() { + return spark.read().schema(schema).text(); + } + + @Before + public void createTestTable() { + this.spark = new TestSparkSession(); + spark.conf().set("spark.sql.catalog.testcat", InMemoryTableCatalog.class.getName()); + spark.sql("CREATE TABLE testcat.t (s string) USING foo"); + } + + @After + public void dropTestTable() { + spark.sql("DROP TABLE testcat.t"); + spark.stop(); + } + + @Test + public void testAppendAPI() throws NoSuchTableException { + df().writeTo("testcat.t").append(); + df().writeTo("testcat.t").option("property", "value").append(); + } + + @Test + public void testOverwritePartitionsAPI() throws NoSuchTableException { + df().writeTo("testcat.t").overwritePartitions(); + df().writeTo("testcat.t").option("property", "value").overwritePartitions(); + } + + @Test + public void testOverwriteAPI() throws NoSuchTableException { + df().writeTo("testcat.t").overwrite(lit(true)); + df().writeTo("testcat.t").option("property", "value").overwrite(lit(true)); + } + + @Test + public void testCreateAPI() throws TableAlreadyExistsException { + df().writeTo("testcat.t2").create(); + spark.sql("DROP TABLE testcat.t2"); + + 
df().writeTo("testcat.t2").option("property", "value").create(); + spark.sql("DROP TABLE testcat.t2"); + + df().writeTo("testcat.t2").tableProperty("property", "value").create(); + spark.sql("DROP TABLE testcat.t2"); + + df().writeTo("testcat.t2").using("v2format").create(); + spark.sql("DROP TABLE testcat.t2"); + + df().writeTo("testcat.t2").partitionedBy(col("s")).create(); + spark.sql("DROP TABLE testcat.t2"); + } + + @Test + public void testReplaceAPI() throws CannotReplaceMissingTableException { + df().writeTo("testcat.t").replace(); + df().writeTo("testcat.t").option("property", "value").replace(); + df().writeTo("testcat.t").tableProperty("property", "value").replace(); + df().writeTo("testcat.t").using("v2format").replace(); + df().writeTo("testcat.t").partitionedBy(col("s")).replace(); + } + + @Test + public void testCreateOrReplaceAPI() { + df().writeTo("testcat.t").createOrReplace(); + df().writeTo("testcat.t").option("property", "value").createOrReplace(); + df().writeTo("testcat.t").tableProperty("property", "value").createOrReplace(); + df().writeTo("testcat.t").using("v2format").createOrReplace(); + df().writeTo("testcat.t").partitionedBy(col("s")).createOrReplace(); + } +} diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java index 539976d5af469..8a90624f2070b 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java @@ -29,7 +29,6 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.KeyValueGroupedDataset; import org.apache.spark.sql.expressions.Aggregator; -import org.apache.spark.sql.expressions.javalang.typed; /** * Suite for testing the aggregate functionality of Datasets in Java. 
@@ -85,37 +84,45 @@ public Encoder outputEncoder() { } } + @SuppressWarnings("deprecation") @Test public void testTypedAggregationAverage() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.avg(value -> value._2() * 2.0)); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.avg(value -> value._2() * 2.0)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)), agged.collectAsList()); } + @SuppressWarnings("deprecation") @Test public void testTypedAggregationCount() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.count(value -> value)); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.count(value -> value)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)), agged.collectAsList()); } + @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumDouble() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.sum(value -> (double) value._2())); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.sum(value -> (double) value._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)), agged.collectAsList()); } + @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumLong() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(typed.sumLong(value -> (long) value._2())); + Dataset> agged = grouped.agg( + org.apache.spark.sql.expressions.javalang.typed.sumLong(value -> (long) value._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)), agged.collectAsList()); diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java index 
1e5f55e494b70..d8462ae064dcf 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java @@ -853,7 +853,7 @@ public void testRuntimeNullabilityCheck() { NestedSmallBean nestedSmallBean = new NestedSmallBean(); nestedSmallBean.setF(smallBean); - Assert.assertEquals(ds.collectAsList(), Collections.singletonList(nestedSmallBean)); + Assert.assertEquals(Collections.singletonList(nestedSmallBean), ds.collectAsList()); } // Shouldn't throw runtime exception when parent object (`ClassData`) is null @@ -864,7 +864,7 @@ public void testRuntimeNullabilityCheck() { Dataset ds = df.as(Encoders.bean(NestedSmallBean.class)); NestedSmallBean nestedSmallBean = new NestedSmallBean(); - Assert.assertEquals(ds.collectAsList(), Collections.singletonList(nestedSmallBean)); + Assert.assertEquals(Collections.singletonList(nestedSmallBean), ds.collectAsList()); } nullabilityCheck.expect(RuntimeException.class); @@ -1384,7 +1384,7 @@ public void testBeanWithEnum() { new BeanWithEnum(MyEnum.B, "flower boulevard")); Encoder encoder = Encoders.bean(BeanWithEnum.class); Dataset ds = spark.createDataset(data, encoder); - Assert.assertEquals(ds.collectAsList(), data); + Assert.assertEquals(data, ds.collectAsList()); } public static class EmptyBean implements Serializable {} @@ -1394,8 +1394,8 @@ public void testEmptyBean() { EmptyBean bean = new EmptyBean(); List data = Arrays.asList(bean); Dataset df = spark.createDataset(data, Encoders.bean(EmptyBean.class)); - Assert.assertEquals(df.schema().length(), 0); - Assert.assertEquals(df.collectAsList().size(), 1); + Assert.assertEquals(0, df.schema().length()); + Assert.assertEquals(1, df.collectAsList().size()); } public class CircularReference1Bean implements Serializable { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaHigherOrderFunctionsSuite.java 
b/sql/core/src/test/java/test/org/apache/spark/sql/JavaHigherOrderFunctionsSuite.java new file mode 100644 index 0000000000000..de0acc295b5ea --- /dev/null +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaHigherOrderFunctionsSuite.java @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test.org.apache.spark.sql; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import static java.util.stream.Collectors.toList; + +import static scala.collection.JavaConverters.mapAsScalaMap; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import static org.apache.spark.sql.functions.*; +import org.apache.spark.sql.test.TestSparkSession; +import org.apache.spark.sql.types.*; +import static org.apache.spark.sql.types.DataTypes.*; + +public class JavaHigherOrderFunctionsSuite { + private transient TestSparkSession spark; + private Dataset arrDf; + private Dataset mapDf; + + private void checkAnswer(Dataset actualDS, List expected) throws Exception { + List actual = actualDS.collectAsList(); + Assert.assertEquals(expected.size(), actual.size()); + for (int i = 0; i < expected.size(); i++) { + Row expectedRow = expected.get(i); + Row actualRow = actual.get(i); + Assert.assertEquals(expectedRow.size(), actualRow.size()); + for (int j = 0; j < expectedRow.size(); j++) { + Object expectedValue = expectedRow.get(j); + Object actualValue = actualRow.get(j); + if (expectedValue != null && expectedValue.getClass().isArray()) { + actualValue = actualValue.getClass().getMethod("array").invoke(actualValue); + Assert.assertArrayEquals((Object[]) expectedValue, (Object[]) actualValue); + } else { + Assert.assertEquals(expectedValue, actualValue); + } + } + } + } + + @SafeVarargs + private static List toRows(T... objs) { + return Arrays.stream(objs) + .map(RowFactory::create) + .collect(toList()); + } + + @SafeVarargs + private static T[] makeArray(T... 
ts) { + return ts; + } + + private void setUpArrDf() { + List data = toRows( + makeArray(1, 9, 8, 7), + makeArray(5, 8, 9, 7, 2), + JavaHigherOrderFunctionsSuite.makeArray(), + null + ); + StructType schema = new StructType() + .add("x", new ArrayType(IntegerType, true), true); + arrDf = spark.createDataFrame(data, schema); + } + + private void setUpMapDf() { + List data = toRows( + new HashMap() {{ + put(1, 1); + put(2, 2); + }}, + null + ); + StructType schema = new StructType() + .add("x", new MapType(IntegerType, IntegerType, true)); + mapDf = spark.createDataFrame(data, schema); + } + + @Before + public void setUp() { + spark = new TestSparkSession(); + setUpArrDf(); + setUpMapDf(); + } + + @After + public void tearDown() { + spark.stop(); + spark = null; + } + + @Test + public void testTransform() throws Exception { + checkAnswer( + arrDf.select(transform(col("x"), x -> x.plus(1))), + toRows( + makeArray(2, 10, 9, 8), + makeArray(6, 9, 10, 8, 3), + JavaHigherOrderFunctionsSuite.makeArray(), + null + ) + ); + checkAnswer( + arrDf.select(transform(col("x"), (x, i) -> x.plus(i))), + toRows( + makeArray(1, 10, 10, 10), + makeArray(5, 9, 11, 10, 6), + JavaHigherOrderFunctionsSuite.makeArray(), + null + ) + ); + } + + @Test + public void testFilter() throws Exception { + checkAnswer( + arrDf.select(filter(col("x"), x -> x.plus(1).equalTo(10))), + toRows( + makeArray(9), + makeArray(9), + JavaHigherOrderFunctionsSuite.makeArray(), + null + ) + ); + checkAnswer( + arrDf.select(filter(col("x"), (x, i) -> x.plus(i).equalTo(10))), + toRows( + makeArray(9, 8, 7), + makeArray(7), + JavaHigherOrderFunctionsSuite.makeArray(), + null + ) + ); + } + + @Test + public void testExists() throws Exception { + checkAnswer( + arrDf.select(exists(col("x"), x -> x.plus(1).equalTo(10))), + toRows( + true, + true, + false, + null + ) + ); + } + + @Test + public void testForall() throws Exception { + checkAnswer( + arrDf.select(forall(col("x"), x -> x.plus(1).equalTo(10))), + toRows( + 
false, + false, + true, + null + ) + ); + } + + @Test + public void testAggregate() throws Exception { + checkAnswer( + arrDf.select(aggregate(col("x"), lit(0), (acc, x) -> acc.plus(x))), + toRows( + 25, + 31, + 0, + null + ) + ); + checkAnswer( + arrDf.select(aggregate(col("x"), lit(0), (acc, x) -> acc.plus(x), x -> x)), + toRows( + 25, + 31, + 0, + null + ) + ); + } + + @Test + public void testZipWith() throws Exception { + checkAnswer( + arrDf.select(zip_with(col("x"), col("x"), (a, b) -> lit(42))), + toRows( + makeArray(42, 42, 42, 42), + makeArray(42, 42, 42, 42, 42), + JavaHigherOrderFunctionsSuite.makeArray(), + null + ) + ); + } + + @Test + public void testTransformKeys() throws Exception { + checkAnswer( + mapDf.select(transform_keys(col("x"), (k, v) -> k.plus(v))), + toRows( + mapAsScalaMap(new HashMap() {{ + put(2, 1); + put(4, 2); + }}), + null + ) + ); + } + + @Test + public void testTransformValues() throws Exception { + checkAnswer( + mapDf.select(transform_values(col("x"), (k, v) -> k.plus(v))), + toRows( + mapAsScalaMap(new HashMap() {{ + put(1, 2); + put(2, 4); + }}), + null + ) + ); + } + + @Test + public void testMapFilter() throws Exception { + checkAnswer( + mapDf.select(map_filter(col("x"), (k, v) -> lit(false))), + toRows( + mapAsScalaMap(new HashMap()), + null + ) + ); + } + + @Test + public void testMapZipWith() throws Exception { + checkAnswer( + mapDf.select(map_zip_with(col("x"), col("x"), (k, v1, v2) -> lit(false))), + toRows( + mapAsScalaMap(new HashMap() {{ + put(1, false); + put(2, false); + }}), + null + ) + ); + } +} diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaSaveLoadSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaSaveLoadSuite.java index 127d272579a62..e2a69d55337bc 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaSaveLoadSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaSaveLoadSuite.java @@ -25,7 +25,6 @@ import java.util.Map; import org.junit.After; 
-import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -43,10 +42,7 @@ public class JavaSaveLoadSuite { Dataset df; private static void checkAnswer(Dataset actual, List expected) { - String errorMessage = QueryTest$.MODULE$.checkAnswer(actual, expected); - if (errorMessage != null) { - Assert.fail(errorMessage); - } + QueryTest$.MODULE$.checkAnswer(actual, expected); } @Before diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaAdvancedDataSourceV2.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2.java similarity index 93% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaAdvancedDataSourceV2.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2.java index 255a9f887878b..1a55d198361ee 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaAdvancedDataSourceV2.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2.java @@ -15,22 +15,22 @@ * limitations under the License. 
*/ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; import java.io.IOException; import java.util.*; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; +import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.read.*; import org.apache.spark.sql.sources.Filter; import org.apache.spark.sql.sources.GreaterThan; -import org.apache.spark.sql.sources.v2.Table; -import org.apache.spark.sql.sources.v2.TableProvider; -import org.apache.spark.sql.sources.v2.reader.*; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; -public class JavaAdvancedDataSourceV2 implements TableProvider { +public class JavaAdvancedDataSourceV2 implements TestingV2Source { @Override public Table getTable(CaseInsensitiveStringMap options) { @@ -45,7 +45,7 @@ public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) { static class AdvancedScanBuilder implements ScanBuilder, Scan, SupportsPushDownFilters, SupportsPushDownRequiredColumns { - private StructType requiredSchema = new StructType().add("i", "int").add("j", "int"); + private StructType requiredSchema = TestingV2Source.schema(); private Filter[] filters = new Filter[0]; @Override diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaColumnarDataSourceV2.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaColumnarDataSourceV2.java similarity index 88% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaColumnarDataSourceV2.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaColumnarDataSourceV2.java index 699859cfaebe1..2f10c84c999f9 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaColumnarDataSourceV2.java +++ 
b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaColumnarDataSourceV2.java @@ -15,22 +15,25 @@ * limitations under the License. */ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; import java.io.IOException; import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.PartitionReader; +import org.apache.spark.sql.connector.read.PartitionReaderFactory; +import org.apache.spark.sql.connector.read.ScanBuilder; import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; -import org.apache.spark.sql.sources.v2.Table; -import org.apache.spark.sql.sources.v2.TableProvider; -import org.apache.spark.sql.sources.v2.reader.*; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.util.CaseInsensitiveStringMap; import org.apache.spark.sql.vectorized.ColumnVector; import org.apache.spark.sql.vectorized.ColumnarBatch; -public class JavaColumnarDataSourceV2 implements TableProvider { +public class JavaColumnarDataSourceV2 implements TestingV2Source { class MyScanBuilder extends JavaSimpleScanBuilder { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaPartitionAwareDataSource.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaPartitionAwareDataSource.java similarity index 84% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaPartitionAwareDataSource.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaPartitionAwareDataSource.java index 391af5a306a16..9c1db7a379602 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaPartitionAwareDataSource.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaPartitionAwareDataSource.java @@ -15,24 +15,24 @@ * limitations 
under the License. */ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; import java.io.IOException; import java.util.Arrays; -import org.apache.spark.sql.catalog.v2.expressions.Expressions; -import org.apache.spark.sql.catalog.v2.expressions.Transform; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; -import org.apache.spark.sql.sources.v2.Table; -import org.apache.spark.sql.sources.v2.TableProvider; -import org.apache.spark.sql.sources.v2.reader.*; -import org.apache.spark.sql.sources.v2.reader.partitioning.ClusteredDistribution; -import org.apache.spark.sql.sources.v2.reader.partitioning.Distribution; -import org.apache.spark.sql.sources.v2.reader.partitioning.Partitioning; +import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.expressions.Expressions; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.read.*; +import org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution; +import org.apache.spark.sql.connector.read.partitioning.Distribution; +import org.apache.spark.sql.connector.read.partitioning.Partitioning; import org.apache.spark.sql.util.CaseInsensitiveStringMap; -public class JavaPartitionAwareDataSource implements TableProvider { +public class JavaPartitionAwareDataSource implements TestingV2Source { class MyScanBuilder extends JavaSimpleScanBuilder implements SupportsReportPartitioning { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaRangeInputPartition.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaRangeInputPartition.java similarity index 90% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaRangeInputPartition.java rename to 
sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaRangeInputPartition.java index 438f489a3eea7..d612441201e64 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaRangeInputPartition.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaRangeInputPartition.java @@ -15,9 +15,9 @@ * limitations under the License. */ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; -import org.apache.spark.sql.sources.v2.reader.InputPartition; +import org.apache.spark.sql.connector.read.InputPartition; class JavaRangeInputPartition implements InputPartition { int start; diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaReportStatisticsDataSource.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaReportStatisticsDataSource.java similarity index 79% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaReportStatisticsDataSource.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaReportStatisticsDataSource.java index f3755e18b58d5..9a787c3d2d92c 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaReportStatisticsDataSource.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaReportStatisticsDataSource.java @@ -15,19 +15,19 @@ * limitations under the License. 
*/ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; import java.util.OptionalLong; -import org.apache.spark.sql.sources.v2.Table; -import org.apache.spark.sql.sources.v2.TableProvider; -import org.apache.spark.sql.sources.v2.reader.InputPartition; -import org.apache.spark.sql.sources.v2.reader.ScanBuilder; -import org.apache.spark.sql.sources.v2.reader.Statistics; -import org.apache.spark.sql.sources.v2.reader.SupportsReportStatistics; +import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.ScanBuilder; +import org.apache.spark.sql.connector.read.Statistics; +import org.apache.spark.sql.connector.read.SupportsReportStatistics; import org.apache.spark.sql.util.CaseInsensitiveStringMap; -public class JavaReportStatisticsDataSource implements TableProvider { +public class JavaReportStatisticsDataSource implements TestingV2Source { class MyScanBuilder extends JavaSimpleScanBuilder implements SupportsReportStatistics { @Override public Statistics estimateStatistics() { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSchemaRequiredDataSource.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSchemaRequiredDataSource.java similarity index 73% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSchemaRequiredDataSource.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSchemaRequiredDataSource.java index 3800a94f88898..5f73567ade025 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSchemaRequiredDataSource.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSchemaRequiredDataSource.java @@ -15,11 +15,15 @@ * limitations under the License. 
*/ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; -import org.apache.spark.sql.sources.v2.Table; -import org.apache.spark.sql.sources.v2.TableProvider; -import org.apache.spark.sql.sources.v2.reader.*; +import java.util.Map; + +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableProvider; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.ScanBuilder; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; @@ -45,7 +49,18 @@ public InputPartition[] planInputPartitions() { } @Override - public Table getTable(CaseInsensitiveStringMap options, StructType schema) { + public boolean supportsExternalMetadata() { + return true; + } + + @Override + public StructType inferSchema(CaseInsensitiveStringMap options) { + throw new IllegalArgumentException("requires a user-supplied schema"); + } + + @Override + public Table getTable( + StructType schema, Transform[] partitioning, Map properties) { return new JavaSimpleBatchTable() { @Override @@ -59,9 +74,4 @@ public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) { } }; } - - @Override - public Table getTable(CaseInsensitiveStringMap options) { - throw new IllegalArgumentException("requires a user-supplied schema"); - } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleBatchTable.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleBatchTable.java similarity index 81% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleBatchTable.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleBatchTable.java index 64663d5db4bed..71cf97b56fe54 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleBatchTable.java +++ 
b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleBatchTable.java @@ -15,15 +15,16 @@ * limitations under the License. */ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; import java.util.Arrays; import java.util.HashSet; import java.util.Set; -import org.apache.spark.sql.sources.v2.SupportsRead; -import org.apache.spark.sql.sources.v2.Table; -import org.apache.spark.sql.sources.v2.TableCapability; +import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.catalog.SupportsRead; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCapability; import org.apache.spark.sql.types.StructType; abstract class JavaSimpleBatchTable implements Table, SupportsRead { @@ -34,7 +35,7 @@ abstract class JavaSimpleBatchTable implements Table, SupportsRead { @Override public StructType schema() { - return new StructType().add("i", "int").add("j", "int"); + return TestingV2Source.schema(); } @Override diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleDataSourceV2.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleDataSourceV2.java similarity index 81% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleDataSourceV2.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleDataSourceV2.java index 7474f36c97f75..8852249d8a01f 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleDataSourceV2.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleDataSourceV2.java @@ -15,14 +15,15 @@ * limitations under the License. 
*/ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; -import org.apache.spark.sql.sources.v2.Table; -import org.apache.spark.sql.sources.v2.TableProvider; -import org.apache.spark.sql.sources.v2.reader.*; +import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.ScanBuilder; import org.apache.spark.sql.util.CaseInsensitiveStringMap; -public class JavaSimpleDataSourceV2 implements TableProvider { +public class JavaSimpleDataSourceV2 implements TestingV2Source { class MyScanBuilder extends JavaSimpleScanBuilder { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleReaderFactory.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleReaderFactory.java similarity index 86% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleReaderFactory.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleReaderFactory.java index 740279033c416..0c702031a939b 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleReaderFactory.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleReaderFactory.java @@ -15,13 +15,13 @@ * limitations under the License. 
*/ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; -import org.apache.spark.sql.sources.v2.reader.InputPartition; -import org.apache.spark.sql.sources.v2.reader.PartitionReader; -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.PartitionReader; +import org.apache.spark.sql.connector.read.PartitionReaderFactory; class JavaSimpleReaderFactory implements PartitionReaderFactory { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleScanBuilder.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleScanBuilder.java similarity index 77% rename from sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleScanBuilder.java rename to sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleScanBuilder.java index 217e66950d146..bdd9dd3ea0ce0 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/v2/JavaSimpleScanBuilder.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaSimpleScanBuilder.java @@ -15,12 +15,13 @@ * limitations under the License. 
*/ -package test.org.apache.spark.sql.sources.v2; +package test.org.apache.spark.sql.connector; -import org.apache.spark.sql.sources.v2.reader.Batch; -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory; -import org.apache.spark.sql.sources.v2.reader.Scan; -import org.apache.spark.sql.sources.v2.reader.ScanBuilder; +import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.read.Batch; +import org.apache.spark.sql.connector.read.PartitionReaderFactory; +import org.apache.spark.sql.connector.read.Scan; +import org.apache.spark.sql.connector.read.ScanBuilder; import org.apache.spark.sql.types.StructType; abstract class JavaSimpleScanBuilder implements ScanBuilder, Scan, Batch { @@ -37,7 +38,7 @@ public Batch toBatch() { @Override public StructType readSchema() { - return new StructType().add("i", "int").add("j", "int"); + return TestingV2Source.schema(); } @Override diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java index 92dabc79d2bff..564e76737ecde 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/execution/sort/RecordBinaryComparatorSuite.java @@ -33,6 +33,7 @@ import org.apache.spark.util.collection.unsafe.sort.*; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -81,14 +82,14 @@ private void insertRow(UnsafeRow row) { int recordLength = row.getSizeInBytes(); Object baseObject = dataPage.getBaseObject(); - assert(pageCursor + recordLength <= dataPage.getBaseOffset() + dataPage.size()); + Assert.assertTrue(pageCursor + recordLength <= dataPage.getBaseOffset() + dataPage.size()); long recordAddress = memoryManager.encodePageNumberAndOffset(dataPage, pageCursor); 
UnsafeAlignedOffset.putSize(baseObject, pageCursor, recordLength); pageCursor += uaoSize; Platform.copyMemory(recordBase, recordOffset, baseObject, pageCursor, recordLength); pageCursor += recordLength; - assert(pos < 2); + Assert.assertTrue(pos < 2); array.set(pos, recordAddress); pos++; } @@ -141,8 +142,8 @@ public void testBinaryComparatorForSingleColumnRow() throws Exception { insertRow(row1); insertRow(row2); - assert(compare(0, 0) == 0); - assert(compare(0, 1) < 0); + Assert.assertEquals(0, compare(0, 0)); + Assert.assertTrue(compare(0, 1) < 0); } @Test @@ -166,8 +167,8 @@ public void testBinaryComparatorForMultipleColumnRow() throws Exception { insertRow(row1); insertRow(row2); - assert(compare(0, 0) == 0); - assert(compare(0, 1) < 0); + Assert.assertEquals(0, compare(0, 0)); + Assert.assertTrue(compare(0, 1) < 0); } @Test @@ -193,8 +194,8 @@ public void testBinaryComparatorForArrayColumn() throws Exception { insertRow(row1); insertRow(row2); - assert(compare(0, 0) == 0); - assert(compare(0, 1) > 0); + Assert.assertEquals(0, compare(0, 0)); + Assert.assertTrue(compare(0, 1) > 0); } @Test @@ -226,8 +227,8 @@ public void testBinaryComparatorForMixedColumns() throws Exception { insertRow(row1); insertRow(row2); - assert(compare(0, 0) == 0); - assert(compare(0, 1) > 0); + Assert.assertEquals(0, compare(0, 0)); + Assert.assertTrue(compare(0, 1) > 0); } @Test @@ -252,8 +253,8 @@ public void testBinaryComparatorForNullColumns() throws Exception { insertRow(row1); insertRow(row2); - assert(compare(0, 0) == 0); - assert(compare(0, 1) > 0); + Assert.assertEquals(0, compare(0, 0)); + Assert.assertTrue(compare(0, 1) > 0); } @Test @@ -273,7 +274,7 @@ public void testBinaryComparatorWhenSubtractionIsDivisibleByMaxIntValue() throws insertRow(row1); insertRow(row2); - assert(compare(0, 1) < 0); + Assert.assertTrue(compare(0, 1) > 0); } @Test @@ -293,7 +294,7 @@ public void testBinaryComparatorWhenSubtractionCanOverflowLongValue() throws Exc insertRow(row1); insertRow(row2); 
- assert(compare(0, 1) < 0); + Assert.assertTrue(compare(0, 1) < 0); } @Test @@ -319,6 +320,50 @@ public void testBinaryComparatorWhenOnlyTheLastColumnDiffers() throws Exception insertRow(row1); insertRow(row2); - assert(compare(0, 1) < 0); + Assert.assertTrue(compare(0, 1) < 0); + } + + @Test + public void testCompareLongsAsLittleEndian() { + long arrayOffset = Platform.LONG_ARRAY_OFFSET + 4; + + long[] arr1 = new long[2]; + Platform.putLong(arr1, arrayOffset, 0x0100000000000000L); + long[] arr2 = new long[2]; + Platform.putLong(arr2, arrayOffset + 4, 0x0000000000000001L); + // leftBaseOffset is not aligned while rightBaseOffset is aligned, + // it will start by comparing long + int result1 = binaryComparator.compare(arr1, arrayOffset, 8, arr2, arrayOffset + 4, 8); + + long[] arr3 = new long[2]; + Platform.putLong(arr3, arrayOffset, 0x0100000000000000L); + long[] arr4 = new long[2]; + Platform.putLong(arr4, arrayOffset, 0x0000000000000001L); + // both left and right offset is not aligned, it will start with byte-by-byte comparison + int result2 = binaryComparator.compare(arr3, arrayOffset, 8, arr4, arrayOffset, 8); + + Assert.assertEquals(result1, result2); + } + + @Test + public void testCompareLongsAsUnsigned() { + long arrayOffset = Platform.LONG_ARRAY_OFFSET + 4; + + long[] arr1 = new long[2]; + Platform.putLong(arr1, arrayOffset + 4, 0xa000000000000000L); + long[] arr2 = new long[2]; + Platform.putLong(arr2, arrayOffset + 4, 0x0000000000000000L); + // both leftBaseOffset and rightBaseOffset are aligned, so it will start by comparing long + int result1 = binaryComparator.compare(arr1, arrayOffset + 4, 8, arr2, arrayOffset + 4, 8); + + long[] arr3 = new long[2]; + Platform.putLong(arr3, arrayOffset, 0xa000000000000000L); + long[] arr4 = new long[2]; + Platform.putLong(arr4, arrayOffset, 0x0000000000000000L); + // both leftBaseOffset and rightBaseOffset are not aligned, + // so it will start with byte-by-byte comparison + int result2 = 
binaryComparator.compare(arr3, arrayOffset, 8, arr4, arrayOffset, 8); + + Assert.assertEquals(result1, result2); } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/streaming/JavaDataStreamReaderWriterSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/streaming/JavaDataStreamReaderWriterSuite.java index 48cdb2642d830..5903623847f52 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/streaming/JavaDataStreamReaderWriterSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/streaming/JavaDataStreamReaderWriterSuite.java @@ -18,6 +18,7 @@ package test.org.apache.spark.sql.streaming; import java.io.File; +import java.util.concurrent.TimeoutException; import org.junit.After; import org.junit.Before; @@ -52,7 +53,7 @@ public void tearDown() { } @Test - public void testForeachBatchAPI() { + public void testForeachBatchAPI() throws TimeoutException { StreamingQuery query = spark .readStream() .textFile(input) @@ -66,7 +67,7 @@ public void call(Dataset v1, Long v2) throws Exception {} } @Test - public void testForeachAPI() { + public void testForeachAPI() throws TimeoutException { StreamingQuery query = spark .readStream() .textFile(input) diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql new file mode 100644 index 0000000000000..d190f38345d6b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql @@ -0,0 +1,32 @@ +-- SPARK-23179: SQL ANSI 2011 states that in case of overflow during arithmetic operations, +-- an exception should be thrown instead of returning NULL. +-- This is what most of the SQL DBs do (eg. SQLServer, DB2). 
+ +-- tests for decimals handling in operations +create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet; + +insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123), + (3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789); + +-- test operations between decimals and constants +select id, a*10, b/10 from decimals_test order by id; + +-- test operations on constants +select 10.3 * 3.0; +select 10.3000 * 3.0; +select 10.30000 * 30.0; +select 10.300000000000000000 * 3.000000000000000000; +select 10.300000000000000000 * 3.0000000000000000000; + +-- arithmetic operations causing an overflow throw exception +select (5e36BD + 0.1) + 5e36BD; +select (-4e36BD - 0.1) - 7e36BD; +select 12345678901234567890.0 * 12345678901234567890.0; +select 1e35BD / 0.1; + +-- arithmetic operations causing a precision loss throw exception +select 123456789123456789.1234567890 * 1.123456789123456789; +select 123456789123456789.1234567890 * 1.123456789123456789; +select 12345678912345.123456789123 / 0.000000012345678; + +drop table decimals_test; diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/higher-order-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/higher-order-functions.sql new file mode 100644 index 0000000000000..1e2424fe47cad --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/higher-order-functions.sql @@ -0,0 +1 @@ +--IMPORT higher-order-functions.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/interval.sql index f2f4b02c8634b..215ce9658e1ad 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/interval.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/interval.sql @@ -1,188 +1 @@ --- Turns on ANSI mode -SET spark.sql.parser.ansi.enabled=true; - -select - '1' second, - 2 seconds, - '1' minute, - 2 minutes, - '1' hour, - 2 hours, - '1' day, - 2 days, - '1' 
month, - 2 months, - '1' year, - 2 years; - -select - interval '10-11' year to month, - interval '10' year, - interval '11' month; - -select - '10-11' year to month, - '10' year, - '11' month; - -select - interval '10 9:8:7.987654321' day to second, - interval '10' day, - interval '11' hour, - interval '12' minute, - interval '13' second, - interval '13.123456789' second; - -select - '10 9:8:7.987654321' day to second, - '10' day, - '11' hour, - '12' minute, - '13' second, - '13.123456789' second; - -select map(1, interval 1 day, 2, interval 3 week); - -select map(1, 1 day, 2, 3 week); - --- Interval year-month arithmetic - -create temporary view interval_arithmetic as - select CAST(dateval AS date), CAST(tsval AS timestamp) from values - ('2012-01-01', '2012-01-01') - as interval_arithmetic(dateval, tsval); - -select - dateval, - dateval - interval '2-2' year to month, - dateval - interval '-2-2' year to month, - dateval + interval '2-2' year to month, - dateval + interval '-2-2' year to month, - - interval '2-2' year to month + dateval, - interval '2-2' year to month + dateval -from interval_arithmetic; - -select - dateval, - dateval - '2-2' year to month, - dateval - '-2-2' year to month, - dateval + '2-2' year to month, - dateval + '-2-2' year to month, - - '2-2' year to month + dateval, - '2-2' year to month + dateval -from interval_arithmetic; - -select - tsval, - tsval - interval '2-2' year to month, - tsval - interval '-2-2' year to month, - tsval + interval '2-2' year to month, - tsval + interval '-2-2' year to month, - - interval '2-2' year to month + tsval, - interval '2-2' year to month + tsval -from interval_arithmetic; - -select - tsval, - tsval - '2-2' year to month, - tsval - '-2-2' year to month, - tsval + '2-2' year to month, - tsval + '-2-2' year to month, - - '2-2' year to month + tsval, - '2-2' year to month + tsval -from interval_arithmetic; - -select - interval '2-2' year to month + interval '3-3' year to month, - interval '2-2' year to month 
- interval '3-3' year to month -from interval_arithmetic; - -select - '2-2' year to month + '3-3' year to month, - '2-2' year to month - '3-3' year to month -from interval_arithmetic; - --- Interval day-time arithmetic - -select - dateval, - dateval - interval '99 11:22:33.123456789' day to second, - dateval - interval '-99 11:22:33.123456789' day to second, - dateval + interval '99 11:22:33.123456789' day to second, - dateval + interval '-99 11:22:33.123456789' day to second, - -interval '99 11:22:33.123456789' day to second + dateval, - interval '99 11:22:33.123456789' day to second + dateval -from interval_arithmetic; - -select - dateval, - dateval - '99 11:22:33.123456789' day to second, - dateval - '-99 11:22:33.123456789' day to second, - dateval + '99 11:22:33.123456789' day to second, - dateval + '-99 11:22:33.123456789' day to second, - - '99 11:22:33.123456789' day to second + dateval, - '99 11:22:33.123456789' day to second + dateval -from interval_arithmetic; - -select - tsval, - tsval - interval '99 11:22:33.123456789' day to second, - tsval - interval '-99 11:22:33.123456789' day to second, - tsval + interval '99 11:22:33.123456789' day to second, - tsval + interval '-99 11:22:33.123456789' day to second, - -interval '99 11:22:33.123456789' day to second + tsval, - interval '99 11:22:33.123456789' day to second + tsval -from interval_arithmetic; - -select - tsval, - tsval - '99 11:22:33.123456789' day to second, - tsval - '-99 11:22:33.123456789' day to second, - tsval + '99 11:22:33.123456789' day to second, - tsval + '-99 11:22:33.123456789' day to second, - - '99 11:22:33.123456789' day to second + tsval, - '99 11:22:33.123456789' day to second + tsval -from interval_arithmetic; - -select - interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, - interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second -from interval_arithmetic; - -select - '99 11:22:33.123456789' day to 
second + '10 9:8:7.123456789' day to second, - '99 11:22:33.123456789' day to second - '10 9:8:7.123456789' day to second -from interval_arithmetic; - --- More tests for interval syntax alternatives - -select 30 day; - -select 30 day day; - -select 30 day day day; - -select date '2012-01-01' - 30 day; - -select date '2012-01-01' - 30 day day; - -select date '2012-01-01' - 30 day day day; - -select date '2012-01-01' + '-30' day; - -select date '2012-01-01' + interval '-30' day; - --- Unsupported syntax for intervals - -select date '2012-01-01' + interval (-30) day; - -select date '2012-01-01' + (-30) day; - -create temporary view t as select * from values (1), (2) as t(a); - -select date '2012-01-01' + interval (a + 1) day from t; - -select date '2012-01-01' + (a + 1) day from t; - --- Turns off ANSI mode -SET spark.sql.parser.ansi.enabled=false; +--IMPORT interval.sql \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/literals.sql new file mode 100644 index 0000000000000..698e8fa886307 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/literals.sql @@ -0,0 +1,2 @@ +--- malformed interval literal with ansi mode +--IMPORT literals.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql b/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql new file mode 100644 index 0000000000000..5e665e4c0c384 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql @@ -0,0 +1,70 @@ +-- test cases for bitwise functions + +-- null +select bit_count(null); + +-- boolean +select bit_count(true); +select bit_count(false); + +-- byte/tinyint +select bit_count(cast(1 as tinyint)); +select bit_count(cast(2 as tinyint)); +select bit_count(cast(3 as tinyint)); + +-- short/smallint +select bit_count(1S); +select bit_count(2S); +select bit_count(3S); + +-- int +select bit_count(1); +select bit_count(2); +select bit_count(3); + +-- 
long/bigint +select bit_count(1L); +select bit_count(2L); +select bit_count(3L); + +-- negative num +select bit_count(-1L); + +-- edge value +select bit_count(9223372036854775807L); +select bit_count(-9223372036854775808L); + +-- other illegal arguments +select bit_count("bit count"); +select bit_count('a'); + +-- test for bit_xor +-- +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (2, 3, 4, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4); + +-- empty case +SELECT BIT_XOR(b3) AS n1 FROM bitwise_test where 1 = 0; + +-- null case +SELECT BIT_XOR(b4) AS n1 FROM bitwise_test where b4 is null; + +-- the suffix numbers show the expected answer +SELECT + BIT_XOR(cast(b1 as tinyint)) AS a4, + BIT_XOR(cast(b2 as smallint)) AS b5, + BIT_XOR(b3) AS c2, + BIT_XOR(b4) AS d2, + BIT_XOR(distinct b4) AS e2 +FROM bitwise_test; + +-- group by +SELECT bit_xor(b3) FROM bitwise_test GROUP BY b1 & 1; + +--having +SELECT b1, bit_xor(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7; + +-- window +SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index 8a035f594be54..972ebdd01f61e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -60,3 +60,18 @@ DESC FUNCTION EXTENDED boolean; -- cast string to interval and interval to string SELECT CAST('interval 3 month 1 hour' AS interval); SELECT CAST(interval 3 month 1 hour AS string); + +-- trim string before cast to numeric +select cast(' 1' as tinyint); +select cast(' 1\t' as tinyint); +select cast(' 1' as smallint); +select cast(' 1' as INT); +select cast(' 1' as bigint); +select cast(' 1' as float); +select cast(' 1 ' as DOUBLE); +select cast('1.0 ' as DEC); + +-- trim string before cast to boolean +select cast('\t\t true \n\r ' as boolean); +select cast('\t\n 
false \t\r' as boolean); +select cast('\t\n xyz \t\r' as boolean); diff --git a/sql/core/src/test/resources/sql-tests/inputs/change-column.sql b/sql/core/src/test/resources/sql-tests/inputs/change-column.sql index 6f5ac221ce79c..2b57891cfcbc5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/change-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/change-column.sql @@ -2,56 +2,48 @@ CREATE TABLE test_change(a INT, b STRING, c INT) using parquet; DESC test_change; --- Change column name (not supported yet) -ALTER TABLE test_change CHANGE a a1 INT; +-- ALTER TABLE CHANGE COLUMN must change either type or comment +ALTER TABLE test_change CHANGE a; +DESC test_change; + +-- Change column name (not supported on v1 table) +ALTER TABLE test_change RENAME COLUMN a TO a1; DESC test_change; -- Change column dataType (not supported yet) -ALTER TABLE test_change CHANGE a a STRING; +ALTER TABLE test_change CHANGE a TYPE STRING; DESC test_change; -- Change column position (not supported yet) -ALTER TABLE test_change CHANGE a a INT AFTER b; -ALTER TABLE test_change CHANGE b b STRING FIRST; +ALTER TABLE test_change CHANGE a AFTER b; +ALTER TABLE test_change CHANGE b FIRST; DESC test_change; -- Change column comment -ALTER TABLE test_change CHANGE a a INT COMMENT 'this is column a'; -ALTER TABLE test_change CHANGE b b STRING COMMENT '#*02?`'; -ALTER TABLE test_change CHANGE c c INT COMMENT ''; +ALTER TABLE test_change CHANGE a COMMENT 'this is column a'; +ALTER TABLE test_change CHANGE b COMMENT '#*02?`'; +ALTER TABLE test_change CHANGE c COMMENT ''; DESC test_change; -- Don't change anything. 
-ALTER TABLE test_change CHANGE a a INT COMMENT 'this is column a'; +ALTER TABLE test_change CHANGE a TYPE INT; +ALTER TABLE test_change CHANGE a COMMENT 'this is column a'; DESC test_change; -- Change a invalid column -ALTER TABLE test_change CHANGE invalid_col invalid_col INT; +ALTER TABLE test_change CHANGE invalid_col TYPE INT; DESC test_change; --- Change column name/dataType/position/comment together (not supported yet) -ALTER TABLE test_change CHANGE a a1 STRING COMMENT 'this is column a1' AFTER b; -DESC test_change; - --- Check the behavior with different values of CASE_SENSITIVE -SET spark.sql.caseSensitive=false; -ALTER TABLE test_change CHANGE a A INT COMMENT 'this is column A'; -SET spark.sql.caseSensitive=true; -ALTER TABLE test_change CHANGE a A INT COMMENT 'this is column A1'; +-- Check case insensitivity. +ALTER TABLE test_change CHANGE A COMMENT 'case insensitivity'; DESC test_change; -- Change column can't apply to a temporary/global_temporary view CREATE TEMPORARY VIEW temp_view(a, b) AS SELECT 1, "one"; -ALTER TABLE temp_view CHANGE a a INT COMMENT 'this is column a'; +ALTER TABLE temp_view CHANGE a TYPE INT; CREATE GLOBAL TEMPORARY VIEW global_temp_view(a, b) AS SELECT 1, "one"; -ALTER TABLE global_temp.global_temp_view CHANGE a a INT COMMENT 'this is column a'; - --- Change column in partition spec (not supported yet) -CREATE TABLE partition_table(a INT, b STRING, c INT, d STRING) USING parquet PARTITIONED BY (c, d); -ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT; -ALTER TABLE partition_table CHANGE COLUMN c c INT COMMENT 'this is column C'; +ALTER TABLE global_temp.global_temp_view CHANGE a TYPE INT; -- DROP TEST TABLE DROP TABLE test_change; -DROP TABLE partition_table; DROP VIEW global_temp.global_temp_view; diff --git a/sql/core/src/test/resources/sql-tests/inputs/comparator.sql b/sql/core/src/test/resources/sql-tests/inputs/comparator.sql index 3e2447723e576..70af4f75ac431 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/comparator.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/comparator.sql @@ -1,3 +1,13 @@ -- binary type select x'00' < x'0f'; select x'00' < x'ff'; + +-- trim string to numeric +select '1 ' = 1Y; +select '\t1 ' = 1Y; +select '1 ' = 1S; +select '1 ' = 1; +select ' 1' = 1L; +select ' 1' = cast(1.0 as float); +select ' 1.0 ' = 1.0D; +select ' 1.0 ' = 1.0BD; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql new file mode 100644 index 0000000000000..b711bf338ab08 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nonlegacy.sql @@ -0,0 +1,2 @@ +--SET spark.sql.legacy.ctePrecedence.enabled = false +--IMPORT cte.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/date_part.sql b/sql/core/src/test/resources/sql-tests/inputs/date_part.sql new file mode 100644 index 0000000000000..a63cdafb745a0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/date_part.sql @@ -0,0 +1,145 @@ +CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c; + +select date_part('millennium', c) from t; +select date_part('millennia', c) from t; +select date_part('mil', c) from t; +select date_part('mils', c) from t; + +select date_part('century', c) from t; +select date_part('centuries', c) from t; +select date_part('c', c) from t; +select date_part('cent', c) from t; + +select date_part('decade', c) from t; +select date_part('decades', c) from t; +select date_part('dec', c) from t; +select date_part('decs', c) from t; + +select date_part('year', c) from t; +select date_part('y', c) from t; +select date_part('years', c) from t; +select date_part('yr', c) from t; +select date_part('yrs', c) from t; + +select date_part('quarter', c) from t; +select date_part('qtr', c) from t; + +select date_part('month', c) from t; +select date_part('mon', c) from t; +select date_part('mons', c) 
from t; +select date_part('months', c) from t; + +select date_part('week', c) from t; +select date_part('w', c) from t; +select date_part('weeks', c) from t; + +select date_part('day', c) from t; +select date_part('d', c) from t; +select date_part('days', c) from t; + +select date_part('dayofweek', c) from t; + +select date_part('dow', c) from t; + +select date_part('isodow', c) from t; + +select date_part('doy', c) from t; + +select date_part('hour', c) from t; +select date_part('h', c) from t; +select date_part('hours', c) from t; +select date_part('hr', c) from t; +select date_part('hrs', c) from t; + +select date_part('minute', c) from t; +select date_part('m', c) from t; +select date_part('min', c) from t; +select date_part('mins', c) from t; +select date_part('minutes', c) from t; + +select date_part('second', c) from t; +select date_part('s', c) from t; +select date_part('sec', c) from t; +select date_part('seconds', c) from t; +select date_part('secs', c) from t; + +select date_part('not_supported', c) from t; + +select date_part(c, c) from t; + +select date_part(null, c) from t; + +CREATE TEMPORARY VIEW t2 AS select interval 1010 year 9 month 8 day 7 hour 6 minute 5 second 4 millisecond 3 microsecond as c; + +select date_part('millennium', c) from t2; +select date_part('millennia', c) from t2; +select date_part('mil', c) from t2; +select date_part('mils', c) from t2; + +select date_part('century', c) from t2; +select date_part('centuries', c) from t2; +select date_part('c', c) from t2; +select date_part('cent', c) from t2; + +select date_part('decade', c) from t2; +select date_part('decades', c) from t2; +select date_part('dec', c) from t2; +select date_part('decs', c) from t2; + +select date_part('year', c) from t2; +select date_part('y', c) from t2; +select date_part('years', c) from t2; +select date_part('yr', c) from t2; +select date_part('yrs', c) from t2; + +select date_part('quarter', c) from t2; +select date_part('qtr', c) from t2; + +select 
date_part('month', c) from t2; +select date_part('mon', c) from t2; +select date_part('mons', c) from t2; +select date_part('months', c) from t2; + +select date_part('day', c) from t2; +select date_part('d', c) from t2; +select date_part('days', c) from t2; + +select date_part('hour', c) from t2; +select date_part('h', c) from t2; +select date_part('hours', c) from t2; +select date_part('hr', c) from t2; +select date_part('hrs', c) from t2; + +select date_part('minute', c) from t2; +select date_part('m', c) from t2; +select date_part('min', c) from t2; +select date_part('mins', c) from t2; +select date_part('minutes', c) from t2; + +select date_part('second', c) from t2; +select date_part('s', c) from t2; +select date_part('sec', c) from t2; +select date_part('seconds', c) from t2; +select date_part('secs', c) from t2; + +select date_part('milliseconds', c) from t2; +select date_part('msec', c) from t2; +select date_part('msecs', c) from t2; +select date_part('millisecon', c) from t2; +select date_part('mseconds', c) from t2; +select date_part('ms', c) from t2; + +select date_part('microseconds', c) from t2; +select date_part('usec', c) from t2; +select date_part('usecs', c) from t2; +select date_part('useconds', c) from t2; +select date_part('microsecon', c) from t2; +select date_part('us', c) from t2; + +select date_part('epoch', c) from t2; + +select date_part('not_supported', c) from t2; + +select date_part(c, c) from t2; + +select date_part(null, c) from t2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 2f7ffb73e86b8..b14778b91510e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -30,7 +30,48 @@ select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), week select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01'); -select date '2001-09-28' + 7; 
-select 7 + date '2001-09-28'; + +select date '2019-01-01\t'; +select timestamp '2019-01-01\t'; + +-- time add/sub +select timestamp'2011-11-11 11:11:11' + interval '2' day; +select timestamp'2011-11-11 11:11:11' - interval '2' day; +select date'2011-11-11 11:11:11' + interval '2' second; +select date'2011-11-11 11:11:11' - interval '2' second; +select '2011-11-11' - interval '2' day; +select '2011-11-11 11:11:11' - interval '2' second; +select '1' - interval '2' second; +select 1 - interval '2' second; + +-- subtract timestamps +select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678'; +select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'; +select timestamp'2019-10-06 10:11:12.345678' - null; +select null - timestamp'2019-10-06 10:11:12.345678'; + +-- date add/sub +select date_add('2011-11-11', 1Y); +select date_add('2011-11-11', 1S); +select date_add('2011-11-11', 1); +select date_add('2011-11-11', 1L); +select date_add('2011-11-11', 1.0); +select date_add('2011-11-11', 1E1); +select date_add('2011-11-11', '1'); +select date_add(date'2011-11-11', 1); +select date_add(timestamp'2011-11-11', 1); +select date_sub(date'2011-11-11', 1); +select date_sub(timestamp'2011-11-11', 1); +select date_sub(null, 1); +select date_sub(date'2011-11-11', null); +select date'2011-11-11' + 1E1; +select null + date '2001-09-28'; +select date '2001-09-28' + 7Y; +select 7S + date '2001-09-28'; select date '2001-10-01' - 7; +select date '2001-09-28' + null; +select date '2001-09-28' - null; + +-- subtract dates +select null - date '2019-10-06'; select date '2001-10-01' - date '2001-09-28'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql index 35f2be46cd130..a3bc282cd6ae8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/decimalArithmeticOperations.sql @@ -43,11 
+43,11 @@ select 10.300000000000000000 * 3.0000000000000000000; select 2.35E10 * 1.0; -- arithmetic operations causing an overflow return NULL -select (5e36 + 0.1) + 5e36; -select (-4e36 - 0.1) - 7e36; +select (5e36BD + 0.1) + 5e36BD; +select (-4e36BD - 0.1) - 7e36BD; select 12345678901234567890.0 * 12345678901234567890.0; -select 1e35 / 0.1; -select 1.2345678901234567890E30 * 1.2345678901234567890E25; +select 1e35BD / 0.1; +select 1.2345678901234567890E30BD * 1.2345678901234567890E25BD; -- arithmetic operations causing a precision loss are truncated select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345; @@ -72,39 +72,15 @@ select 10.300000000000000000 * 3.0000000000000000000; select 2.35E10 * 1.0; -- arithmetic operations causing an overflow return NULL -select (5e36 + 0.1) + 5e36; -select (-4e36 - 0.1) - 7e36; +select (5e36BD + 0.1) + 5e36BD; +select (-4e36BD - 0.1) - 7e36BD; select 12345678901234567890.0 * 12345678901234567890.0; -select 1e35 / 0.1; -select 1.2345678901234567890E30 * 1.2345678901234567890E25; +select 1e35BD / 0.1; +select 1.2345678901234567890E30BD * 1.2345678901234567890E25BD; -- arithmetic operations causing a precision loss return NULL select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345; select 123456789123456789.1234567890 * 1.123456789123456789; select 12345678912345.123456789123 / 0.000000012345678; --- throw an exception instead of returning NULL, according to SQL ANSI 2011 -set spark.sql.decimalOperations.nullOnOverflow=false; - --- test operations between decimals and constants -select id, a*10, b/10 from decimals_test order by id; - --- test operations on constants -select 10.3 * 3.0; -select 10.3000 * 3.0; -select 10.30000 * 30.0; -select 10.300000000000000000 * 3.000000000000000000; -select 10.300000000000000000 * 3.0000000000000000000; - --- arithmetic operations causing an overflow throw exception -select (5e36 + 0.1) + 5e36; -select (-4e36 - 0.1) - 7e36; -select 
12345678901234567890.0 * 12345678901234567890.0; -select 1e35 / 0.1; - --- arithmetic operations causing a precision loss throw exception -select 123456789123456789.1234567890 * 1.123456789123456789; -select 123456789123456789.1234567890 * 1.123456789123456789; -select 12345678912345.123456789123 / 0.000000012345678; - drop table decimals_test; diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql index 2d180d118da7a..821cb473751eb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql @@ -49,3 +49,16 @@ DROP VIEW desc_col_temp_view; DROP TABLE desc_col_table; DROP TABLE desc_complex_col_table; + +--Test case insensitive + +CREATE TABLE customer(CName STRING); + +INSERT INTO customer VALUES('Maria'); + +ANALYZE TABLE customer COMPUTE STATISTICS FOR COLUMNS cname; + +DESC EXTENDED customer cname; + +DROP TABLE customer; + diff --git a/sql/core/src/test/resources/sql-tests/inputs/explain.sql b/sql/core/src/test/resources/sql-tests/inputs/explain.sql index 773c123992f71..497b61c6134a2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/explain.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/explain.sql @@ -1,7 +1,11 @@ +--SET spark.sql.codegen.wholeStage = true +--SET spark.sql.adaptive.enabled = false + -- Test tables CREATE table explain_temp1 (key int, val int) USING PARQUET; CREATE table explain_temp2 (key int, val int) USING PARQUET; CREATE table explain_temp3 (key int, val int) USING PARQUET; +CREATE table explain_temp4 (key int, val string) USING PARQUET; SET spark.sql.codegen.wholeStage = true; @@ -58,7 +62,7 @@ EXPLAIN FORMATTED FROM explain_temp2 WHERE val > 0) OR - key = (SELECT max(key) + key = (SELECT avg(key) FROM explain_temp3 WHERE val > 0); @@ -90,6 +94,25 @@ EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM 
explain_temp1; +-- HashAggregate +EXPLAIN FORMATTED + SELECT + COUNT(val) + SUM(key) as TOTAL, + COUNT(key) FILTER (WHERE val > 1) + FROM explain_temp1; + +-- ObjectHashAggregate +EXPLAIN FORMATTED + SELECT key, sort_array(collect_set(val))[0] + FROM explain_temp4 + GROUP BY key; + +-- SortAggregate +EXPLAIN FORMATTED + SELECT key, MIN(val) + FROM explain_temp4 + GROUP BY key; + -- cleanup DROP TABLE explain_temp1; DROP TABLE explain_temp2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql new file mode 100644 index 0000000000000..beb5b9e5fe516 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql @@ -0,0 +1,132 @@ +-- Test filter clause for aggregate expression. + +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b); + +CREATE OR REPLACE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id); + +CREATE OR REPLACE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state); + +-- Aggregate with filter and empty GroupBy expressions. 
+SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData; +SELECT COUNT(a) FILTER (WHERE a = 1), COUNT(b) FILTER (WHERE a > 1) FROM testData; +SELECT COUNT(id) FILTER (WHERE hiredate = date "2001-01-01") FROM emp; +SELECT COUNT(id) FILTER (WHERE hiredate = to_date('2001-01-01 00:00:00')) FROM emp; +SELECT COUNT(id) FILTER (WHERE hiredate = to_timestamp("2001-01-01 00:00:00")) FROM emp; +SELECT COUNT(id) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd") = "2001-01-01") FROM emp; +-- [SPARK-30276] Support Filter expression allows simultaneous use of DISTINCT +-- SELECT COUNT(DISTINCT id) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd HH:mm:ss") = "2001-01-01 00:00:00") FROM emp; + +-- Aggregate with filter and non-empty GroupBy expressions. +SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData GROUP BY a; +SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b; +SELECT COUNT(a) FILTER (WHERE a >= 0), COUNT(b) FILTER (WHERE a >= 3) FROM testData GROUP BY a; +SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > date "2003-01-01") FROM emp GROUP BY dept_id; +SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > to_date("2003-01-01")) FROM emp GROUP BY dept_id; +SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > to_timestamp("2003-01-01 00:00:00")) FROM emp GROUP BY dept_id; +SELECT dept_id, SUM(salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd") > "2003-01-01") FROM emp GROUP BY dept_id; +-- [SPARK-30276] Support Filter expression allows simultaneous use of DISTINCT +-- SELECT dept_id, SUM(DISTINCT salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd HH:mm:ss") > "2001-01-01 00:00:00") FROM emp GROUP BY dept_id; + +-- Aggregate with filter and grouped by literals. 
+SELECT 'foo', COUNT(a) FILTER (WHERE b <= 2) FROM testData GROUP BY 1; +SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= date "2003-01-01") FROM emp GROUP BY 1; +SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= to_date("2003-01-01")) FROM emp GROUP BY 1; +SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= to_timestamp("2003-01-01")) FROM emp GROUP BY 1; + +-- Aggregate with filter, more than one aggregate function goes with distinct. +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary), sum(salary) filter (where id > 200) from emp group by dept_id; +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary), sum(salary) filter (where id + dept_id > 500) from emp group by dept_id; +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id > 200) from emp group by dept_id; +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id + dept_id > 500) from emp group by dept_id; +-- [SPARK-30276] Support Filter expression allows simultaneous use of DISTINCT +-- select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate), sum(salary) from emp group by dept_id; +-- select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) from emp group by dept_id; +-- select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) filter (where salary < 400.00D) from emp group by dept_id; +-- select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct hiredate) filter (where hiredate > date "2003-01-01"), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id > 200) from emp group by dept_id; +-- select dept_id, count(distinct 
emp_name) filter (where id > 200), count(distinct emp_name), sum(salary) from emp group by dept_id; +-- select dept_id, count(distinct emp_name) filter (where id > 200), count(distinct emp_name) filter (where hiredate > date "2003-01-01"), sum(salary) from emp group by dept_id; + +-- Aggregate with filter and grouped by literals (hash aggregate), here the input table is filtered using WHERE. +SELECT 'foo', APPROX_COUNT_DISTINCT(a) FILTER (WHERE b >= 0) FROM testData WHERE a = 0 GROUP BY 1; + +-- Aggregate with filter and grouped by literals (sort aggregate), here the input table is filtered using WHERE. +SELECT 'foo', MAX(STRUCT(a)) FILTER (WHERE b >= 1) FROM testData WHERE a = 0 GROUP BY 1; + +-- Aggregate with filter and complex GroupBy expressions. +SELECT a + b, COUNT(b) FILTER (WHERE b >= 2) FROM testData GROUP BY a + b; +SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1; +SELECT a + 1 + 1, COUNT(b) FILTER (WHERE b > 0) FROM testData GROUP BY a + 1; + +-- Aggregate with filter, foldable input and multiple distinct groups. 
+-- [SPARK-30276] Support Filter expression allows simultaneous use of DISTINCT +-- SELECT COUNT(DISTINCT b) FILTER (WHERE b > 0), COUNT(DISTINCT b, c) FILTER (WHERE b > 0 AND c > 2) +-- FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a; + +-- Check analysis exceptions +SELECT a AS k, COUNT(b) FILTER (WHERE b > 0) FROM testData GROUP BY k; + +-- Aggregate with filter contains exists subquery +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE id > (SELECT 200)) +FROM emp +GROUP BY dept_id; + +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE emp.dept_id = (SELECT dept_id FROM dept LIMIT 1)) +FROM emp +GROUP BY dept_id; + +-- [SPARK-30220] Support Filter expression uses IN/EXISTS predicate sub-queries +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE EXISTS (SELECT state + FROM dept + WHERE dept.dept_id = emp.dept_id)) +FROM emp +GROUP BY dept_id; + +SELECT emp.dept_id, + Sum(salary), + Sum(salary) FILTER (WHERE NOT EXISTS (SELECT state + FROM dept + WHERE dept.dept_id = emp.dept_id)) +FROM emp +GROUP BY dept_id; + +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE emp.dept_id IN (SELECT DISTINCT dept_id + FROM dept)) +FROM emp +GROUP BY dept_id; +SELECT emp.dept_id, + Sum(salary), + Sum(salary) FILTER (WHERE emp.dept_id NOT IN (SELECT DISTINCT dept_id + FROM dept)) +FROM emp +GROUP BY dept_id; + +-- Aggregate with filter is subquery +SELECT t1.b FROM (SELECT COUNT(b) FILTER (WHERE a >= 2) AS b FROM testData) t1; diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 66bc90914e0d4..fedf03d774e42 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -1,3 +1,8 @@ +-- Test aggregate operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + -- Test data. CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) @@ -90,16 +95,16 @@ CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (5, null), (5, true), (5, false) AS test_agg(k, v); -- empty table -SELECT every(v), some(v), any(v) FROM test_agg WHERE 1 = 0; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0; -- all null values -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 4; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4; -- aggregates are null Filtering -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 5; +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5; -- group by -SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k; +SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k; -- having SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false; @@ -137,10 +142,18 @@ SELECT any(1L); -- input type checking String SELECT every("true"); --- every/some/any aggregates are supported as windows expression. +-- input type checking Decimal +SELECT bool_and(1.0); + +-- input type checking double +SELECT bool_or(1.0D); + +-- every/some/any aggregates/bool_and/bool_or are supported as windows expression. 
SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, some(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; SELECT k, v, any(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; +SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; +SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg; -- Having referencing aggregate expressions is ok. SELECT count(*) FROM test_agg HAVING count(*) > 1L; @@ -153,4 +166,3 @@ SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L; SELECT count(*) FROM test_agg WHERE count(*) > 1L; SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L; SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1; - diff --git a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql index 6bbde9f38d657..d30914fdd92df 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql @@ -51,3 +51,9 @@ SELECT a, b, c, count(d) FROM grouping GROUP BY WITH CUBE; SELECT c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS (()); +-- duplicate entries in grouping sets +SELECT k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); + +SELECT grouping__id, k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); + +SELECT grouping(k1), k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); diff --git a/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql index 02ad5e3538689..cfa06aea82b04 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql @@ -1,3 +1,8 @@ 
+-- Test higher order functions with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + create or replace temporary view nested as values (1, array(32, 97), array(array(12, 99), array(123, 42), array(1))), (2, array(77, -76), array(array(6, 96, 65), array(-1, -2))), @@ -83,3 +88,7 @@ select transform_values(ys, (k, v) -> v + 1) as v from nested; -- Transform values in a map using values select transform_values(ys, (k, v) -> k + v) as v from nested; + +-- use non reversed keywords: all is non reversed only if !ansi +select transform(ys, all -> all * all) as v from values (array(32, 97)) as t(ys); +select transform(ys, (all, i) -> all + i) as v from values (array(32, 97)) as t(ys); diff --git a/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql b/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql index 38739cb950582..5623161839331 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/inner-join.sql @@ -1,3 +1,15 @@ +-- There are 2 dimensions we want to test +-- 1. run with broadcast hash join, sort merge join or shuffle hash join. +-- 2. run with whole-stage-codegen, operator codegen or no codegen. 
+ +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=10485760 +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false + +--CONFIG_DIM2 spark.sql.codegen.wholeStage=true +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a); CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a); CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a); diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql new file mode 100644 index 0000000000000..a4e621e9639d4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql @@ -0,0 +1,234 @@ +-- test for intervals + +-- multiply and divide an interval by a number +select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15'); +select interval 4 month 2 weeks 3 microseconds * 1.5; +select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5; + +-- interval operation with null and zero case +select interval '2 seconds' / 0; +select interval '2 seconds' / null; +select interval '2 seconds' * null; +select null * interval '2 seconds'; + +-- interval with a positive/negative sign +select -interval '-1 month 1 day -1 second'; +select -interval -1 month 1 day -1 second; +select +interval '-1 month 1 day -1 second'; +select +interval -1 month 1 day -1 second; + +-- make intervals +select make_interval(1); +select make_interval(1, 2); +select make_interval(1, 2, 3); +select make_interval(1, 2, 3, 4); +select make_interval(1, 2, 3, 4, 5); +select make_interval(1, 2, 3, 4, 5, 6); +select make_interval(1, 2, 3, 4, 5, 6, 7.008009); + +-- cast string to intervals +select cast('1 second' as 
interval); +select cast('+1 second' as interval); +select cast('-1 second' as interval); +select cast('+ 1 second' as interval); +select cast('- 1 second' as interval); +select cast('- -1 second' as interval); +select cast('- +1 second' as interval); + +-- interval literal +select interval 13.123456789 seconds, interval -13.123456789 second; +select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond 9 microsecond; +select interval '30' year '25' month '-100' day '40' hour '80' minute '299.889987299' second; +select interval '0 0:0:0.1' day to second; +select interval '10-9' year to month; +select interval '20 15' day to hour; +select interval '20 15:40' day to minute; +select interval '20 15:40:32.99899999' day to second; +select interval '15:40' hour to minute; +select interval '15:40:32.99899999' hour to second; +select interval '40:32.99899999' minute to second; +select interval '40:32' minute to second; +select interval 30 day day; + +-- invalid day-time string intervals +select interval '20 15:40:32.99899999' day to hour; +select interval '20 15:40:32.99899999' day to minute; +select interval '15:40:32.99899999' hour to minute; +select interval '15:40.99899999' hour to second; +select interval '15:40' hour to second; +select interval '20 40:32.99899999' minute to second; + +-- ns is not supported +select interval 10 nanoseconds; + +-- map + interval test +select map(1, interval 1 day, 2, interval 3 week); + +-- typed interval expression +select interval 'interval 3 year 1 hour'; +select interval '3 year 1 hour'; + +-- malformed interval literal +select interval; +select interval 1 fake_unit; +select interval 1 year to month; +select interval '1' year to second; +select interval '10-9' year to month '2-1' year to month; +select interval '10-9' year to month '12:11:10' hour to second; +select interval '1 15:11' day to minute '12:11:10' hour to second; +select interval 1 year '2-1' year to month; +select interval 1 year '12:11:10' hour 
to second; +select interval '10-9' year to month '1' year; +select interval '12:11:10' hour to second '1' year; +select interval (-30) day; +select interval (a + 1) day; +select interval 30 day day day; + +-- sum interval values +-- null +select sum(cast(null as interval)); + +-- empty set +select sum(cast(v as interval)) from VALUES ('1 seconds') t(v) where 1=0; + +-- basic interval sum +select sum(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v); +select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v); +select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v); +select sum(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v); + +-- group by +select + i, + sum(cast(v as interval)) +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +group by i; + +-- having +select + sum(cast(v as interval)) as sv +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +having sv is not null; + +-- window +SELECT + i, + sum(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM VALUES(1, '1 seconds'), (1, '2 seconds'), (2, NULL), (2, NULL) t(i,v); + +-- average with interval type +-- null +select avg(cast(v as interval)) from VALUES (null) t(v); + +-- empty set +select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) where 1=0; + +-- basic interval avg +select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v); +select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v); +select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v); +select avg(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v); + +-- group by +select + i, + avg(cast(v as interval)) +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +group by i; + +-- 
having +select + avg(cast(v as interval)) as sv +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +having sv is not null; + +-- window +SELECT + i, + avg(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM VALUES (1,'1 seconds'), (1,'2 seconds'), (2,NULL), (2,NULL) t(i,v); + +-- Interval year-month arithmetic + +create temporary view interval_arithmetic as + select CAST(dateval AS date), CAST(tsval AS timestamp) from values + ('2012-01-01', '2012-01-01') + as interval_arithmetic(dateval, tsval); + +select + dateval, + dateval - interval '2-2' year to month, + dateval - interval '-2-2' year to month, + dateval + interval '2-2' year to month, + dateval + interval '-2-2' year to month, + - interval '2-2' year to month + dateval, + interval '2-2' year to month + dateval +from interval_arithmetic; + +select + tsval, + tsval - interval '2-2' year to month, + tsval - interval '-2-2' year to month, + tsval + interval '2-2' year to month, + tsval + interval '-2-2' year to month, + - interval '2-2' year to month + tsval, + interval '2-2' year to month + tsval +from interval_arithmetic; + +select + interval '2-2' year to month + interval '3-3' year to month, + interval '2-2' year to month - interval '3-3' year to month +from interval_arithmetic; + +-- Interval day-time arithmetic + +select + dateval, + dateval - interval '99 11:22:33.123456789' day to second, + dateval - interval '-99 11:22:33.123456789' day to second, + dateval + interval '99 11:22:33.123456789' day to second, + dateval + interval '-99 11:22:33.123456789' day to second, + -interval '99 11:22:33.123456789' day to second + dateval, + interval '99 11:22:33.123456789' day to second + dateval +from interval_arithmetic; + +select + tsval, + tsval - interval '99 11:22:33.123456789' day to second, + tsval - interval '-99 11:22:33.123456789' day to second, + tsval + interval '99 11:22:33.123456789' day to second, + tsval + interval '-99 
11:22:33.123456789' day to second, + -interval '99 11:22:33.123456789' day to second + tsval, + interval '99 11:22:33.123456789' day to second + tsval +from interval_arithmetic; + +select + interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, + interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second +from interval_arithmetic; + +-- control characters as white spaces +select interval '\t interval 1 day'; +select interval 'interval \t 1\tday'; +select interval 'interval\t1\tday'; +select interval '1\t' day; +select interval '1 ' day; + +-- interval overflow if (ansi) exception else NULL +select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b); +select a - b from values (interval '-2147483648 months', interval '2147483647 months') t(a, b); +select b + interval '1 month' from values (interval '-2147483648 months', interval '2147483647 months') t(a, b); +select a * 1.1 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b); +select a / 0.5 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b); + +-- interval support for csv and json functions +SELECT + from_csv('1, 1 day', 'a INT, b interval'), + to_csv(from_csv('1, 1 day', 'a INT, b interval')), + to_csv(named_struct('a', interval 32 month, 'b', interval 70 minute)), + from_csv(to_csv(named_struct('a', interval 32 month, 'b', interval 70 minute)), 'a interval, b interval'); +SELECT + from_json('{"a":"1 days"}', 'a interval'), + to_json(from_json('{"a":"1 days"}', 'a interval')), + to_json(map('a', interval 25 month 100 day 130 minute)), + from_json(to_json(map('a', interval 25 month 100 day 130 minute)), 'a interval'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql b/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql index 2e6a5f362a8fa..8afa3270f4de4 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql @@ -1,8 +1,3 @@ --- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false - CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a); CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a); diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql index 816386c483209..108cfd766af2c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/literals.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -82,12 +82,6 @@ select tImEstAmp '2016-03-11 20:54:00.000'; -- invalid timestamp select timestamp '2016-33-11 20:54:00.000'; --- interval -select interval 13.123456789 seconds, interval -13.123456789 second; -select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond, 9 microsecond; --- ns is not supported -select interval 10 nanoseconds; - -- unsupported data type select GEO '(10,-6)'; @@ -106,9 +100,15 @@ select X'XuZ'; -- Hive literal_double test. 
SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8; --- map + interval test -select map(1, interval 1 day, 2, interval 3 week); - --- typed interval expression -select interval 'interval 3 year 1 hour'; -select interval '3 year 1 hour'; +-- awareness of the negative/positive sign before type +select +date '1999-01-01'; +select +timestamp '1999-01-01'; +select +interval '1 day'; +select +map(1, 2); +select +array(1,2); +select +named_struct('a', 1, 'b', 'spark'); +select +X'1'; +-- can't negate date/timestamp/binary +select -date '1999-01-01'; +select -timestamp '1999-01-01'; +select -x'2379ACFe'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql new file mode 100644 index 0000000000000..95f71925e9294 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -0,0 +1,10 @@ +-- test for misc functions + +-- typeof +select typeof(null); +select typeof(true); +select typeof(1Y), typeof(1S), typeof(1), typeof(1L); +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); +select typeof(x'ABCD'), typeof('SPARK'); +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); diff --git a/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql index e0abeda3eb44f..71a50157b766c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql @@ -1,8 +1,3 @@ --- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false - create 
temporary view nt1 as select * from values ("one", 1), ("two", 2), diff --git a/sql/core/src/test/resources/sql-tests/inputs/order-by-nulls-ordering.sql b/sql/core/src/test/resources/sql-tests/inputs/order-by-nulls-ordering.sql index f7637b444b9fe..ad3977465c835 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/order-by-nulls-ordering.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/order-by-nulls-ordering.sql @@ -1,3 +1,8 @@ +-- Test sort operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + -- Q1. testing window functions with order by create table spark_10747(col1 int, col2 int, col3 int) using parquet; diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql index ce09c21568f13..ceb438ec34b2d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -1,7 +1,14 @@ --- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false +-- There are 2 dimensions we want to test +-- 1. run with broadcast hash join, sort merge join or shuffle hash join. +-- 2. run with whole-stage-codegen, operator codegen or no codegen. 
+ +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=10485760 +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false + +--CONFIG_DIM2 spark.sql.codegen.wholeStage=true +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN -- SPARK-17099: Incorrect result when HAVING clause is added to group by query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES @@ -29,9 +36,6 @@ CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (97) as t1(int_col1) CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (0) as t2(int_col1); --- Set the cross join enabled flag for the LEFT JOIN test since there's no join condition. --- Ultimately the join should be optimized away. -set spark.sql.crossJoin.enabled = true; SELECT * FROM ( SELECT @@ -39,6 +43,3 @@ SELECT FROM t1 LEFT JOIN t2 ON false ) t where (t.int_col) is not null; -set spark.sql.crossJoin.enabled = false; - - diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/numeric.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/numeric.sql deleted file mode 100644 index c447a0dc2c7f2..0000000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/numeric.sql +++ /dev/null @@ -1,1096 +0,0 @@ --- --- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group --- --- --- NUMERIC --- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/numeric.sql --- - --- [SPARK-28318] Decimal can only support precision up to 38. We rewrite numeric(210,10) to decimal(38,10). 
-CREATE TABLE num_data (id int, val decimal(38,10)) USING parquet; -CREATE TABLE num_exp_add (id1 int, id2 int, expected decimal(38,10)) USING parquet; -CREATE TABLE num_exp_sub (id1 int, id2 int, expected decimal(38,10)) USING parquet; -CREATE TABLE num_exp_div (id1 int, id2 int, expected decimal(38,10)) USING parquet; -CREATE TABLE num_exp_mul (id1 int, id2 int, expected decimal(38,10)) USING parquet; -CREATE TABLE num_exp_sqrt (id int, expected decimal(38,10)) USING parquet; -CREATE TABLE num_exp_ln (id int, expected decimal(38,10)) USING parquet; -CREATE TABLE num_exp_log10 (id int, expected decimal(38,10)) USING parquet; -CREATE TABLE num_exp_power_10_ln (id int, expected decimal(38,10)) USING parquet; - -CREATE TABLE num_result (id1 int, id2 int, result decimal(38,10)) USING parquet; - - --- ****************************** --- * The following EXPECTED results are computed by bc(1) --- * with a scale of 200 --- ****************************** - --- BEGIN TRANSACTION; -INSERT INTO num_exp_add VALUES (0,0,'0'); -INSERT INTO num_exp_sub VALUES (0,0,'0'); -INSERT INTO num_exp_mul VALUES (0,0,'0'); -INSERT INTO num_exp_div VALUES (0,0,'NaN'); -INSERT INTO num_exp_add VALUES (0,1,'0'); -INSERT INTO num_exp_sub VALUES (0,1,'0'); -INSERT INTO num_exp_mul VALUES (0,1,'0'); -INSERT INTO num_exp_div VALUES (0,1,'NaN'); -INSERT INTO num_exp_add VALUES (0,2,'-34338492.215397047'); -INSERT INTO num_exp_sub VALUES (0,2,'34338492.215397047'); -INSERT INTO num_exp_mul VALUES (0,2,'0'); -INSERT INTO num_exp_div VALUES (0,2,'0'); -INSERT INTO num_exp_add VALUES (0,3,'4.31'); -INSERT INTO num_exp_sub VALUES (0,3,'-4.31'); -INSERT INTO num_exp_mul VALUES (0,3,'0'); -INSERT INTO num_exp_div VALUES (0,3,'0'); -INSERT INTO num_exp_add VALUES (0,4,'7799461.4119'); -INSERT INTO num_exp_sub VALUES (0,4,'-7799461.4119'); -INSERT INTO num_exp_mul VALUES (0,4,'0'); -INSERT INTO num_exp_div VALUES (0,4,'0'); -INSERT INTO num_exp_add VALUES (0,5,'16397.038491'); -INSERT INTO num_exp_sub VALUES 
(0,5,'-16397.038491'); -INSERT INTO num_exp_mul VALUES (0,5,'0'); -INSERT INTO num_exp_div VALUES (0,5,'0'); -INSERT INTO num_exp_add VALUES (0,6,'93901.57763026'); -INSERT INTO num_exp_sub VALUES (0,6,'-93901.57763026'); -INSERT INTO num_exp_mul VALUES (0,6,'0'); -INSERT INTO num_exp_div VALUES (0,6,'0'); -INSERT INTO num_exp_add VALUES (0,7,'-83028485'); -INSERT INTO num_exp_sub VALUES (0,7,'83028485'); -INSERT INTO num_exp_mul VALUES (0,7,'0'); -INSERT INTO num_exp_div VALUES (0,7,'0'); -INSERT INTO num_exp_add VALUES (0,8,'74881'); -INSERT INTO num_exp_sub VALUES (0,8,'-74881'); -INSERT INTO num_exp_mul VALUES (0,8,'0'); -INSERT INTO num_exp_div VALUES (0,8,'0'); -INSERT INTO num_exp_add VALUES (0,9,'-24926804.045047420'); -INSERT INTO num_exp_sub VALUES (0,9,'24926804.045047420'); -INSERT INTO num_exp_mul VALUES (0,9,'0'); -INSERT INTO num_exp_div VALUES (0,9,'0'); -INSERT INTO num_exp_add VALUES (1,0,'0'); -INSERT INTO num_exp_sub VALUES (1,0,'0'); -INSERT INTO num_exp_mul VALUES (1,0,'0'); -INSERT INTO num_exp_div VALUES (1,0,'NaN'); -INSERT INTO num_exp_add VALUES (1,1,'0'); -INSERT INTO num_exp_sub VALUES (1,1,'0'); -INSERT INTO num_exp_mul VALUES (1,1,'0'); -INSERT INTO num_exp_div VALUES (1,1,'NaN'); -INSERT INTO num_exp_add VALUES (1,2,'-34338492.215397047'); -INSERT INTO num_exp_sub VALUES (1,2,'34338492.215397047'); -INSERT INTO num_exp_mul VALUES (1,2,'0'); -INSERT INTO num_exp_div VALUES (1,2,'0'); -INSERT INTO num_exp_add VALUES (1,3,'4.31'); -INSERT INTO num_exp_sub VALUES (1,3,'-4.31'); -INSERT INTO num_exp_mul VALUES (1,3,'0'); -INSERT INTO num_exp_div VALUES (1,3,'0'); -INSERT INTO num_exp_add VALUES (1,4,'7799461.4119'); -INSERT INTO num_exp_sub VALUES (1,4,'-7799461.4119'); -INSERT INTO num_exp_mul VALUES (1,4,'0'); -INSERT INTO num_exp_div VALUES (1,4,'0'); -INSERT INTO num_exp_add VALUES (1,5,'16397.038491'); -INSERT INTO num_exp_sub VALUES (1,5,'-16397.038491'); -INSERT INTO num_exp_mul VALUES (1,5,'0'); -INSERT INTO num_exp_div VALUES 
(1,5,'0'); -INSERT INTO num_exp_add VALUES (1,6,'93901.57763026'); -INSERT INTO num_exp_sub VALUES (1,6,'-93901.57763026'); -INSERT INTO num_exp_mul VALUES (1,6,'0'); -INSERT INTO num_exp_div VALUES (1,6,'0'); -INSERT INTO num_exp_add VALUES (1,7,'-83028485'); -INSERT INTO num_exp_sub VALUES (1,7,'83028485'); -INSERT INTO num_exp_mul VALUES (1,7,'0'); -INSERT INTO num_exp_div VALUES (1,7,'0'); -INSERT INTO num_exp_add VALUES (1,8,'74881'); -INSERT INTO num_exp_sub VALUES (1,8,'-74881'); -INSERT INTO num_exp_mul VALUES (1,8,'0'); -INSERT INTO num_exp_div VALUES (1,8,'0'); -INSERT INTO num_exp_add VALUES (1,9,'-24926804.045047420'); -INSERT INTO num_exp_sub VALUES (1,9,'24926804.045047420'); -INSERT INTO num_exp_mul VALUES (1,9,'0'); -INSERT INTO num_exp_div VALUES (1,9,'0'); -INSERT INTO num_exp_add VALUES (2,0,'-34338492.215397047'); -INSERT INTO num_exp_sub VALUES (2,0,'-34338492.215397047'); -INSERT INTO num_exp_mul VALUES (2,0,'0'); -INSERT INTO num_exp_div VALUES (2,0,'NaN'); -INSERT INTO num_exp_add VALUES (2,1,'-34338492.215397047'); -INSERT INTO num_exp_sub VALUES (2,1,'-34338492.215397047'); -INSERT INTO num_exp_mul VALUES (2,1,'0'); -INSERT INTO num_exp_div VALUES (2,1,'NaN'); -INSERT INTO num_exp_add VALUES (2,2,'-68676984.430794094'); -INSERT INTO num_exp_sub VALUES (2,2,'0'); -INSERT INTO num_exp_mul VALUES (2,2,'1179132047626883.596862135856320209'); -INSERT INTO num_exp_div VALUES (2,2,'1.00000000000000000000'); -INSERT INTO num_exp_add VALUES (2,3,'-34338487.905397047'); -INSERT INTO num_exp_sub VALUES (2,3,'-34338496.525397047'); -INSERT INTO num_exp_mul VALUES (2,3,'-147998901.44836127257'); -INSERT INTO num_exp_div VALUES (2,3,'-7967167.56737750510440835266'); -INSERT INTO num_exp_add VALUES (2,4,'-26539030.803497047'); -INSERT INTO num_exp_sub VALUES (2,4,'-42137953.627297047'); -INSERT INTO num_exp_mul VALUES (2,4,'-267821744976817.8111137106593'); -INSERT INTO num_exp_div VALUES (2,4,'-4.40267480046830116685'); -INSERT INTO num_exp_add VALUES 
(2,5,'-34322095.176906047'); -INSERT INTO num_exp_sub VALUES (2,5,'-34354889.253888047'); -INSERT INTO num_exp_mul VALUES (2,5,'-563049578578.769242506736077'); -INSERT INTO num_exp_div VALUES (2,5,'-2094.18866914563535496429'); -INSERT INTO num_exp_add VALUES (2,6,'-34244590.637766787'); -INSERT INTO num_exp_sub VALUES (2,6,'-34432393.793027307'); -INSERT INTO num_exp_mul VALUES (2,6,'-3224438592470.18449811926184222'); -INSERT INTO num_exp_div VALUES (2,6,'-365.68599891479766440940'); -INSERT INTO num_exp_add VALUES (2,7,'-117366977.215397047'); -INSERT INTO num_exp_sub VALUES (2,7,'48689992.784602953'); -INSERT INTO num_exp_mul VALUES (2,7,'2851072985828710.485883795'); -INSERT INTO num_exp_div VALUES (2,7,'.41357483778485235518'); -INSERT INTO num_exp_add VALUES (2,8,'-34263611.215397047'); -INSERT INTO num_exp_sub VALUES (2,8,'-34413373.215397047'); -INSERT INTO num_exp_mul VALUES (2,8,'-2571300635581.146276407'); -INSERT INTO num_exp_div VALUES (2,8,'-458.57416721727870888476'); -INSERT INTO num_exp_add VALUES (2,9,'-59265296.260444467'); -INSERT INTO num_exp_sub VALUES (2,9,'-9411688.170349627'); -INSERT INTO num_exp_mul VALUES (2,9,'855948866655588.453741509242968740'); -INSERT INTO num_exp_div VALUES (2,9,'1.37757299946438931811'); -INSERT INTO num_exp_add VALUES (3,0,'4.31'); -INSERT INTO num_exp_sub VALUES (3,0,'4.31'); -INSERT INTO num_exp_mul VALUES (3,0,'0'); -INSERT INTO num_exp_div VALUES (3,0,'NaN'); -INSERT INTO num_exp_add VALUES (3,1,'4.31'); -INSERT INTO num_exp_sub VALUES (3,1,'4.31'); -INSERT INTO num_exp_mul VALUES (3,1,'0'); -INSERT INTO num_exp_div VALUES (3,1,'NaN'); -INSERT INTO num_exp_add VALUES (3,2,'-34338487.905397047'); -INSERT INTO num_exp_sub VALUES (3,2,'34338496.525397047'); -INSERT INTO num_exp_mul VALUES (3,2,'-147998901.44836127257'); -INSERT INTO num_exp_div VALUES (3,2,'-.00000012551512084352'); -INSERT INTO num_exp_add VALUES (3,3,'8.62'); -INSERT INTO num_exp_sub VALUES (3,3,'0'); -INSERT INTO num_exp_mul VALUES 
(3,3,'18.5761'); -INSERT INTO num_exp_div VALUES (3,3,'1.00000000000000000000'); -INSERT INTO num_exp_add VALUES (3,4,'7799465.7219'); -INSERT INTO num_exp_sub VALUES (3,4,'-7799457.1019'); -INSERT INTO num_exp_mul VALUES (3,4,'33615678.685289'); -INSERT INTO num_exp_div VALUES (3,4,'.00000055260225961552'); -INSERT INTO num_exp_add VALUES (3,5,'16401.348491'); -INSERT INTO num_exp_sub VALUES (3,5,'-16392.728491'); -INSERT INTO num_exp_mul VALUES (3,5,'70671.23589621'); -INSERT INTO num_exp_div VALUES (3,5,'.00026285234387695504'); -INSERT INTO num_exp_add VALUES (3,6,'93905.88763026'); -INSERT INTO num_exp_sub VALUES (3,6,'-93897.26763026'); -INSERT INTO num_exp_mul VALUES (3,6,'404715.7995864206'); -INSERT INTO num_exp_div VALUES (3,6,'.00004589912234457595'); -INSERT INTO num_exp_add VALUES (3,7,'-83028480.69'); -INSERT INTO num_exp_sub VALUES (3,7,'83028489.31'); -INSERT INTO num_exp_mul VALUES (3,7,'-357852770.35'); -INSERT INTO num_exp_div VALUES (3,7,'-.00000005190989574240'); -INSERT INTO num_exp_add VALUES (3,8,'74885.31'); -INSERT INTO num_exp_sub VALUES (3,8,'-74876.69'); -INSERT INTO num_exp_mul VALUES (3,8,'322737.11'); -INSERT INTO num_exp_div VALUES (3,8,'.00005755799201399553'); -INSERT INTO num_exp_add VALUES (3,9,'-24926799.735047420'); -INSERT INTO num_exp_sub VALUES (3,9,'24926808.355047420'); -INSERT INTO num_exp_mul VALUES (3,9,'-107434525.43415438020'); -INSERT INTO num_exp_div VALUES (3,9,'-.00000017290624149854'); -INSERT INTO num_exp_add VALUES (4,0,'7799461.4119'); -INSERT INTO num_exp_sub VALUES (4,0,'7799461.4119'); -INSERT INTO num_exp_mul VALUES (4,0,'0'); -INSERT INTO num_exp_div VALUES (4,0,'NaN'); -INSERT INTO num_exp_add VALUES (4,1,'7799461.4119'); -INSERT INTO num_exp_sub VALUES (4,1,'7799461.4119'); -INSERT INTO num_exp_mul VALUES (4,1,'0'); -INSERT INTO num_exp_div VALUES (4,1,'NaN'); -INSERT INTO num_exp_add VALUES (4,2,'-26539030.803497047'); -INSERT INTO num_exp_sub VALUES (4,2,'42137953.627297047'); -INSERT INTO 
num_exp_mul VALUES (4,2,'-267821744976817.8111137106593'); -INSERT INTO num_exp_div VALUES (4,2,'-.22713465002993920385'); -INSERT INTO num_exp_add VALUES (4,3,'7799465.7219'); -INSERT INTO num_exp_sub VALUES (4,3,'7799457.1019'); -INSERT INTO num_exp_mul VALUES (4,3,'33615678.685289'); -INSERT INTO num_exp_div VALUES (4,3,'1809619.81714617169373549883'); -INSERT INTO num_exp_add VALUES (4,4,'15598922.8238'); -INSERT INTO num_exp_sub VALUES (4,4,'0'); -INSERT INTO num_exp_mul VALUES (4,4,'60831598315717.14146161'); -INSERT INTO num_exp_div VALUES (4,4,'1.00000000000000000000'); -INSERT INTO num_exp_add VALUES (4,5,'7815858.450391'); -INSERT INTO num_exp_sub VALUES (4,5,'7783064.373409'); -INSERT INTO num_exp_mul VALUES (4,5,'127888068979.9935054429'); -INSERT INTO num_exp_div VALUES (4,5,'475.66281046305802686061'); -INSERT INTO num_exp_add VALUES (4,6,'7893362.98953026'); -INSERT INTO num_exp_sub VALUES (4,6,'7705559.83426974'); -INSERT INTO num_exp_mul VALUES (4,6,'732381731243.745115764094'); -INSERT INTO num_exp_div VALUES (4,6,'83.05996138436129499606'); -INSERT INTO num_exp_add VALUES (4,7,'-75229023.5881'); -INSERT INTO num_exp_sub VALUES (4,7,'90827946.4119'); -INSERT INTO num_exp_mul VALUES (4,7,'-647577464846017.9715'); -INSERT INTO num_exp_div VALUES (4,7,'-.09393717604145131637'); -INSERT INTO num_exp_add VALUES (4,8,'7874342.4119'); -INSERT INTO num_exp_sub VALUES (4,8,'7724580.4119'); -INSERT INTO num_exp_mul VALUES (4,8,'584031469984.4839'); -INSERT INTO num_exp_div VALUES (4,8,'104.15808298366741897143'); -INSERT INTO num_exp_add VALUES (4,9,'-17127342.633147420'); -INSERT INTO num_exp_sub VALUES (4,9,'32726265.456947420'); -INSERT INTO num_exp_mul VALUES (4,9,'-194415646271340.1815956522980'); -INSERT INTO num_exp_div VALUES (4,9,'-.31289456112403769409'); -INSERT INTO num_exp_add VALUES (5,0,'16397.038491'); -INSERT INTO num_exp_sub VALUES (5,0,'16397.038491'); -INSERT INTO num_exp_mul VALUES (5,0,'0'); -INSERT INTO num_exp_div VALUES (5,0,'NaN'); 
-INSERT INTO num_exp_add VALUES (5,1,'16397.038491'); -INSERT INTO num_exp_sub VALUES (5,1,'16397.038491'); -INSERT INTO num_exp_mul VALUES (5,1,'0'); -INSERT INTO num_exp_div VALUES (5,1,'NaN'); -INSERT INTO num_exp_add VALUES (5,2,'-34322095.176906047'); -INSERT INTO num_exp_sub VALUES (5,2,'34354889.253888047'); -INSERT INTO num_exp_mul VALUES (5,2,'-563049578578.769242506736077'); -INSERT INTO num_exp_div VALUES (5,2,'-.00047751189505192446'); -INSERT INTO num_exp_add VALUES (5,3,'16401.348491'); -INSERT INTO num_exp_sub VALUES (5,3,'16392.728491'); -INSERT INTO num_exp_mul VALUES (5,3,'70671.23589621'); -INSERT INTO num_exp_div VALUES (5,3,'3804.41728329466357308584'); -INSERT INTO num_exp_add VALUES (5,4,'7815858.450391'); -INSERT INTO num_exp_sub VALUES (5,4,'-7783064.373409'); -INSERT INTO num_exp_mul VALUES (5,4,'127888068979.9935054429'); -INSERT INTO num_exp_div VALUES (5,4,'.00210232958726897192'); -INSERT INTO num_exp_add VALUES (5,5,'32794.076982'); -INSERT INTO num_exp_sub VALUES (5,5,'0'); -INSERT INTO num_exp_mul VALUES (5,5,'268862871.275335557081'); -INSERT INTO num_exp_div VALUES (5,5,'1.00000000000000000000'); -INSERT INTO num_exp_add VALUES (5,6,'110298.61612126'); -INSERT INTO num_exp_sub VALUES (5,6,'-77504.53913926'); -INSERT INTO num_exp_mul VALUES (5,6,'1539707782.76899778633766'); -INSERT INTO num_exp_div VALUES (5,6,'.17461941433576102689'); -INSERT INTO num_exp_add VALUES (5,7,'-83012087.961509'); -INSERT INTO num_exp_sub VALUES (5,7,'83044882.038491'); -INSERT INTO num_exp_mul VALUES (5,7,'-1361421264394.416135'); -INSERT INTO num_exp_div VALUES (5,7,'-.00019748690453643710'); -INSERT INTO num_exp_add VALUES (5,8,'91278.038491'); -INSERT INTO num_exp_sub VALUES (5,8,'-58483.961509'); -INSERT INTO num_exp_mul VALUES (5,8,'1227826639.244571'); -INSERT INTO num_exp_div VALUES (5,8,'.21897461960978085228'); -INSERT INTO num_exp_add VALUES (5,9,'-24910407.006556420'); -INSERT INTO num_exp_sub VALUES (5,9,'24943201.083538420'); -INSERT INTO 
num_exp_mul VALUES (5,9,'-408725765384.257043660243220'); -INSERT INTO num_exp_div VALUES (5,9,'-.00065780749354660427'); -INSERT INTO num_exp_add VALUES (6,0,'93901.57763026'); -INSERT INTO num_exp_sub VALUES (6,0,'93901.57763026'); -INSERT INTO num_exp_mul VALUES (6,0,'0'); -INSERT INTO num_exp_div VALUES (6,0,'NaN'); -INSERT INTO num_exp_add VALUES (6,1,'93901.57763026'); -INSERT INTO num_exp_sub VALUES (6,1,'93901.57763026'); -INSERT INTO num_exp_mul VALUES (6,1,'0'); -INSERT INTO num_exp_div VALUES (6,1,'NaN'); -INSERT INTO num_exp_add VALUES (6,2,'-34244590.637766787'); -INSERT INTO num_exp_sub VALUES (6,2,'34432393.793027307'); -INSERT INTO num_exp_mul VALUES (6,2,'-3224438592470.18449811926184222'); -INSERT INTO num_exp_div VALUES (6,2,'-.00273458651128995823'); -INSERT INTO num_exp_add VALUES (6,3,'93905.88763026'); -INSERT INTO num_exp_sub VALUES (6,3,'93897.26763026'); -INSERT INTO num_exp_mul VALUES (6,3,'404715.7995864206'); -INSERT INTO num_exp_div VALUES (6,3,'21786.90896293735498839907'); -INSERT INTO num_exp_add VALUES (6,4,'7893362.98953026'); -INSERT INTO num_exp_sub VALUES (6,4,'-7705559.83426974'); -INSERT INTO num_exp_mul VALUES (6,4,'732381731243.745115764094'); -INSERT INTO num_exp_div VALUES (6,4,'.01203949512295682469'); -INSERT INTO num_exp_add VALUES (6,5,'110298.61612126'); -INSERT INTO num_exp_sub VALUES (6,5,'77504.53913926'); -INSERT INTO num_exp_mul VALUES (6,5,'1539707782.76899778633766'); -INSERT INTO num_exp_div VALUES (6,5,'5.72674008674192359679'); -INSERT INTO num_exp_add VALUES (6,6,'187803.15526052'); -INSERT INTO num_exp_sub VALUES (6,6,'0'); -INSERT INTO num_exp_mul VALUES (6,6,'8817506281.4517452372676676'); -INSERT INTO num_exp_div VALUES (6,6,'1.00000000000000000000'); -INSERT INTO num_exp_add VALUES (6,7,'-82934583.42236974'); -INSERT INTO num_exp_sub VALUES (6,7,'83122386.57763026'); -INSERT INTO num_exp_mul VALUES (6,7,'-7796505729750.37795610'); -INSERT INTO num_exp_div VALUES (6,7,'-.00113095617281538980'); -INSERT 
INTO num_exp_add VALUES (6,8,'168782.57763026'); -INSERT INTO num_exp_sub VALUES (6,8,'19020.57763026'); -INSERT INTO num_exp_mul VALUES (6,8,'7031444034.53149906'); -INSERT INTO num_exp_div VALUES (6,8,'1.25401073209839612184'); -INSERT INTO num_exp_add VALUES (6,9,'-24832902.467417160'); -INSERT INTO num_exp_sub VALUES (6,9,'25020705.622677680'); -INSERT INTO num_exp_mul VALUES (6,9,'-2340666225110.29929521292692920'); -INSERT INTO num_exp_div VALUES (6,9,'-.00376709254265256789'); -INSERT INTO num_exp_add VALUES (7,0,'-83028485'); -INSERT INTO num_exp_sub VALUES (7,0,'-83028485'); -INSERT INTO num_exp_mul VALUES (7,0,'0'); -INSERT INTO num_exp_div VALUES (7,0,'NaN'); -INSERT INTO num_exp_add VALUES (7,1,'-83028485'); -INSERT INTO num_exp_sub VALUES (7,1,'-83028485'); -INSERT INTO num_exp_mul VALUES (7,1,'0'); -INSERT INTO num_exp_div VALUES (7,1,'NaN'); -INSERT INTO num_exp_add VALUES (7,2,'-117366977.215397047'); -INSERT INTO num_exp_sub VALUES (7,2,'-48689992.784602953'); -INSERT INTO num_exp_mul VALUES (7,2,'2851072985828710.485883795'); -INSERT INTO num_exp_div VALUES (7,2,'2.41794207151503385700'); -INSERT INTO num_exp_add VALUES (7,3,'-83028480.69'); -INSERT INTO num_exp_sub VALUES (7,3,'-83028489.31'); -INSERT INTO num_exp_mul VALUES (7,3,'-357852770.35'); -INSERT INTO num_exp_div VALUES (7,3,'-19264149.65197215777262180974'); -INSERT INTO num_exp_add VALUES (7,4,'-75229023.5881'); -INSERT INTO num_exp_sub VALUES (7,4,'-90827946.4119'); -INSERT INTO num_exp_mul VALUES (7,4,'-647577464846017.9715'); -INSERT INTO num_exp_div VALUES (7,4,'-10.64541262725136247686'); -INSERT INTO num_exp_add VALUES (7,5,'-83012087.961509'); -INSERT INTO num_exp_sub VALUES (7,5,'-83044882.038491'); -INSERT INTO num_exp_mul VALUES (7,5,'-1361421264394.416135'); -INSERT INTO num_exp_div VALUES (7,5,'-5063.62688881730941836574'); -INSERT INTO num_exp_add VALUES (7,6,'-82934583.42236974'); -INSERT INTO num_exp_sub VALUES (7,6,'-83122386.57763026'); -INSERT INTO num_exp_mul VALUES 
(7,6,'-7796505729750.37795610'); -INSERT INTO num_exp_div VALUES (7,6,'-884.20756174009028770294'); -INSERT INTO num_exp_add VALUES (7,7,'-166056970'); -INSERT INTO num_exp_sub VALUES (7,7,'0'); -INSERT INTO num_exp_mul VALUES (7,7,'6893729321395225'); -INSERT INTO num_exp_div VALUES (7,7,'1.00000000000000000000'); -INSERT INTO num_exp_add VALUES (7,8,'-82953604'); -INSERT INTO num_exp_sub VALUES (7,8,'-83103366'); -INSERT INTO num_exp_mul VALUES (7,8,'-6217255985285'); -INSERT INTO num_exp_div VALUES (7,8,'-1108.80577182462841041118'); -INSERT INTO num_exp_add VALUES (7,9,'-107955289.045047420'); -INSERT INTO num_exp_sub VALUES (7,9,'-58101680.954952580'); -INSERT INTO num_exp_mul VALUES (7,9,'2069634775752159.035758700'); -INSERT INTO num_exp_div VALUES (7,9,'3.33089171198810413382'); -INSERT INTO num_exp_add VALUES (8,0,'74881'); -INSERT INTO num_exp_sub VALUES (8,0,'74881'); -INSERT INTO num_exp_mul VALUES (8,0,'0'); -INSERT INTO num_exp_div VALUES (8,0,'NaN'); -INSERT INTO num_exp_add VALUES (8,1,'74881'); -INSERT INTO num_exp_sub VALUES (8,1,'74881'); -INSERT INTO num_exp_mul VALUES (8,1,'0'); -INSERT INTO num_exp_div VALUES (8,1,'NaN'); -INSERT INTO num_exp_add VALUES (8,2,'-34263611.215397047'); -INSERT INTO num_exp_sub VALUES (8,2,'34413373.215397047'); -INSERT INTO num_exp_mul VALUES (8,2,'-2571300635581.146276407'); -INSERT INTO num_exp_div VALUES (8,2,'-.00218067233500788615'); -INSERT INTO num_exp_add VALUES (8,3,'74885.31'); -INSERT INTO num_exp_sub VALUES (8,3,'74876.69'); -INSERT INTO num_exp_mul VALUES (8,3,'322737.11'); -INSERT INTO num_exp_div VALUES (8,3,'17373.78190255220417633410'); -INSERT INTO num_exp_add VALUES (8,4,'7874342.4119'); -INSERT INTO num_exp_sub VALUES (8,4,'-7724580.4119'); -INSERT INTO num_exp_mul VALUES (8,4,'584031469984.4839'); -INSERT INTO num_exp_div VALUES (8,4,'.00960079113741758956'); -INSERT INTO num_exp_add VALUES (8,5,'91278.038491'); -INSERT INTO num_exp_sub VALUES (8,5,'58483.961509'); -INSERT INTO num_exp_mul 
VALUES (8,5,'1227826639.244571'); -INSERT INTO num_exp_div VALUES (8,5,'4.56673929509287019456'); -INSERT INTO num_exp_add VALUES (8,6,'168782.57763026'); -INSERT INTO num_exp_sub VALUES (8,6,'-19020.57763026'); -INSERT INTO num_exp_mul VALUES (8,6,'7031444034.53149906'); -INSERT INTO num_exp_div VALUES (8,6,'.79744134113322314424'); -INSERT INTO num_exp_add VALUES (8,7,'-82953604'); -INSERT INTO num_exp_sub VALUES (8,7,'83103366'); -INSERT INTO num_exp_mul VALUES (8,7,'-6217255985285'); -INSERT INTO num_exp_div VALUES (8,7,'-.00090187120721280172'); -INSERT INTO num_exp_add VALUES (8,8,'149762'); -INSERT INTO num_exp_sub VALUES (8,8,'0'); -INSERT INTO num_exp_mul VALUES (8,8,'5607164161'); -INSERT INTO num_exp_div VALUES (8,8,'1.00000000000000000000'); -INSERT INTO num_exp_add VALUES (8,9,'-24851923.045047420'); -INSERT INTO num_exp_sub VALUES (8,9,'25001685.045047420'); -INSERT INTO num_exp_mul VALUES (8,9,'-1866544013697.195857020'); -INSERT INTO num_exp_div VALUES (8,9,'-.00300403532938582735'); -INSERT INTO num_exp_add VALUES (9,0,'-24926804.045047420'); -INSERT INTO num_exp_sub VALUES (9,0,'-24926804.045047420'); -INSERT INTO num_exp_mul VALUES (9,0,'0'); -INSERT INTO num_exp_div VALUES (9,0,'NaN'); -INSERT INTO num_exp_add VALUES (9,1,'-24926804.045047420'); -INSERT INTO num_exp_sub VALUES (9,1,'-24926804.045047420'); -INSERT INTO num_exp_mul VALUES (9,1,'0'); -INSERT INTO num_exp_div VALUES (9,1,'NaN'); -INSERT INTO num_exp_add VALUES (9,2,'-59265296.260444467'); -INSERT INTO num_exp_sub VALUES (9,2,'9411688.170349627'); -INSERT INTO num_exp_mul VALUES (9,2,'855948866655588.453741509242968740'); -INSERT INTO num_exp_div VALUES (9,2,'.72591434384152961526'); -INSERT INTO num_exp_add VALUES (9,3,'-24926799.735047420'); -INSERT INTO num_exp_sub VALUES (9,3,'-24926808.355047420'); -INSERT INTO num_exp_mul VALUES (9,3,'-107434525.43415438020'); -INSERT INTO num_exp_div VALUES (9,3,'-5783481.21694835730858468677'); -INSERT INTO num_exp_add VALUES 
(9,4,'-17127342.633147420'); -INSERT INTO num_exp_sub VALUES (9,4,'-32726265.456947420'); -INSERT INTO num_exp_mul VALUES (9,4,'-194415646271340.1815956522980'); -INSERT INTO num_exp_div VALUES (9,4,'-3.19596478892958416484'); -INSERT INTO num_exp_add VALUES (9,5,'-24910407.006556420'); -INSERT INTO num_exp_sub VALUES (9,5,'-24943201.083538420'); -INSERT INTO num_exp_mul VALUES (9,5,'-408725765384.257043660243220'); -INSERT INTO num_exp_div VALUES (9,5,'-1520.20159364322004505807'); -INSERT INTO num_exp_add VALUES (9,6,'-24832902.467417160'); -INSERT INTO num_exp_sub VALUES (9,6,'-25020705.622677680'); -INSERT INTO num_exp_mul VALUES (9,6,'-2340666225110.29929521292692920'); -INSERT INTO num_exp_div VALUES (9,6,'-265.45671195426965751280'); -INSERT INTO num_exp_add VALUES (9,7,'-107955289.045047420'); -INSERT INTO num_exp_sub VALUES (9,7,'58101680.954952580'); -INSERT INTO num_exp_mul VALUES (9,7,'2069634775752159.035758700'); -INSERT INTO num_exp_div VALUES (9,7,'.30021990699995814689'); -INSERT INTO num_exp_add VALUES (9,8,'-24851923.045047420'); -INSERT INTO num_exp_sub VALUES (9,8,'-25001685.045047420'); -INSERT INTO num_exp_mul VALUES (9,8,'-1866544013697.195857020'); -INSERT INTO num_exp_div VALUES (9,8,'-332.88556569820675471748'); -INSERT INTO num_exp_add VALUES (9,9,'-49853608.090094840'); -INSERT INTO num_exp_sub VALUES (9,9,'0'); -INSERT INTO num_exp_mul VALUES (9,9,'621345559900192.420120630048656400'); -INSERT INTO num_exp_div VALUES (9,9,'1.00000000000000000000'); --- COMMIT TRANSACTION; --- BEGIN TRANSACTION; -INSERT INTO num_exp_sqrt VALUES (0,'0'); -INSERT INTO num_exp_sqrt VALUES (1,'0'); -INSERT INTO num_exp_sqrt VALUES (2,'5859.90547836712524903505'); -INSERT INTO num_exp_sqrt VALUES (3,'2.07605394920266944396'); -INSERT INTO num_exp_sqrt VALUES (4,'2792.75158435189147418923'); -INSERT INTO num_exp_sqrt VALUES (5,'128.05092147657509145473'); -INSERT INTO num_exp_sqrt VALUES (6,'306.43364311096782703406'); -INSERT INTO num_exp_sqrt VALUES 
(7,'9111.99676251039939975230'); -INSERT INTO num_exp_sqrt VALUES (8,'273.64392922189960397542'); -INSERT INTO num_exp_sqrt VALUES (9,'4992.67503899937593364766'); --- COMMIT TRANSACTION; --- BEGIN TRANSACTION; -INSERT INTO num_exp_ln VALUES (0,'NaN'); -INSERT INTO num_exp_ln VALUES (1,'NaN'); -INSERT INTO num_exp_ln VALUES (2,'17.35177750493897715514'); -INSERT INTO num_exp_ln VALUES (3,'1.46093790411565641971'); -INSERT INTO num_exp_ln VALUES (4,'15.86956523951936572464'); -INSERT INTO num_exp_ln VALUES (5,'9.70485601768871834038'); -INSERT INTO num_exp_ln VALUES (6,'11.45000246622944403127'); -INSERT INTO num_exp_ln VALUES (7,'18.23469429965478772991'); -INSERT INTO num_exp_ln VALUES (8,'11.22365546576315513668'); -INSERT INTO num_exp_ln VALUES (9,'17.03145425013166006962'); --- COMMIT TRANSACTION; --- BEGIN TRANSACTION; -INSERT INTO num_exp_log10 VALUES (0,'NaN'); -INSERT INTO num_exp_log10 VALUES (1,'NaN'); -INSERT INTO num_exp_log10 VALUES (2,'7.53578122160797276459'); -INSERT INTO num_exp_log10 VALUES (3,'.63447727016073160075'); -INSERT INTO num_exp_log10 VALUES (4,'6.89206461372691743345'); -INSERT INTO num_exp_log10 VALUES (5,'4.21476541614777768626'); -INSERT INTO num_exp_log10 VALUES (6,'4.97267288886207207671'); -INSERT INTO num_exp_log10 VALUES (7,'7.91922711353275546914'); -INSERT INTO num_exp_log10 VALUES (8,'4.87437163556421004138'); -INSERT INTO num_exp_log10 VALUES (9,'7.39666659961986567059'); --- COMMIT TRANSACTION; --- BEGIN TRANSACTION; -INSERT INTO num_exp_power_10_ln VALUES (0,'NaN'); -INSERT INTO num_exp_power_10_ln VALUES (1,'NaN'); -INSERT INTO num_exp_power_10_ln VALUES (2,'224790267919917955.13261618583642653184'); -INSERT INTO num_exp_power_10_ln VALUES (3,'28.90266599445155957393'); -INSERT INTO num_exp_power_10_ln VALUES (4,'7405685069594999.07733999469386277636'); -INSERT INTO num_exp_power_10_ln VALUES (5,'5068226527.32127265408584640098'); -INSERT INTO num_exp_power_10_ln VALUES (6,'281839893606.99372343357047819067'); -INSERT 
INTO num_exp_power_10_ln VALUES (7,'1716699575118597095.42330819910640247627'); -INSERT INTO num_exp_power_10_ln VALUES (8,'167361463828.07491320069016125952'); -INSERT INTO num_exp_power_10_ln VALUES (9,'107511333880052007.04141124673540337457'); --- COMMIT TRANSACTION; --- BEGIN TRANSACTION; -INSERT INTO num_data VALUES (0, '0'); -INSERT INTO num_data VALUES (1, '0'); -INSERT INTO num_data VALUES (2, '-34338492.215397047'); -INSERT INTO num_data VALUES (3, '4.31'); -INSERT INTO num_data VALUES (4, '7799461.4119'); -INSERT INTO num_data VALUES (5, '16397.038491'); -INSERT INTO num_data VALUES (6, '93901.57763026'); -INSERT INTO num_data VALUES (7, '-83028485'); -INSERT INTO num_data VALUES (8, '74881'); -INSERT INTO num_data VALUES (9, '-24926804.045047420'); --- COMMIT TRANSACTION; - -SELECT * FROM num_data; - --- ****************************** --- * Create indices for faster checks --- ****************************** - --- CREATE UNIQUE INDEX num_exp_add_idx ON num_exp_add (id1, id2); --- CREATE UNIQUE INDEX num_exp_sub_idx ON num_exp_sub (id1, id2); --- CREATE UNIQUE INDEX num_exp_div_idx ON num_exp_div (id1, id2); --- CREATE UNIQUE INDEX num_exp_mul_idx ON num_exp_mul (id1, id2); --- CREATE UNIQUE INDEX num_exp_sqrt_idx ON num_exp_sqrt (id); --- CREATE UNIQUE INDEX num_exp_ln_idx ON num_exp_ln (id); --- CREATE UNIQUE INDEX num_exp_log10_idx ON num_exp_log10 (id); --- CREATE UNIQUE INDEX num_exp_power_10_ln_idx ON num_exp_power_10_ln (id); - --- VACUUM ANALYZE num_exp_add; --- VACUUM ANALYZE num_exp_sub; --- VACUUM ANALYZE num_exp_div; --- VACUUM ANALYZE num_exp_mul; --- VACUUM ANALYZE num_exp_sqrt; --- VACUUM ANALYZE num_exp_ln; --- VACUUM ANALYZE num_exp_log10; --- VACUUM ANALYZE num_exp_power_10_ln; - --- ****************************** --- * Now check the behaviour of the NUMERIC type --- ****************************** - --- ****************************** --- * Addition check --- ****************************** -TRUNCATE TABLE num_result; -INSERT INTO 
num_result SELECT t1.id, t2.id, t1.val + t2.val - FROM num_data t1, num_data t2; -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_add t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != t2.expected; - -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val + t2.val, 10) - FROM num_data t1, num_data t2; -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 10) as expected - FROM num_result t1, num_exp_add t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != round(t2.expected, 10); - --- ****************************** --- * Subtraction check --- ****************************** -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT t1.id, t2.id, t1.val - t2.val - FROM num_data t1, num_data t2; -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_sub t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != t2.expected; - -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val - t2.val, 40) - FROM num_data t1, num_data t2; -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 40) - FROM num_result t1, num_exp_sub t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != round(t2.expected, 40); - --- ****************************** --- * Multiply check --- ****************************** --- [SPARK-28316] Decimal precision issue -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT t1.id, t2.id, t1.val, t2.val, t1.val * t2.val - FROM num_data t1, num_data t2; -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_mul t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != t2.expected; - -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val * t2.val, 30) - FROM num_data t1, num_data t2; -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 30) as expected - FROM num_result t1, num_exp_mul t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result 
!= round(t2.expected, 30); - --- ****************************** --- * Division check --- ****************************** --- [SPARK-28316] Decimal precision issue -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT t1.id, t2.id, t1.val / t2.val - FROM num_data t1, num_data t2 - WHERE t2.val != '0.0'; -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_div t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != t2.expected; - -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 80) - FROM num_data t1, num_data t2 - WHERE t2.val != '0.0'; -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 80) as expected - FROM num_result t1, num_exp_div t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != round(t2.expected, 80); - --- ****************************** --- * Square root check --- ****************************** -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT id, 0, SQRT(ABS(val)) - FROM num_data; -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_sqrt t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected; - --- ****************************** --- * Natural logarithm check --- ****************************** -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT id, 0, LN(ABS(val)) - FROM num_data - WHERE val != '0.0'; -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_ln t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected; - --- ****************************** --- * Logarithm base 10 check --- ****************************** -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT id, 0, LOG(cast('10' as decimal(38, 18)), ABS(val)) - FROM num_data - WHERE val != '0.0'; -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_log10 t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected; - --- ****************************** --- * POWER(10, LN(value)) check --- ****************************** --- 
[SPARK-28316] Decimal precision issue -TRUNCATE TABLE num_result; -INSERT INTO num_result SELECT id, 0, POWER(cast('10' as decimal(38, 18)), LN(ABS(round(val,200)))) - FROM num_data - WHERE val != '0.0'; -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_power_10_ln t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected; - --- ****************************** --- * miscellaneous checks for things that have been broken in the past... --- ****************************** --- numeric AVG used to fail on some platforms -SELECT AVG(val) FROM num_data; --- [SPARK-28316] STDDEV and VARIANCE returns double type --- Skip it because: Expected "2.779120328758835[]E7", but got "2.779120328758835[4]E7" --- SELECT STDDEV(val) FROM num_data; --- Skip it because: Expected "7.72350980172061[8]E14", but got "7.72350980172061[6]E14" --- SELECT VARIANCE(val) FROM num_data; - --- Check for appropriate rounding and overflow -CREATE TABLE fract_only (id int, val decimal(4,4)) USING parquet; -INSERT INTO fract_only VALUES (1, '0.0'); -INSERT INTO fract_only VALUES (2, '0.1'); --- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL --- INSERT INTO fract_only VALUES (3, '1.0'); -- should fail -INSERT INTO fract_only VALUES (4, '-0.9999'); -INSERT INTO fract_only VALUES (5, '0.99994'); --- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL --- INSERT INTO fract_only VALUES (6, '0.99995'); -- should fail -INSERT INTO fract_only VALUES (7, '0.00001'); -INSERT INTO fract_only VALUES (8, '0.00017'); -SELECT * FROM fract_only; -DROP TABLE fract_only; - --- [SPARK-28315] Decimal can not accept NaN as input --- [SPARK-27923] Decimal type can not accept Infinity and -Infinity --- Check inf/nan conversion behavior -SELECT decimal(double('NaN')); -SELECT decimal(double('Infinity')); -SELECT decimal(double('-Infinity')); -SELECT decimal(float('NaN')); -SELECT decimal(float('Infinity')); -SELECT decimal(float('-Infinity')); - --- Simple check that ceil(), 
floor(), and round() work correctly -CREATE TABLE ceil_floor_round (a decimal(38, 18)) USING parquet; -INSERT INTO ceil_floor_round VALUES ('-5.5'); -INSERT INTO ceil_floor_round VALUES ('-5.499999'); -INSERT INTO ceil_floor_round VALUES ('9.5'); -INSERT INTO ceil_floor_round VALUES ('9.4999999'); -INSERT INTO ceil_floor_round VALUES ('0.0'); -INSERT INTO ceil_floor_round VALUES ('0.0000001'); -INSERT INTO ceil_floor_round VALUES ('-0.000001'); -SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; -DROP TABLE ceil_floor_round; - --- [SPARK-28007] Caret operator (^) means bitwise XOR in Spark and exponentiation in Postgres --- Check rounding, it should round ties away from zero. --- SELECT i as pow, --- round((-2.5 * 10 ^ i)::numeric, -i), --- round((-1.5 * 10 ^ i)::numeric, -i), --- round((-0.5 * 10 ^ i)::numeric, -i), --- round((0.5 * 10 ^ i)::numeric, -i), --- round((1.5 * 10 ^ i)::numeric, -i), --- round((2.5 * 10 ^ i)::numeric, -i) --- FROM generate_series(-5,5) AS t(i); - --- [SPARK-21117] Built-in SQL Function Support - WIDTH_BUCKET --- Testing for width_bucket(). For convenience, we test both the --- numeric and float8 versions of the function in this file. 
- --- errors --- SELECT width_bucket(5.0, 3.0, 4.0, 0); --- SELECT width_bucket(5.0, 3.0, 4.0, -5); --- SELECT width_bucket(3.5, 3.0, 3.0, 888); --- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0); --- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5); --- SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888); --- SELECT width_bucket('NaN', 3.0, 4.0, 888); --- SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888); - --- normal operation --- CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8); - --- COPY width_bucket_test (operand_num) FROM stdin; --- -5.2 --- -0.0000000001 --- 0.000000000001 --- 1 --- 1.99999999999999 --- 2 --- 2.00000000000001 --- 3 --- 4 --- 4.5 --- 5 --- 5.5 --- 6 --- 7 --- 8 --- 9 --- 9.99999999999999 --- 10 --- 10.0000000000001 --- \. - --- UPDATE width_bucket_test SET operand_f8 = operand_num::float8; - --- SELECT --- operand_num, --- width_bucket(operand_num, 0, 10, 5) AS wb_1, --- width_bucket(operand_f8, 0, 10, 5) AS wb_1f, --- width_bucket(operand_num, 10, 0, 5) AS wb_2, --- width_bucket(operand_f8, 10, 0, 5) AS wb_2f, --- width_bucket(operand_num, 2, 8, 4) AS wb_3, --- width_bucket(operand_f8, 2, 8, 4) AS wb_3f, --- width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4, --- width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f, --- width_bucket(operand_num, -25, 25, 10) AS wb_5, --- width_bucket(operand_f8, -25, 25, 10) AS wb_5f --- FROM width_bucket_test; - --- for float8 only, check positive and negative infinity: we require --- finite bucket bounds, but allow an infinite operand --- SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error --- SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error --- SELECT width_bucket('Infinity'::float8, 1, 10, 10), --- width_bucket('-Infinity'::float8, 1, 10, 10); - --- DROP TABLE width_bucket_test; - --- [SPARK-28137] Missing Data Type Formatting Functions: TO_CHAR --- TO_CHAR() --- --- SELECT '' AS to_char_1, to_char(val, 
'9G999G999G999G999G999') --- FROM num_data; - --- SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999') --- FROM num_data; - --- SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR') --- FROM num_data; - --- SELECT '' AS to_char_4, to_char(val, '9999999999999999.999999999999999S') --- FROM num_data; - --- SELECT '' AS to_char_5, to_char(val, 'MI9999999999999999.999999999999999') FROM num_data; --- SELECT '' AS to_char_6, to_char(val, 'FMS9999999999999999.999999999999999') FROM num_data; --- SELECT '' AS to_char_7, to_char(val, 'FM9999999999999999.999999999999999THPR') FROM num_data; --- SELECT '' AS to_char_8, to_char(val, 'SG9999999999999999.999999999999999th') FROM num_data; --- SELECT '' AS to_char_9, to_char(val, '0999999999999999.999999999999999') FROM num_data; --- SELECT '' AS to_char_10, to_char(val, 'S0999999999999999.999999999999999') FROM num_data; --- SELECT '' AS to_char_11, to_char(val, 'FM0999999999999999.999999999999999') FROM num_data; --- SELECT '' AS to_char_12, to_char(val, 'FM9999999999999999.099999999999999') FROM num_data; --- SELECT '' AS to_char_13, to_char(val, 'FM9999999999990999.990999999999999') FROM num_data; --- SELECT '' AS to_char_14, to_char(val, 'FM0999999999999999.999909999999999') FROM num_data; --- SELECT '' AS to_char_15, to_char(val, 'FM9999999990999999.099999999999999') FROM num_data; --- SELECT '' AS to_char_16, to_char(val, 'L9999999999999999.099999999999999') FROM num_data; --- SELECT '' AS to_char_17, to_char(val, 'FM9999999999999999.99999999999999') FROM num_data; --- SELECT '' AS to_char_18, to_char(val, 'S 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9') FROM num_data; --- SELECT '' AS to_char_19, to_char(val, 'FMS 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 
9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9') FROM num_data; --- SELECT '' AS to_char_20, to_char(val, E'99999 "text" 9999 "9999" 999 "\\"text between quote marks\\"" 9999') FROM num_data; --- SELECT '' AS to_char_21, to_char(val, '999999SG9999999999') FROM num_data; --- SELECT '' AS to_char_22, to_char(val, 'FM9999999999999999.999999999999999') FROM num_data; --- SELECT '' AS to_char_23, to_char(val, '9.999EEEE') FROM num_data; - --- SELECT '' AS to_char_24, to_char('100'::numeric, 'FM999.9'); --- SELECT '' AS to_char_25, to_char('100'::numeric, 'FM999.'); --- SELECT '' AS to_char_26, to_char('100'::numeric, 'FM999'); - --- Check parsing of literal text in a format string --- SELECT '' AS to_char_27, to_char('100'::numeric, 'foo999'); --- SELECT '' AS to_char_28, to_char('100'::numeric, 'f\oo999'); --- SELECT '' AS to_char_29, to_char('100'::numeric, 'f\\oo999'); --- SELECT '' AS to_char_30, to_char('100'::numeric, 'f\"oo999'); --- SELECT '' AS to_char_31, to_char('100'::numeric, 'f\\"oo999'); --- SELECT '' AS to_char_32, to_char('100'::numeric, 'f"ool"999'); --- SELECT '' AS to_char_33, to_char('100'::numeric, 'f"\ool"999'); --- SELECT '' AS to_char_34, to_char('100'::numeric, 'f"\\ool"999'); --- SELECT '' AS to_char_35, to_char('100'::numeric, 'f"ool\"999'); --- SELECT '' AS to_char_36, to_char('100'::numeric, 'f"ool\\"999'); - --- [SPARK-28137] Missing Data Type Formatting Functions: TO_NUMBER --- TO_NUMBER() --- --- SET lc_numeric = 'C'; --- SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999'); --- SELECT '' AS to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999'); --- SELECT '' AS to_number_3, to_number('<564646.654564>', '999999.999999PR'); --- SELECT '' AS to_number_4, to_number('0.00001-', '9.999999S'); --- SELECT '' AS to_number_5, to_number('5.01-', 'FM9.999999S'); --- SELECT '' AS to_number_5, to_number('5.01-', 'FM9.999999MI'); --- SELECT '' AS to_number_7, to_number('5 4 4 4 4 8 . 7 8', '9 9 9 9 9 9 . 
9 9'); --- SELECT '' AS to_number_8, to_number('.01', 'FM9.99'); --- SELECT '' AS to_number_9, to_number('.0', '99999999.99999999'); --- SELECT '' AS to_number_10, to_number('0', '99.99'); --- SELECT '' AS to_number_11, to_number('.-01', 'S99.99'); --- SELECT '' AS to_number_12, to_number('.01-', '99.99S'); --- SELECT '' AS to_number_13, to_number(' . 0 1-', ' 9 9 . 9 9 S'); --- SELECT '' AS to_number_14, to_number('34,50','999,99'); --- SELECT '' AS to_number_15, to_number('123,000','999G'); --- SELECT '' AS to_number_16, to_number('123456','999G999'); --- SELECT '' AS to_number_17, to_number('$1234.56','L9,999.99'); --- SELECT '' AS to_number_18, to_number('$1234.56','L99,999.99'); --- SELECT '' AS to_number_19, to_number('$1,234.56','L99,999.99'); --- SELECT '' AS to_number_20, to_number('1234.56','L99,999.99'); --- SELECT '' AS to_number_21, to_number('1,234.56','L99,999.99'); --- SELECT '' AS to_number_22, to_number('42nd', '99th'); --- RESET lc_numeric; - --- --- Input syntax --- - -CREATE TABLE num_input_test (n1 decimal(38, 18)) USING parquet; - --- good inputs -INSERT INTO num_input_test VALUES (trim(' 123')); -INSERT INTO num_input_test VALUES (trim(' 3245874 ')); -INSERT INTO num_input_test VALUES (trim(' -93853')); -INSERT INTO num_input_test VALUES ('555.50'); -INSERT INTO num_input_test VALUES ('-555.50'); --- [SPARK-28315] Decimal can not accept NaN as input --- INSERT INTO num_input_test VALUES (trim('NaN ')); --- INSERT INTO num_input_test VALUES (trim(' nan')); - --- [SPARK-27923] Spark SQL accept bad inputs to NULL --- bad inputs --- INSERT INTO num_input_test VALUES (' '); --- INSERT INTO num_input_test VALUES (' 1234 %'); --- INSERT INTO num_input_test VALUES ('xyz'); --- INSERT INTO num_input_test VALUES ('- 1234'); --- INSERT INTO num_input_test VALUES ('5 . 0'); --- INSERT INTO num_input_test VALUES ('5. 
0 '); --- INSERT INTO num_input_test VALUES (''); --- INSERT INTO num_input_test VALUES (' N aN '); - -SELECT * FROM num_input_test; - --- [SPARK-28318] Decimal can only support precision up to 38 --- --- Test some corner cases for multiplication --- - --- select 4790999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; - --- select 4789999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; - --- select 4770999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; - --- select 4769999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; - --- --- Test some corner cases for division --- --- 999999999999999999999 is overflow for SYSTEM_DEFAULT(decimal(38, 18)), we use BigIntDecimal(decimal(38, 0)). 
-select cast(999999999999999999999 as decimal(38, 0))/1000000000000000000000; - -select div(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000); -select mod(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000); -select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000); -select mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000); -select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000)*1000000000000000000000 + mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000); -select mod (70.0,70) ; -select div (70.0,70) ; -select 70.0 / 70 ; -select 12345678901234567890 % 123; --- [SPARK-2659] HiveQL: Division operator should always perform fractional division --- select 12345678901234567890 DIV 123; --- select div(12345678901234567890, 123); --- select div(12345678901234567890, 123) * 123 + 12345678901234567890 % 123; - --- [SPARK-28007] Caret operator (^) means bitwise XOR in Spark and exponentiation in Postgres --- --- Test code path for raising to integer powers --- - --- select 10.0 ^ -2147483648 as rounds_to_zero; --- select 10.0 ^ -2147483647 as rounds_to_zero; --- select 10.0 ^ 2147483647 as overflows; --- select 117743296169.0 ^ 1000000000 as overflows; - --- cases that used to return inaccurate results --- select 3.789 ^ 21; --- select 3.789 ^ 35; --- select 1.2 ^ 345; --- select 0.12 ^ (-20); - --- cases that used to error out --- select 0.12 ^ (-25); --- select 0.5678 ^ (-85); - --- --- Tests for raising to non-integer powers --- - --- special cases --- select 0.0 ^ 0.0; --- select (-12.34) ^ 0.0; --- select 12.34 ^ 0.0; --- select 0.0 ^ 12.34; - --- NaNs --- select 'NaN'::numeric ^ 'NaN'::numeric; --- select 'NaN'::numeric ^ 0; --- select 'NaN'::numeric ^ 1; --- select 0 ^ 'NaN'::numeric; --- select 1 ^ 'NaN'::numeric; - --- invalid inputs --- select 0.0 ^ (-12.34); --- select (-12.34) ^ 1.2; - --- cases that used to generate 
inaccurate results --- select 32.1 ^ 9.8; --- select 32.1 ^ (-9.8); --- select 12.3 ^ 45.6; --- select 12.3 ^ (-45.6); - --- big test --- select 1.234 ^ 5678; - --- --- Tests for EXP() --- - --- special cases -select exp(0.0); -select exp(1.0); --- [SPARK-28316] EXP returns double type for decimal input --- [SPARK-28318] Decimal can only support precision up to 38 --- select exp(1.0::numeric(71,70)); - --- cases that used to generate inaccurate results -select exp(32.999); -select exp(-32.999); -select exp(123.456); -select exp(-123.456); - --- big test -select exp(1234.5678); - --- --- Tests for generate_series --- -select * from range(cast(0.0 as decimal(38, 18)), cast(4.0 as decimal(38, 18))); -select * from range(cast(0.1 as decimal(38, 18)), cast(4.0 as decimal(38, 18)), cast(1.3 as decimal(38, 18))); -select * from range(cast(4.0 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), cast(-2.2 as decimal(38, 18))); --- Trigger errors --- select * from generate_series(-100::numeric, 100::numeric, 0::numeric); --- select * from generate_series(-100::numeric, 100::numeric, 'nan'::numeric); --- select * from generate_series('nan'::numeric, 100::numeric, 10::numeric); --- select * from generate_series(0::numeric, 'nan'::numeric, 10::numeric); --- [SPARK-28007] Caret operator (^) means bitwise XOR in Spark and exponentiation in Postgres --- Checks maximum, output is truncated --- select (i / (10::numeric ^ 131071))::numeric(1,0) --- from generate_series(6 * (10::numeric ^ 131071), --- 9 * (10::numeric ^ 131071), --- 10::numeric ^ 131071) as a(i); --- Check usage with variables --- select * from generate_series(1::numeric, 3::numeric) i, generate_series(i,3) j; --- select * from generate_series(1::numeric, 3::numeric) i, generate_series(1,i) j; --- select * from generate_series(1::numeric, 3::numeric) i, generate_series(1,5,i) j; - --- --- Tests for LN() --- - --- [SPARK-27923] Invalid inputs for LN throws exception at PostgreSQL --- Invalid inputs --- select 
ln(-12.34); --- select ln(0.0); - --- Some random tests -select ln(1.2345678e-28); -select ln(0.0456789); --- [SPARK-28318] Decimal can only support precision up to 38 --- select ln(0.349873948359354029493948309745709580730482050975); -select ln(0.99949452); -select ln(1.00049687395); -select ln(1234.567890123456789); -select ln(5.80397490724e5); -select ln(9.342536355e34); - --- --- Tests for LOG() (base 10) --- - --- [SPARK-27923] Invalid inputs for LOG throws exception at PostgreSQL --- invalid inputs --- select log(-12.34); --- select log(0.0); - --- some random tests --- [SPARK-28318] Decimal can only support precision up to 38 --- select log(1.234567e-89); --- [SPARK-28324] The LOG function using 10 as the base, but Spark using E -select log(3.4634998359873254962349856073435545); -select log(9.999999999999999999); -select log(10.00000000000000000); -select log(10.00000000000000001); -select log(590489.45235237); - --- --- Tests for LOG() (arbitrary base) --- - --- [SPARK-27923] Invalid inputs for LOG throws exception at PostgreSQL --- invalid inputs --- select log(-12.34, 56.78); --- select log(-12.34, -56.78); --- select log(12.34, -56.78); --- select log(0.0, 12.34); --- select log(12.34, 0.0); --- select log(1.0, 12.34); - --- some random tests --- [SPARK-28318] Decimal can only support precision up to 38 --- select log(1.23e-89, 6.4689e45); -select log(0.99923, 4.58934e34); -select log(1.000016, 8.452010e18); --- [SPARK-28318] Decimal can only support precision up to 38 --- select log(3.1954752e47, 9.4792021e-73); - --- [SPARK-28317] Built-in Mathematical Functions: SCALE --- --- Tests for scale() --- - --- select scale(numeric 'NaN'); --- select scale(NULL::numeric); --- select scale(1.12); --- select scale(0); --- select scale(0.00); --- select scale(1.12345); --- select scale(110123.12475871856128); --- select scale(-1123.12471856128); --- select scale(-13.000000000000000); - --- --- Tests for SUM() --- - --- cases that need carry propagation -SELECT 
SUM(decimal(9999)) FROM range(1, 100001); -SELECT SUM(decimal(-9999)) FROM range(1, 100001); - -DROP TABLE num_data; -DROP TABLE num_exp_add; -DROP TABLE num_exp_sub; -DROP TABLE num_exp_div; -DROP TABLE num_exp_mul; -DROP TABLE num_exp_sqrt; -DROP TABLE num_exp_ln; -DROP TABLE num_exp_log10; -DROP TABLE num_exp_power_10_ln; -DROP TABLE num_result; -DROP TABLE num_input_test; diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql similarity index 95% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part1.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql index 5d54be9341148..63f80bd2efa73 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part1.sql @@ -8,6 +8,11 @@ -- avoid bit-exact output here because operations may not be bit-exact. -- SET extra_float_digits = 0; +-- Test aggregate operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + SELECT avg(four) AS avg_1 FROM onek; SELECT avg(a) AS avg_32 FROM aggtest WHERE a < 100; diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql similarity index 71% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part2.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql index 47f9d2f373069..a8af1db77563c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part2.sql @@ -5,6 +5,11 @@ -- AGGREGATES [Part 2] -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L145-L350 +-- Test aggregate operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + create temporary view int4_tbl as select * from values (0), (123456), @@ -41,42 +46,37 @@ create temporary view int4_tbl as select * from values -- -- test for bitwise integer aggregates -- --- CREATE TEMPORARY TABLE bitwise_test( --- i2 INT2, --- i4 INT4, --- i8 INT8, --- i INTEGER, --- x INT2, --- y BIT(4) --- ); +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (3, 3, 3, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4); -- empty case --- SELECT --- BIT_AND(i2) AS "?", --- BIT_OR(i4) AS "?" --- FROM bitwise_test; - --- COPY bitwise_test FROM STDIN NULL 'null'; --- 1 1 1 1 1 B0101 --- 3 3 3 null 2 B0100 --- 7 7 7 3 4 B1100 --- \. 
- --- SELECT --- BIT_AND(i2) AS "1", --- BIT_AND(i4) AS "1", --- BIT_AND(i8) AS "1", --- BIT_AND(i) AS "?", --- BIT_AND(x) AS "0", --- BIT_AND(y) AS "0100", --- --- BIT_OR(i2) AS "7", --- BIT_OR(i4) AS "7", --- BIT_OR(i8) AS "7", --- BIT_OR(i) AS "?", --- BIT_OR(x) AS "7", --- BIT_OR(y) AS "1101" --- FROM bitwise_test; +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0; + +-- null case +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null; + +SELECT + BIT_AND(cast(b1 as tinyint)) AS a1, + BIT_AND(cast(b2 as smallint)) AS b1, + BIT_AND(b3) AS c1, + BIT_AND(b4) AS d1, + BIT_OR(cast(b1 as tinyint)) AS e7, + BIT_OR(cast(b2 as smallint)) AS f7, + BIT_OR(b3) AS g7, + BIT_OR(b4) AS h3 +FROM bitwise_test; + +-- group by +SELECT b1 , bit_and(b2), bit_or(b4) FROM bitwise_test GROUP BY b1; + +--having +SELECT b1, bit_and(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7; + +-- window +SELECT b1, b2, bit_and(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; +SELECT b1, b2, bit_or(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test; -- -- test boolean aggregates @@ -114,50 +114,40 @@ SELECT NOT (FALSE OR FALSE) AS `t`; -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) --- CREATE TEMPORARY TABLE bool_test( --- b1 BOOL, --- b2 BOOL, --- b3 BOOL, --- b4 BOOL); +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4); -- empty case --- SELECT --- BOOL_AND(b1) AS "n", --- BOOL_OR(b3) AS "n" --- FROM bool_test; - --- COPY bool_test FROM STDIN NULL 'null'; --- TRUE null FALSE null --- FALSE TRUE null null --- null TRUE FALSE null --- \. 
- --- SELECT --- BOOL_AND(b1) AS "f", --- BOOL_AND(b2) AS "t", --- BOOL_AND(b3) AS "f", --- BOOL_AND(b4) AS "n", --- BOOL_AND(NOT b2) AS "f", --- BOOL_AND(NOT b3) AS "t" --- FROM bool_test; - --- SELECT --- EVERY(b1) AS "f", --- EVERY(b2) AS "t", --- EVERY(b3) AS "f", --- EVERY(b4) AS "n", --- EVERY(NOT b2) AS "f", --- EVERY(NOT b3) AS "t" --- FROM bool_test; - --- SELECT --- BOOL_OR(b1) AS "t", --- BOOL_OR(b2) AS "t", --- BOOL_OR(b3) AS "f", --- BOOL_OR(b4) AS "n", --- BOOL_OR(NOT b2) AS "f", --- BOOL_OR(NOT b3) AS "t" --- FROM bool_test; +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0; + +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test; + +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test; + +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test; -- -- Test cases that should be optimized into indexscans instead of diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql similarity index 94% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part3.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql index 78fdbf6ae6cd2..746b677234832 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part3.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql @@ -5,6 +5,11 @@ -- AGGREGATES [Part 3] -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L352-L605 +-- Test aggregate operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + -- [SPARK-28865] Table inheritance -- try it on an inheritance tree -- create table minmaxtest(f1 int); @@ -227,16 +232,16 @@ select max(min(unique1)) from tenk1; -- drop table bytea_test_table; --- [SPARK-27986] Support Aggregate Expressions with filter -- FILTER tests --- select min(unique1) filter (where unique1 > 100) from tenk1; +select min(unique1) filter (where unique1 > 100) from tenk1; --- select sum(1/ten) filter (where ten > 0) from tenk1; +select sum(1/ten) filter (where ten > 0) from tenk1; -- select ten, sum(distinct four) filter (where four::text ~ '123') from onek a -- group by ten; +-- [SPARK-30276] Support Filter expression allows simultaneous use of DISTINCT -- select ten, sum(distinct four) filter (where four > 10) from onek a -- group by ten -- having exists (select 1 from onek b where sum(distinct a.four) = b.four); @@ -249,6 +254,7 @@ select max(min(unique1)) from tenk1; select (select count(*) from (values (1)) t0(inner_c)) from (values (2),(3)) t1(outer_c); -- inner query is aggregation query +-- [SPARK-30219] Support Filter expression reference the outer query -- select (select count(*) filter (where outer_c <> 0) -- from (values (1)) t0(inner_c)) -- from (values (2),(3)) t1(outer_c); -- outer query is aggregation query @@ -260,6 +266,7 @@ from (values (2),(3)) t1(outer_c); -- inner query is aggregation query -- filter (where o.unique1 < 10)) -- from tenk1 o; -- outer query is aggregation query +-- [SPARK-30220] Support Filter expression uses IN/EXISTS predicate sub-queries -- subquery in FILTER clause (PostgreSQL extension) -- select sum(unique1) FILTER (WHERE -- unique1 IN (SELECT unique1 FROM onek where unique1 < 100)) FROM tenk1; diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part4.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part4.sql similarity index 98% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part4.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part4.sql index 6fa2306cf1475..0d255bed24e9c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/aggregates_part4.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part4.sql @@ -5,6 +5,11 @@ -- AGGREGATES [Part 4] -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L607-L997 +-- Test aggregate operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + -- [SPARK-27980] Ordered-Set Aggregate Functions -- ordered-set aggregates diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/boolean.sql similarity index 99% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/boolean.sql index 178823bcfe9d6..3a949c834deb5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/boolean.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/boolean.sql @@ -98,7 +98,6 @@ SELECT boolean('f') <= boolean('t') AS true; -- explicit casts to/from text SELECT boolean(string('TrUe')) AS true, boolean(string('fAlse')) AS `false`; - SELECT boolean(string(' true ')) AS true, boolean(string(' FALSE')) AS `false`; SELECT string(boolean(true)) AS true, string(boolean(false)) AS `false`; diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/case.sql 
b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/case.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql new file mode 100644 index 0000000000000..1a454179ef79f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql @@ -0,0 +1,50 @@ +-- +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- +-- COMMENTS +-- https://github.com/postgres/postgres/blob/REL_12_BETA3/src/test/regress/sql/comments.sql +-- + +SELECT 'trailing' AS first; -- trailing single line +SELECT /* embedded single line */ 'embedded' AS `second`; +SELECT /* both embedded and trailing single line */ 'both' AS third; -- trailing single line + +SELECT 'before multi-line' AS fourth; +--QUERY-DELIMITER-START +-- [SPARK-28880] ANSI SQL: Bracketed comments +/* This is an example of SQL which should not execute: + * select 'multi-line'; + */ +SELECT 'after multi-line' AS fifth; +--QUERY-DELIMITER-END + +-- [SPARK-28880] ANSI SQL: Bracketed comments +-- +-- Nested comments +-- +--QUERY-DELIMITER-START +/* +SELECT 'trailing' as x1; -- inside block comment +*/ + +/* This block comment surrounds a query which itself has a block comment... +SELECT /* embedded single line */ 'embedded' AS x2; +*/ + +SELECT -- continued after the following block comments... +/* Deeply nested comment. + This includes a single apostrophe to make sure we aren't decoding this part as a string. +SELECT 'deep nest' AS n1; +/* Second level of nesting... +SELECT 'deeper nest' as n2; +/* Third level of nesting... +SELECT 'deepest nest' as n3; +*/ +Hoo boy. Still two deep... +*/ +Now just one deep... 
+*/ +'deeply nested example' AS sixth; +--QUERY-DELIMITER-END +/* and this is the end of the file */ diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql new file mode 100644 index 0000000000000..39e708478e298 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql @@ -0,0 +1,779 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- CREATE VIEW +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/create_view.sql + +-- [SPARK-27764] Support geometric types +-- CREATE VIEW street AS +-- SELECT r.name, r.thepath, c.cname AS cname +-- FROM ONLY road r, real_city c +-- WHERE c.outline ## r.thepath; + +-- [SPARK-27764] Support geometric types +-- CREATE VIEW iexit AS +-- SELECT ih.name, ih.thepath, +-- interpt_pp(ih.thepath, r.thepath) AS exit +-- FROM ihighway ih, ramp r +-- WHERE ih.thepath ## r.thepath; + +CREATE TABLE emp ( + name string, + age int, + -- [SPARK-27764] Support geometric types + -- location point + salary int, + manager string +) USING parquet; + +CREATE VIEW toyemp AS + SELECT name, age, /* location ,*/ 12*salary AS annualsal + FROM emp; + +-- [SPARK-29659] Support COMMENT ON syntax +-- Test comments +-- COMMENT ON VIEW noview IS 'no view'; +-- COMMENT ON VIEW toyemp IS 'is a view'; +-- COMMENT ON VIEW toyemp IS NULL; + +DROP VIEW toyemp; +DROP TABLE emp; + +-- These views are left around mainly to exercise special cases in pg_dump. 
+ +-- [SPARK-19842] Informational Referential Integrity Constraints Support in Spark +CREATE TABLE view_base_table (key int /* PRIMARY KEY */, data varchar(20)); +-- +CREATE VIEW key_dependent_view AS + SELECT * FROM view_base_table GROUP BY key; +-- +-- [SPARK-19842] Informational Referential Integrity Constraints Support in Spark +-- ALTER TABLE view_base_table DROP CONSTRAINT view_base_table_pkey; -- fails + +CREATE VIEW key_dependent_view_no_cols AS + SELECT FROM view_base_table GROUP BY key HAVING length(data) > 0; + +-- +-- CREATE OR REPLACE VIEW +-- + +CREATE TABLE viewtest_tbl (a int, b int) using parquet; +-- [SPARK-29386] Copy data between a file and a table +-- COPY viewtest_tbl FROM stdin; +-- 5 10 +-- 10 15 +-- 15 20 +-- 20 25 +-- \. +INSERT INTO viewtest_tbl VALUES (5, 10), (10, 15), (15, 20), (20, 25); + +CREATE OR REPLACE VIEW viewtest AS + SELECT * FROM viewtest_tbl; + +CREATE OR REPLACE VIEW viewtest AS + SELECT * FROM viewtest_tbl WHERE a > 10; + +SELECT * FROM viewtest; + +CREATE OR REPLACE VIEW viewtest AS + SELECT a, b FROM viewtest_tbl WHERE a > 5 ORDER BY b DESC; + +SELECT * FROM viewtest; + +-- should fail +-- [SPARK-29660] Dropping columns and changing column names/types are prohibited in VIEW definition +CREATE OR REPLACE VIEW viewtest AS + SELECT a FROM viewtest_tbl WHERE a <> 20; + +-- should fail +-- [SPARK-29660] Dropping columns and changing column names/types are prohibited in VIEW definition +CREATE OR REPLACE VIEW viewtest AS + SELECT 1, * FROM viewtest_tbl; + +-- should fail +-- [SPARK-29660] Dropping columns and changing column names/types are prohibited in VIEW definition +CREATE OR REPLACE VIEW viewtest AS + SELECT a, decimal(b) FROM viewtest_tbl; + +-- should work +CREATE OR REPLACE VIEW viewtest AS + SELECT a, b, 0 AS c FROM viewtest_tbl; + +DROP VIEW viewtest; +DROP TABLE viewtest_tbl; + +-- tests for temporary views + +-- [SPARK-29661] Support cascaded syntax in CREATE SCHEMA +-- CREATE SCHEMA temp_view_test +-- CREATE 
TABLE base_table (a int, id int) using parquet +-- CREATE TABLE base_table2 (a int, id int) using parquet; +CREATE SCHEMA temp_view_test; +CREATE TABLE temp_view_test.base_table (a int, id int) using parquet; +CREATE TABLE temp_view_test.base_table2 (a int, id int) using parquet; + +-- Replace SET with USE +-- SET search_path TO temp_view_test, public; +USE temp_view_test; + +-- Since Spark doesn't support CREATE TEMPORARY TABLE, we used CREATE TEMPORARY VIEW instead +-- CREATE TEMPORARY TABLE temp_table (a int, id int); +CREATE TEMPORARY VIEW temp_table AS SELECT * FROM VALUES + (1, 1) as temp_table(a, id); + +-- should be created in temp_view_test schema +CREATE VIEW v1 AS SELECT * FROM base_table; +DESC TABLE EXTENDED v1; +-- should be created in temp object schema +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW v1_temp AS SELECT * FROM temp_table; +-- should be created in temp object schema +CREATE TEMP VIEW v2_temp AS SELECT * FROM base_table; +DESC TABLE EXTENDED v2_temp; +-- should be created in temp_views schema +CREATE VIEW temp_view_test.v2 AS SELECT * FROM base_table; +DESC TABLE EXTENDED temp_view_test.v2; +-- should fail +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW temp_view_test.v3_temp AS SELECT * FROM temp_table; +-- should fail +-- [SPARK-29661] Support cascaded syntax in CREATE SCHEMA +-- CREATE SCHEMA test_view_schema +-- CREATE TEMP VIEW testview AS SELECT 1; + +-- joins: if any of the join relations are temporary, the view +-- should also be temporary + +-- should be non-temp +CREATE VIEW v3 AS + SELECT t1.a AS t1_a, t2.a AS t2_a + FROM base_table t1, base_table2 t2 + WHERE t1.id = t2.id; +DESC TABLE EXTENDED v3; +-- should be temp (one join rel is temp) +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW v4_temp AS + SELECT t1.a AS t1_a, t2.a AS t2_a + FROM 
base_table t1, temp_table t2 + WHERE t1.id = t2.id; +-- should be temp +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW v5_temp AS + SELECT t1.a AS t1_a, t2.a AS t2_a, t3.a AS t3_a + FROM base_table t1, base_table2 t2, temp_table t3 + WHERE t1.id = t2.id and t2.id = t3.id; + +-- subqueries +CREATE VIEW v4 AS SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2); +DESC TABLE EXTENDED v4; +CREATE VIEW v5 AS SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2; +DESC TABLE EXTENDED v5; +CREATE VIEW v6 AS SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2); +DESC TABLE EXTENDED v6; +CREATE VIEW v7 AS SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2); +DESC TABLE EXTENDED v7; +CREATE VIEW v8 AS SELECT * FROM base_table WHERE EXISTS (SELECT 1); +DESC TABLE EXTENDED v8; + +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW v6_temp AS SELECT * FROM base_table WHERE id IN (SELECT id FROM temp_table); +CREATE VIEW v7_temp AS SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM temp_table) t2; +CREATE VIEW v8_temp AS SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM temp_table); +CREATE VIEW v9_temp AS SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM temp_table); + +-- a view should also be temporary if it references a temporary view +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW v10_temp AS SELECT * FROM v7_temp; +CREATE VIEW v11_temp AS SELECT t1.id, t2.a FROM base_table t1, v10_temp t2; +CREATE VIEW v12_temp AS SELECT true FROM v11_temp; + +-- [SPARK-27764] Support ANSI SQL CREATE SEQUENCE +-- a view should also be temporary if it references a temporary sequence +-- CREATE SEQUENCE seq1; +-- CREATE TEMPORARY SEQUENCE seq1_temp; +-- CREATE VIEW v9 AS SELECT seq1.is_called FROM seq1; +-- CREATE VIEW v13_temp AS SELECT 
seq1_temp.is_called FROM seq1_temp; + +-- Skip the tests below because of PostgreSQL specific cases +-- SELECT relname FROM pg_class +-- WHERE relname LIKE 'v_' +-- AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'temp_view_test') +-- ORDER BY relname; +-- SELECT relname FROM pg_class +-- WHERE relname LIKE 'v%' +-- AND relnamespace IN (SELECT oid FROM pg_namespace WHERE nspname LIKE 'pg_temp%') +-- ORDER BY relname; + +CREATE SCHEMA testviewschm2; +-- Replace SET with USE +-- SET search_path TO testviewschm2, public; +USE testviewschm2; + +CREATE TABLE t1 (num int, name string) using parquet; +CREATE TABLE t2 (num2 int, value string) using parquet; +-- Since Spark doesn't support CREATE TEMPORARY TABLE, we used CREATE TEMPORARY VIEW instead +-- CREATE TEMP TABLE tt (num2 int, value string); +CREATE TEMP VIEW tt AS SELECT * FROM VALUES + (1, 'a') AS tt(num2, value); + +CREATE VIEW nontemp1 AS SELECT * FROM t1 CROSS JOIN t2; +DESC TABLE EXTENDED nontemp1; +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW temporal1 AS SELECT * FROM t1 CROSS JOIN tt; +CREATE VIEW nontemp2 AS SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2; +DESC TABLE EXTENDED nontemp2; +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW temporal2 AS SELECT * FROM t1 INNER JOIN tt ON t1.num = tt.num2; +CREATE VIEW nontemp3 AS SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2; +DESC TABLE EXTENDED nontemp3; +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW temporal3 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num = tt.num2; +CREATE VIEW nontemp4 AS SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx'; +DESC TABLE EXTENDED nontemp4; +-- [SPARK-29628] Forcibly create a temporary view in CREATE VIEW if referencing a temporary view +CREATE VIEW temporal4 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num 
= tt.num2 AND tt.value = 'xxx'; +CREATE VIEW temporal5 AS SELECT * FROM t1 WHERE num IN (SELECT num FROM t1 WHERE EXISTS (SELECT 1 FROM tt)); + +-- Skip the tests below because of PostgreSQL specific cases +-- SELECT relname FROM pg_class +-- WHERE relname LIKE 'nontemp%' +-- AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'testviewschm2') +-- ORDER BY relname; +-- SELECT relname FROM pg_class +-- WHERE relname LIKE 'temporal%' +-- AND relnamespace IN (SELECT oid FROM pg_namespace WHERE nspname LIKE 'pg_temp%') +-- ORDER BY relname; + +CREATE TABLE tbl1 ( a int, b int) using parquet; +CREATE TABLE tbl2 (c int, d int) using parquet; +CREATE TABLE tbl3 (e int, f int) using parquet; +CREATE TABLE tbl4 (g int, h int) using parquet; +-- Since Spark doesn't support CREATE TEMPORARY TABLE, we used CREATE TABLE instead +-- CREATE TEMP TABLE tmptbl (i int, j int); +CREATE TABLE tmptbl (i int, j int) using parquet; +INSERT INTO tmptbl VALUES (1, 1); + +--Should be in testviewschm2 +CREATE VIEW pubview AS SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f); +DESC TABLE EXTENDED pubview; + +-- Skip the test below because of PostgreSQL specific cases +-- SELECT count(*) FROM pg_class where relname = 'pubview' +-- AND relnamespace IN (SELECT OID FROM pg_namespace WHERE nspname = 'testviewschm2'); + +--Should be in temp object schema +CREATE VIEW mytempview AS SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j); +DESC TABLE EXTENDED mytempview; + +-- Skip the test below because of PostgreSQL specific cases +-- SELECT count(*) FROM pg_class where relname LIKE 'mytempview' +-- And relnamespace IN (SELECT OID FROM pg_namespace WHERE nspname LIKE 
'pg_temp%'); + +-- +-- CREATE VIEW and WITH(...) clause +-- CREATE VIEW mysecview1 +-- AS SELECT * FROM tbl1 WHERE a = 0; +-- +-- Skip the tests below because Spark doesn't support `WITH options` +-- CREATE VIEW mysecview2 WITH (security_barrier=true) +-- AS SELECT * FROM tbl1 WHERE a > 0; +-- CREATE VIEW mysecview3 WITH (security_barrier=false) +-- AS SELECT * FROM tbl1 WHERE a < 0; +-- CREATE VIEW mysecview4 WITH (security_barrier) +-- AS SELECT * FROM tbl1 WHERE a <> 0; +-- Spark cannot support options in WITH clause +-- CREATE VIEW mysecview5 WITH (security_barrier=100) -- Error +-- AS SELECT * FROM tbl1 WHERE a > 100; +-- CREATE VIEW mysecview6 WITH (invalid_option) -- Error +-- AS SELECT * FROM tbl1 WHERE a < 100; +-- Skip the test below because of PostgreSQL specific cases +-- SELECT relname, relkind, reloptions FROM pg_class +-- WHERE oid in ('mysecview1'::regclass, 'mysecview2'::regclass, +-- 'mysecview3'::regclass, 'mysecview4'::regclass) +-- ORDER BY relname; + +-- CREATE OR REPLACE VIEW mysecview1 +-- AS SELECT * FROM tbl1 WHERE a = 256; +-- CREATE OR REPLACE VIEW mysecview2 +-- AS SELECT * FROM tbl1 WHERE a > 256; +-- CREATE OR REPLACE VIEW mysecview3 WITH (security_barrier=true) +-- AS SELECT * FROM tbl1 WHERE a < 256; +-- CREATE OR REPLACE VIEW mysecview4 WITH (security_barrier=false) +-- AS SELECT * FROM tbl1 WHERE a <> 256; +-- Skip the test below because of PostgreSQL specific cases +-- SELECT relname, relkind, reloptions FROM pg_class +-- WHERE oid in ('mysecview1'::regclass, 'mysecview2'::regclass, +-- 'mysecview3'::regclass, 'mysecview4'::regclass) +-- ORDER BY relname; + +-- Check that unknown literals are converted to "text" in CREATE VIEW, +-- so that we don't end up with unknown-type columns. 
+ +-- Skip the tests below because of PostgreSQL specific cases +-- CREATE VIEW unspecified_types AS +-- SELECT 42 as i, 42.5 as num, 'foo' as u, 'foo'::unknown as u2, null as n; +-- \d+ unspecified_types +-- SELECT * FROM unspecified_types; + +-- This test checks that proper typmods are assigned in a multi-row VALUES + +CREATE VIEW tt1 AS + SELECT * FROM ( + VALUES + ('abc', '0123456789', 42, 'abcd'), + ('0123456789', 'abc', 42.12, 'abc') + ) vv(a,b,c,d); +-- Replace the PostgreSQL meta command `\d` with `DESC` +-- \d+ tt1 +SELECT * FROM tt1; +SELECT string(a) FROM tt1; +DROP VIEW tt1; + +-- Test view decompilation in the face of relation renaming conflicts + +CREATE TABLE tt1 (f1 int, f2 int, f3 string) using parquet; +CREATE TABLE tx1 (x1 int, x2 int, x3 string) using parquet; +CREATE TABLE temp_view_test.tt1 (y1 int, f2 int, f3 string) using parquet; + +CREATE VIEW aliased_view_1 AS + select * from tt1 + where exists (select 1 from tx1 where tt1.f1 = tx1.x1); +CREATE VIEW aliased_view_2 AS + select * from tt1 a1 + where exists (select 1 from tx1 where a1.f1 = tx1.x1); +CREATE VIEW aliased_view_3 AS + select * from tt1 + where exists (select 1 from tx1 a2 where tt1.f1 = a2.x1); +CREATE VIEW aliased_view_4 AS + select * from temp_view_test.tt1 + where exists (select 1 from tt1 where temp_view_test.tt1.y1 = tt1.f1); + +-- Replace the PostgreSQL meta command `\d` with `DESC` +-- \d+ aliased_view_1 +DESC TABLE aliased_view_1; +-- \d+ aliased_view_2 +DESC TABLE aliased_view_2; +-- \d+ aliased_view_3 +DESC TABLE aliased_view_3; +-- \d+ aliased_view_4 +DESC TABLE aliased_view_4; + +ALTER TABLE tx1 RENAME TO a1; + +-- Replace the PostgreSQL meta command `\d` with `DESC` +-- \d+ aliased_view_1 +DESC TABLE aliased_view_1; +-- \d+ aliased_view_2 +DESC TABLE aliased_view_2; +-- \d+ aliased_view_3 +DESC TABLE aliased_view_3; +-- \d+ aliased_view_4 +DESC TABLE aliased_view_4; + +ALTER TABLE tt1 RENAME TO a2; + +-- Replace the PostgreSQL meta command `\d` with `DESC` +-- \d+ 
aliased_view_1 +DESC TABLE aliased_view_1; +-- \d+ aliased_view_2 +DESC TABLE aliased_view_2; +-- \d+ aliased_view_3 +DESC TABLE aliased_view_3; +-- \d+ aliased_view_4 +DESC TABLE aliased_view_4; + +ALTER TABLE a1 RENAME TO tt1; + +-- Replace the PostgreSQL meta command `\d` with `DESC` +-- \d+ aliased_view_1 +DESC TABLE aliased_view_1; +-- \d+ aliased_view_2 +DESC TABLE aliased_view_2; +-- \d+ aliased_view_3 +DESC TABLE aliased_view_3; +-- \d+ aliased_view_4 +DESC TABLE aliased_view_4; + +ALTER TABLE a2 RENAME TO tx1; +-- [SPARK-29632] Support ALTER TABLE [relname] SET SCHEMA [dbname] +-- ALTER TABLE tx1 SET SCHEMA temp_view_test; + +-- \d+ aliased_view_1 +-- \d+ aliased_view_2 +-- \d+ aliased_view_3 +-- \d+ aliased_view_4 + +-- [SPARK-29632] Support ALTER TABLE [relname] SET SCHEMA [dbname] +-- ALTER TABLE temp_view_test.tt1 RENAME TO tmp1; +-- ALTER TABLE temp_view_test.tmp1 SET SCHEMA testviewschm2; +-- ALTER TABLE tmp1 RENAME TO tx1; + +-- Replace the PostgreSQL meta command `\d` with `DESC` +-- \d+ aliased_view_1 +-- \d+ aliased_view_2 +-- \d+ aliased_view_3 +-- \d+ aliased_view_4 + +-- Test aliasing of joins + +create view view_of_joins as +select * from + (select * from (tbl1 cross join tbl2) same) ss, + (tbl3 cross join tbl4) same; + +-- Replace the PostgreSQL meta command `\d` with `DESC` +-- \d+ view_of_joins + +-- Test view decompilation in the face of column addition/deletion/renaming + +create table tt2 (a int, b int, c int) using parquet; +create table tt3 (ax bigint, b short, c decimal) using parquet; +create table tt4 (ay int, b int, q int) using parquet; + +create view v1 as select * from tt2 natural join tt3; +create view v1a as select * from (tt2 natural join tt3) j; +create view v2 as select * from tt2 join tt3 using (b,c) join tt4 using (b); +create view v2a as select * from (tt2 join tt3 using (b,c) join tt4 using (b)) j; +create view v3 as select * from tt2 join tt3 using (b,c) full join tt4 using (b); + +-- Replace `pg_get_viewdef` with 
`DESC` +-- select pg_get_viewdef('v1', true); +DESC TABLE v1; +-- select pg_get_viewdef('v1a', true); +DESC TABLE v1a; +-- select pg_get_viewdef('v2', true); +DESC TABLE v2; +-- select pg_get_viewdef('v2a', true); +DESC TABLE v2a; +-- select pg_get_viewdef('v3', true); +DESC TABLE v3; + +alter table tt2 add column d int; +alter table tt2 add column e int; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('v1', true); +DESC TABLE v1; +-- select pg_get_viewdef('v1a', true); +DESC TABLE v1a; +-- select pg_get_viewdef('v2', true); +DESC TABLE v2; +-- select pg_get_viewdef('v2a', true); +DESC TABLE v2a; +-- select pg_get_viewdef('v3', true); +DESC TABLE v3; + +-- [SPARK-27764] Make COLUMN optional in ALTER TABLE +-- [SPARK-27589] Spark file source V2 (For supporting RENAME COLUMN in ALTER TABLE) +-- alter table tt3 rename c to d; +drop table tt3; +create table tt3 (ax bigint, b short, d decimal) using parquet; + +-- select pg_get_viewdef('v1', true); +-- select pg_get_viewdef('v1a', true); +-- select pg_get_viewdef('v2', true); +-- select pg_get_viewdef('v2a', true); +-- select pg_get_viewdef('v3', true); + +alter table tt3 add column c int; +alter table tt3 add column e int; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('v1', true); +DESC TABLE v1; +-- select pg_get_viewdef('v1a', true); +DESC TABLE v1a; +-- select pg_get_viewdef('v2', true); +DESC TABLE v2; +-- select pg_get_viewdef('v2a', true); +DESC TABLE v2a; +-- select pg_get_viewdef('v3', true); +DESC TABLE v3; + +-- [SPARK-27589] Spark file source V2 (For supporting DROP COLUMN in ALTER TABLE) +-- alter table tt2 drop column d; + +-- select pg_get_viewdef('v1', true); +-- select pg_get_viewdef('v1a', true); +-- select pg_get_viewdef('v2', true); +-- select pg_get_viewdef('v2a', true); +-- select pg_get_viewdef('v3', true); + +create table tt5 (a int, b int) using parquet; +create table tt6 (c int, d int) using parquet; +create view vv1 as select * from (tt5 cross join 
tt6) j(aa,bb,cc,dd); +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv1', true); +DESC TABLE vv1; +alter table tt5 add column c int; +-- select pg_get_viewdef('vv1', true); +DESC TABLE vv1; +alter table tt5 add column cc int; +-- select pg_get_viewdef('vv1', true); +DESC TABLE vv1; +-- [SPARK-27589] Spark file source V2 (For supporting DROP COLUMN in ALTER TABLE) +-- alter table tt5 drop column c; +-- select pg_get_viewdef('vv1', true); + +-- Unnamed FULL JOIN USING is lots of fun too + +-- [SPARK-27589] Spark file source V2 (For supporting DROP COLUMN in ALTER TABLE) +create table tt7 (x int, /* xx int, */ y int) using parquet; +-- alter table tt7 drop column xx; +create table tt8 (x int, z int) using parquet; + +create view vv2 as +select * from (values(1,2,3,4,5)) v(a,b,c,d,e) +union all +select * from tt7 full join tt8 using (x), tt8 tt8x; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv2', true); +DESC TABLE vv2; + +create view vv3 as +select * from (values(1,2,3,4,5,6)) v(a,b,c,x,e,f) +union all +select * from + tt7 full join tt8 using (x), + tt7 tt7x full join tt8 tt8x using (x); + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv3', true); +DESC TABLE vv3; + +create view vv4 as +select * from (values(1,2,3,4,5,6,7)) v(a,b,c,x,e,f,g) +union all +select * from + tt7 full join tt8 using (x), + tt7 tt7x full join tt8 tt8x using (x) full join tt8 tt8y using (x); + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv4', true); +DESC TABLE vv4; + +alter table tt7 add column zz int; +alter table tt7 add column z int; +-- [SPARK-27589] Spark file source V2 (For supporting DROP COLUMN in ALTER TABLE) +-- alter table tt7 drop column zz; +alter table tt8 add column z2 int; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv2', true); +DESC TABLE vv2; +-- select pg_get_viewdef('vv3', true); +DESC TABLE vv3; +-- select pg_get_viewdef('vv4', true); +DESC TABLE vv4; + 
+-- Implicit coercions in a JOIN USING create issues similar to FULL JOIN + +-- [SPARK-27589] Spark file source V2 (For supporting DROP COLUMN in ALTER TABLE) +create table tt7a (x date, /* xx int, */ y int) using parquet; +-- alter table tt7a drop column xx; +create table tt8a (x timestamp, z int) using parquet; + +-- To pass the query, added exact column names in the select stmt +create view vv2a as +select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e) +union all +select * from tt7a left join tt8a using (x), tt8a tt8ax; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv4', true); +DESC TABLE vv4; +-- select pg_get_viewdef('vv2a', true); +DESC TABLE vv2a; + +-- +-- Also check dropping a column that existed when the view was made +-- + +create table tt9 (x int, xx int, y int) using parquet; +create table tt10 (x int, z int) using parquet; + +create view vv5 as select x,y,z from tt9 join tt10 using(x); + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv5', true); +DESC TABLE vv5; + +-- [SPARK-27589] Spark file source V2 (For supporting DROP COLUMN in ALTER TABLE) +-- alter table tt9 drop column xx; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv5', true); +DESC TABLE vv5; + +-- +-- Another corner case is that we might add a column to a table below a +-- JOIN USING, and thereby make the USING column name ambiguous +-- + +create table tt11 (x int, y int) using parquet; +create table tt12 (x int, z int) using parquet; +create table tt13 (z int, q int) using parquet; + +create view vv6 as select x,y,z,q from + (tt11 join tt12 using(x)) join tt13 using(z); + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv6', true); +DESC TABLE vv6; + +alter table tt11 add column z int; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('vv6', true); +DESC TABLE vv6; + +-- +-- Check cases involving dropped/altered columns in a function's rowtype result +-- + +-- Skip the tests 
below because Spark doesn't support PostgreSQL-specific UDFs/transactions +-- create table tt14t (f1 text, f2 text, f3 text, f4 text); +-- insert into tt14t values('foo', 'bar', 'baz', '42'); +-- +-- alter table tt14t drop column f2; +-- +-- create function tt14f() returns setof tt14t as +-- $$ +-- declare +-- rec1 record; +-- begin +-- for rec1 in select * from tt14t +-- loop +-- return next rec1; +-- end loop; +-- end; +-- $$ +-- language plpgsql; +-- +-- create view tt14v as select t.* from tt14f() t; +-- +-- select pg_get_viewdef('tt14v', true); +-- select * from tt14v; +-- +-- begin; +-- +-- -- this perhaps should be rejected, but it isn't: +-- alter table tt14t drop column f3; +-- +-- -- f3 is still in the view ... +-- select pg_get_viewdef('tt14v', true); +-- -- but will fail at execution +-- select f1, f4 from tt14v; +-- select * from tt14v; +-- +-- rollback; +-- +-- begin; +-- +-- -- this perhaps should be rejected, but it isn't: +-- alter table tt14t alter column f4 type integer using f4::integer; +-- +-- -- f4 is still in the view ... 
+-- select pg_get_viewdef('tt14v', true); +-- -- but will fail at execution +-- select f1, f3 from tt14v; +-- select * from tt14v; +-- +-- rollback; + +-- check display of whole-row variables in some corner cases + +-- Skip the tests below because we do not support creating types +-- create type nestedcomposite as (x int8_tbl); +-- create view tt15v as select row(i)::nestedcomposite from int8_tbl i; +-- select * from tt15v; +-- select pg_get_viewdef('tt15v', true); +-- select row(i.*::int8_tbl)::nestedcomposite from int8_tbl i; +-- +-- create view tt16v as select * from int8_tbl i, lateral(values(i)) ss; +-- select * from tt16v; +-- select pg_get_viewdef('tt16v', true); +-- select * from int8_tbl i, lateral(values(i.*::int8_tbl)) ss; +-- +-- create view tt17v as select * from int8_tbl i where i in (values(i)); +-- select * from tt17v; +-- select pg_get_viewdef('tt17v', true); +-- select * from int8_tbl i where i.* in (values(i.*::int8_tbl)); + +-- check unique-ification of overlength names + +CREATE TABLE int8_tbl (q1 int, q2 int) USING parquet; + +create view tt18v as + select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy + union all + select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz; +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('tt18v', true); +DESC TABLE tt18v; +-- explain (costs off) select * from tt18v; + +-- check display of ScalarArrayOp with a sub-select + +-- Skip the tests below because of PostgreSQL specific cases +-- select 'foo'::text = any(array['abc','def','foo']::text[]); +-- select 'foo'::text = any((select array['abc','def','foo']::text[])); -- fail +-- select 'foo'::text = any((select array['abc','def','foo']::text[])::text[]); +-- +-- create view tt19v as +-- select 'foo'::text = any(array['abc','def','foo']::text[]) c1, +-- 'foo'::text = any((select array['abc','def','foo']::text[])::text[]) c2; +-- select pg_get_viewdef('tt19v', true); + +-- check 
display of assorted RTE_FUNCTION expressions + +-- [SPARK-28682] ANSI SQL: Collation Support +-- create view tt20v as +-- select * from +-- coalesce(1,2) as c, +-- collation for ('x'::text) col, +-- current_date as d, +-- localtimestamp(3) as t, +-- cast(1+2 as int4) as i4, +-- cast(1+2 as int8) as i8; +-- select pg_get_viewdef('tt20v', true); + +-- corner cases with empty join conditions + +create view tt21v as +select * from tt5 natural inner join tt6; +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('tt21v', true); +DESC TABLE tt21v; + +create view tt22v as +select * from tt5 natural left join tt6; +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('tt22v', true); +DESC TABLE tt22v; + +-- check handling of views with immediately-renamed columns + +create view tt23v (col_a, col_b) as +select q1 as other_name1, q2 as other_name2 from int8_tbl +union +select 42, 43; + +-- Replace `pg_get_viewdef` with `DESC` +-- select pg_get_viewdef('tt23v', true); +DESC TABLE tt23v; +-- Skip the test below because of PostgreSQL specific cases +-- select pg_get_ruledef(oid, true) from pg_rewrite +-- where ev_class = 'tt23v'::regclass and ev_type = '1'; + +-- clean up all the random objects we made above +DROP SCHEMA temp_view_test CASCADE; +DROP SCHEMA testviewschm2 CASCADE; + +DROP VIEW temp_table; +DROP VIEW tt; diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/date.sql similarity index 89% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/date.sql index b9a6b998e52fe..0bab2f884d976 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/date.sql @@ -7,23 +7,25 @@ CREATE TABLE DATE_TBL (f1 date) USING parquet; -INSERT INTO DATE_TBL VALUES ('1957-04-09'); -INSERT INTO DATE_TBL VALUES ('1957-06-13'); 
-INSERT INTO DATE_TBL VALUES ('1996-02-28'); -INSERT INTO DATE_TBL VALUES ('1996-02-29'); -INSERT INTO DATE_TBL VALUES ('1996-03-01'); -INSERT INTO DATE_TBL VALUES ('1996-03-02'); -INSERT INTO DATE_TBL VALUES ('1997-02-28'); +-- PostgreSQL implicitly casts string literals to data with date types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO DATE_TBL VALUES (date('1957-04-09')); +INSERT INTO DATE_TBL VALUES (date('1957-06-13')); +INSERT INTO DATE_TBL VALUES (date('1996-02-28')); +INSERT INTO DATE_TBL VALUES (date('1996-02-29')); +INSERT INTO DATE_TBL VALUES (date('1996-03-01')); +INSERT INTO DATE_TBL VALUES (date('1996-03-02')); +INSERT INTO DATE_TBL VALUES (date('1997-02-28')); -- [SPARK-27923] Skip invalid date: 1997-02-29 --- INSERT INTO DATE_TBL VALUES ('1997-02-29'); -INSERT INTO DATE_TBL VALUES ('1997-03-01'); -INSERT INTO DATE_TBL VALUES ('1997-03-02'); -INSERT INTO DATE_TBL VALUES ('2000-04-01'); -INSERT INTO DATE_TBL VALUES ('2000-04-02'); -INSERT INTO DATE_TBL VALUES ('2000-04-03'); -INSERT INTO DATE_TBL VALUES ('2038-04-08'); -INSERT INTO DATE_TBL VALUES ('2039-04-09'); -INSERT INTO DATE_TBL VALUES ('2040-04-10'); +-- INSERT INTO DATE_TBL VALUES (date('1997-02-29')); +INSERT INTO DATE_TBL VALUES (date('1997-03-01')); +INSERT INTO DATE_TBL VALUES (date('1997-03-02')); +INSERT INTO DATE_TBL VALUES (date('2000-04-01')); +INSERT INTO DATE_TBL VALUES (date('2000-04-02')); +INSERT INTO DATE_TBL VALUES (date('2000-04-03')); +INSERT INTO DATE_TBL VALUES (date('2038-04-08')); +INSERT INTO DATE_TBL VALUES (date('2039-04-09')); +INSERT INTO DATE_TBL VALUES (date('2040-04-10')); SELECT f1 AS `Fifteen` FROM DATE_TBL; @@ -208,20 +210,19 @@ SELECT date '5874898-01-01'; -- out of range SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL; --- [SPARK-28141] Date type can not accept special values --- SELECT f1 - date 'epoch' AS "Days From Epoch" FROM DATE_TBL; +SELECT f1 - date 'epoch' AS `Days From Epoch` FROM DATE_TBL; --- SELECT date 
'yesterday' - date 'today' AS "One day"; +SELECT date 'yesterday' - date 'today' AS `One day`; --- SELECT date 'today' - date 'tomorrow' AS "One day"; +SELECT date 'today' - date 'tomorrow' AS `One day`; --- SELECT date 'yesterday' - date 'tomorrow' AS "Two days"; +SELECT date 'yesterday' - date 'tomorrow' AS `Two days`; --- SELECT date 'tomorrow' - date 'today' AS "One day"; +SELECT date 'tomorrow' - date 'today' AS `One day`; --- SELECT date 'today' - date 'yesterday' AS "One day"; +SELECT date 'today' - date 'yesterday' AS `One day`; --- SELECT date 'tomorrow' - date 'yesterday' AS "Two days"; +SELECT date 'tomorrow' - date 'yesterday' AS `Two days`; -- [SPARK-28017] Enhance date EXTRACT -- @@ -290,7 +291,7 @@ SELECT DATE_TRUNC('DECADE', DATE '1993-12-25'); -- 1990-01-01 SELECT DATE_TRUNC('DECADE', DATE '0004-12-25'); -- 0001-01-01 BC SELECT DATE_TRUNC('DECADE', TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')); -- 0011-01-01 BC --- [SPARK-28141] Date type can not accept special values +-- [SPARK-29006] Support special date/timestamp values `infinity`/`-infinity` -- -- test infinity -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/float4.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/float4.sql similarity index 96% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/float4.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/float4.sql index 058467695a608..2989569e219ff 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/float4.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/float4.sql @@ -7,11 +7,13 @@ CREATE TABLE FLOAT4_TBL (f1 float) USING parquet; -INSERT INTO FLOAT4_TBL VALUES (' 0.0'); -INSERT INTO FLOAT4_TBL VALUES ('1004.30 '); -INSERT INTO FLOAT4_TBL VALUES (' -34.84 '); -INSERT INTO FLOAT4_TBL VALUES ('1.2345678901234e+20'); -INSERT INTO FLOAT4_TBL VALUES ('1.2345678901234e-20'); +-- PostgreSQL implicitly casts string literals to data with floating point types, but +-- 
Spark does not support that kind of implicit casts. +INSERT INTO FLOAT4_TBL VALUES (float(' 0.0')); +INSERT INTO FLOAT4_TBL VALUES (float('1004.30 ')); +INSERT INTO FLOAT4_TBL VALUES (float(' -34.84 ')); +INSERT INTO FLOAT4_TBL VALUES (float('1.2345678901234e+20')); +INSERT INTO FLOAT4_TBL VALUES (float('1.2345678901234e-20')); -- [SPARK-28024] Incorrect numeric values when out of range -- test for over and under flow diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/float8.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/float8.sql similarity index 95% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/float8.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/float8.sql index 957dabdebab4e..932cdb95fcf3a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/float8.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/float8.sql @@ -7,11 +7,13 @@ CREATE TABLE FLOAT8_TBL(f1 double) USING parquet; -INSERT INTO FLOAT8_TBL VALUES (' 0.0 '); -INSERT INTO FLOAT8_TBL VALUES ('1004.30 '); -INSERT INTO FLOAT8_TBL VALUES (' -34.84'); -INSERT INTO FLOAT8_TBL VALUES ('1.2345678901234e+200'); -INSERT INTO FLOAT8_TBL VALUES ('1.2345678901234e-200'); +-- PostgreSQL implicitly casts string literals to data with floating point types, but +-- Spark does not support that kind of implicit casts. 
+INSERT INTO FLOAT8_TBL VALUES (double(' 0.0 ')); +INSERT INTO FLOAT8_TBL VALUES (double('1004.30 ')); +INSERT INTO FLOAT8_TBL VALUES (double(' -34.84')); +INSERT INTO FLOAT8_TBL VALUES (double('1.2345678901234e+200')); +INSERT INTO FLOAT8_TBL VALUES (double('1.2345678901234e-200')); -- [SPARK-28024] Incorrect numeric values when out of range -- test for underflow and overflow handling @@ -227,15 +229,17 @@ SELECT atanh(double('NaN')); TRUNCATE TABLE FLOAT8_TBL; -INSERT INTO FLOAT8_TBL VALUES ('0.0'); +-- PostgreSQL implicitly casts string literals to data with floating point types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO FLOAT8_TBL VALUES (double('0.0')); -INSERT INTO FLOAT8_TBL VALUES ('-34.84'); +INSERT INTO FLOAT8_TBL VALUES (double('-34.84')); -INSERT INTO FLOAT8_TBL VALUES ('-1004.30'); +INSERT INTO FLOAT8_TBL VALUES (double('-1004.30')); -INSERT INTO FLOAT8_TBL VALUES ('-1.2345678901234e+200'); +INSERT INTO FLOAT8_TBL VALUES (double('-1.2345678901234e+200')); -INSERT INTO FLOAT8_TBL VALUES ('-1.2345678901234e-200'); +INSERT INTO FLOAT8_TBL VALUES (double('-1.2345678901234e-200')); SELECT '' AS five, * FROM FLOAT8_TBL; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/groupingsets.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/groupingsets.sql new file mode 100644 index 0000000000000..fc54d179f742c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/groupingsets.sql @@ -0,0 +1,562 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- GROUPING SETS +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/groupingsets.sql + +-- test data sources + +create temp view gstest1(a,b,v) + as values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14), + (2,3,15), + (3,3,16),(3,4,17), + (4,1,18),(4,1,19); + +-- Since Spark doesn't support CREATE TEMPORARY TABLE, we used CREATE TABLE instead +-- create temp table gstest2 (a integer, b 
integer, c integer, d integer, +-- e integer, f integer, g integer, h integer); +create table gstest2 (a integer, b integer, c integer, d integer, + e integer, f integer, g integer, h integer) using parquet; +-- [SPARK-29386] Copy data between a file and a table +-- copy gstest2 from stdin; +-- 1 1 1 1 1 1 1 1 +-- 1 1 1 1 1 1 1 2 +-- 1 1 1 1 1 1 2 2 +-- 1 1 1 1 1 2 2 2 +-- 1 1 1 1 2 2 2 2 +-- 1 1 1 2 2 2 2 2 +-- 1 1 2 2 2 2 2 2 +-- 1 2 2 2 2 2 2 2 +-- 2 2 2 2 2 2 2 2 +-- \. +insert into gstest2 values + (1, 1, 1, 1, 1, 1, 1, 1), + (1, 1, 1, 1, 1, 1, 1, 2), + (1, 1, 1, 1, 1, 1, 2, 2), + (1, 1, 1, 1, 1, 2, 2, 2), + (1, 1, 1, 1, 2, 2, 2, 2), + (1, 1, 1, 2, 2, 2, 2, 2), + (1, 1, 2, 2, 2, 2, 2, 2), + (1, 2, 2, 2, 2, 2, 2, 2), + (2, 2, 2, 2, 2, 2, 2, 2); + +-- Since Spark doesn't support CREATE TEMPORARY TABLE, we used CREATE TABLE instead +-- create temp table gstest3 (a integer, b integer, c integer, d integer); +create table gstest3 (a integer, b integer, c integer, d integer) using parquet; +-- [SPARK-29386] Copy data between a file and a table +-- copy gstest3 from stdin; +-- 1 1 1 1 +-- 2 2 2 2 +-- \. 
+insert into gstest3 values + (1, 1, 1, 1), + (2, 2, 2, 2); +-- [SPARK-19842] Informational Referential Integrity Constraints Support in Spark +-- alter table gstest3 add primary key (a); + +-- Since Spark doesn't support CREATE TEMPORARY TABLE, we used CREATE TABLE instead +-- create temp table gstest4(id integer, v integer, +-- unhashable_col bit(4), unsortable_col xid); +-- [SPARK-29697] Support bit string types/literals +create table gstest4(id integer, v integer, + unhashable_col /* bit(4) */ byte, unsortable_col /* xid */ integer) using parquet; +insert into gstest4 +-- values (1,1,b'0000','1'), (2,2,b'0001','1'), +-- (3,4,b'0010','2'), (4,8,b'0011','2'), +-- (5,16,b'0000','2'), (6,32,b'0001','2'), +-- (7,64,b'0010','1'), (8,128,b'0011','1'); +values (1,1,tinyint('0'),1), (2,2,tinyint('1'),1), + (3,4,tinyint('2'),2), (4,8,tinyint('3'),2), + (5,16,tinyint('0'),2), (6,32,tinyint('1'),2), + (7,64,tinyint('2'),1), (8,128,tinyint('3'),1); + +-- Since Spark doesn't support CREATE TEMPORARY TABLE, we used CREATE TABLE instead +-- create temp table gstest_empty (a integer, b integer, v integer); +create table gstest_empty (a integer, b integer, v integer) using parquet; + +-- Spark doesn't handle UDFs in SQL +-- create function gstest_data(v integer, out a integer, out b integer) +-- returns setof record +-- as $f$ +-- begin +-- return query select v, i from generate_series(1,3) i; +-- end; +-- $f$ language plpgsql; + +-- basic functionality + +-- Ignore a PostgreSQL-specific option +-- set enable_hashagg = false; -- test hashing explicitly later + +-- simple rollup with multiple plain aggregates, with and without ordering +-- (and with ordering differing from grouping) + +-- [SPARK-29698] Support grouping function with multiple arguments +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b); +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +select a, b, 
grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by a,b; +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by b desc, a; +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by coalesce(a,0)+coalesce(b,0); + +-- [SPARK-28664] ORDER BY in aggregate function +-- various types of ordered aggs +-- select a, b, grouping(a,b), +-- array_agg(v order by v), +-- string_agg(string(v:text, ':' order by v desc), +-- percentile_disc(0.5) within group (order by v), +-- rank(1,2,12) within group (order by a,b,v) +-- from gstest1 group by rollup (a,b) order by a,b; + +-- [SPARK-28664] ORDER BY in aggregate function +-- test usage of grouped columns in direct args of aggs +-- select grouping(a), a, array_agg(b), +-- rank(a) within group (order by b nulls first), +-- rank(a) within group (order by b nulls last) +-- from (values (1,1),(1,4),(1,5),(3,1),(3,2)) v(a,b) +-- group by rollup (a) order by a; + +-- nesting with window functions +-- [SPARK-29699] Different answers in nested aggregates with window functions +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by rollup (a,b) order by rsum, a, b; + +-- [SPARK-29700] Support nested grouping sets +-- nesting with grouping sets +-- select sum(c) from gstest2 +-- group by grouping sets((), grouping sets((), grouping sets(()))) +-- order by 1 desc; +-- select sum(c) from gstest2 +-- group by grouping sets((), grouping sets((), grouping sets(((a, b))))) +-- order by 1 desc; +-- select sum(c) from gstest2 +-- group by grouping sets(grouping sets(rollup(c), grouping sets(cube(c)))) +-- order by 1 desc; +-- select sum(c) from gstest2 +-- group by grouping sets(a, grouping sets(a, cube(b))) +-- order by 1 desc; +-- select sum(c) from gstest2 
+-- group by grouping sets(grouping sets((a, (b)))) +-- order by 1 desc; +-- select sum(c) from gstest2 +-- group by grouping sets(grouping sets((a, b))) +-- order by 1 desc; +-- select sum(c) from gstest2 +-- group by grouping sets(grouping sets(a, grouping sets(a), a)) +-- order by 1 desc; +-- select sum(c) from gstest2 +-- group by grouping sets(grouping sets(a, grouping sets(a, grouping sets(a), ((a)), a, grouping sets(a), (a)), a)) +-- order by 1 desc; +-- select sum(c) from gstest2 +-- group by grouping sets((a,(a,b)), grouping sets((a,(a,b)),a)) +-- order by 1 desc; + +-- empty input: first is 0 rows, second 1, third 3 etc. +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +-- [SPARK-29701] Different answers when empty input given in GROUPING SETS +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + +-- empty input with joins tests some important code paths +-- [SPARK-29701] Different answers when empty input given in GROUPING SETS +select t1.a, t2.b, sum(t1.v), count(*) from gstest_empty t1, gstest_empty t2 + group by grouping sets ((t1.a,t2.b),()); + +-- simple joins, var resolution, GROUPING on join vars +-- [SPARK-29698] Support grouping function with multiple arguments +-- select t1.a, t2.b, grouping(t1.a, t2.b), sum(t1.v), max(t2.a) +select t1.a, t2.b, grouping(t1.a), grouping(t2.b), sum(t1.v), max(t2.a) + from gstest1 t1, gstest2 t2 + group by grouping sets ((t1.a, t2.b), ()); + +-- [SPARK-29698] Support grouping function with multiple arguments +-- select t1.a, t2.b, grouping(t1.a, t2.b), sum(t1.v), max(t2.a) +select t1.a, t2.b, grouping(t1.a), grouping(t2.b), sum(t1.v), max(t2.a) + from gstest1 t1 join gstest2 t2 on (t1.a=t2.a) + group by grouping sets ((t1.a, t2.b), ()); + +-- [SPARK-29698] Support grouping function 
with multiple arguments +-- select a, b, grouping(a, b), sum(t1.v), max(t2.c) +select a, b, grouping(a), grouping(b), sum(t1.v), max(t2.c) + from gstest1 t1 join gstest2 t2 using (a,b) + group by grouping sets ((a, b), ()); + +-- check that functionally dependent cols are not nulled +-- [SPARK-29698] Support grouping function with multiple arguments +-- [SPARK-19842] Informational Referential Integrity Constraints Support in Spark +-- [SPARK-29702] Resolve group-by columns with functional dependencies +-- select a, d, grouping(a,b,c) +-- from gstest3 +-- group by grouping sets ((a,b), (a,c)); + +-- check that distinct grouping columns are kept separate +-- even if they are equal() +-- explain (costs off) +-- select g as alias1, g as alias2 +-- from generate_series(1,3) g +-- group by alias1, rollup(alias2); + +-- [SPARK-27767] Built-in function: generate_series +-- [SPARK-29704] Support the combinations of grouping operations +-- select g as alias1, g as alias2 +-- from generate_series(1,3) g +-- group by alias1, rollup(alias2); + +-- check that pulled-up subquery outputs still go to null when appropriate +select four, x + from (select four, ten, 'foo' as x from tenk1) as t + group by grouping sets (four, x) + having x = 'foo'; + +select four, x || 'x' + from (select four, ten, 'foo' as x from tenk1) as t + group by grouping sets (four, x) + order by four; + +select (x+y)*1, sum(z) + from (select 1 as x, 2 as y, 3 as z) s + group by grouping sets (x+y, x); + +CREATE TEMP VIEW int8_tbl AS SELECT * FROM VALUES + (123L, 456L), + (123L, 4567890123456789L), + (4567890123456789L, 123L), + (4567890123456789L, 4567890123456789L), + (4567890123456789L, -4567890123456789L) as int8_tbl(q1, q2); + +select x, not x as not_x, q2 from + (select *, q1 = 1 as x from int8_tbl i1) as t + group by grouping sets(x, q2) + order by x, q2; + +DROP VIEW int8_tbl; + +-- simple rescan tests + +-- Spark doesn't handle UDFs in SQL +-- select a, b, sum(v.x) +-- from (values (1),(2)) v(x), 
gstest_data(v.x) +-- group by rollup (a,b); + +-- Spark doesn't handle UDFs in SQL +-- select * +-- from (values (1),(2)) v(x), +-- lateral (select a, b, sum(v.x) from gstest_data(v.x) group by rollup (a,b)) s; + +-- min max optimization should still work with GROUP BY () +-- explain (costs off) +-- select min(unique1) from tenk1 GROUP BY (); + +-- Views with GROUPING SET queries +-- [SPARK-29698] Support grouping function with multiple arguments +-- [SPARK-29705] Support more expressive forms in GroupingSets/Cube/Rollup +-- CREATE VIEW gstest_view AS select a, b, grouping(a,b), sum(c), count(*), max(c) +-- from gstest2 group by rollup ((a,b,c),(c,d)); + +-- select pg_get_viewdef('gstest_view'::regclass, true); + +-- Nested queries with 3 or more levels of nesting +-- [SPARK-29698] Support grouping function with multiple arguments +-- [SPARK-29703] grouping() can only be used with GroupingSets/Cube/Rollup +-- select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); +-- select(select (select grouping(e,f) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); +-- select(select (select grouping(c) from (values (1)) v2(c) GROUP BY c) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); + +-- Combinations of operations +-- [SPARK-29704] Support the combinations of grouping operations +-- select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d); +-- select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); + +-- Spark doesn't handle UDFs in SQL +-- Tests for chained aggregates +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +-- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; +-- select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) 
v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); +-- select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1); +-- select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum +-- from gstest2 group by cube (a,b) order by rsum, a, b; +-- select a, b, sum(c) from (values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14),(2,3,15),(3,3,16),(3,4,17),(4,1,18),(4,1,19)) v(a,b,c) group by rollup (a,b); +-- select a, b, sum(v.x) +-- from (values (1),(2)) v(x), gstest_data(v.x) +-- group by cube (a,b) order by a,b; + +-- Test reordering of grouping sets +-- explain (costs off) +-- select * from gstest1 group by grouping sets((a,b,v),(v)) order by v,b,a; + +-- [SPARK-29698] Support grouping function with multiple arguments +-- [SPARK-29703] grouping() can only be used with GroupingSets/Cube/Rollup +-- Agg level check. This query should error out. +-- select (select grouping(a), grouping(b) from gstest2) from gstest2 group by a,b; + +--Nested queries +-- [SPARK-29700] Support nested grouping sets +-- select a, b, sum(c), count(*) from gstest2 group by grouping sets (rollup(a,b),a); + +-- HAVING queries +select ten, sum(distinct four) from onek a +group by grouping sets((ten,four),(ten)) +having exists (select 1 from onek b where sum(distinct a.four) = b.four); + +-- Tests around pushdown of HAVING clauses, partially testing against previous bugs +select a,count(*) from gstest2 group by rollup(a) order by a; +select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; +-- explain (costs off) +-- select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a; + +-- [SPARK-29706] Support an empty grouping expression +-- select v.c, (select count(*) from gstest2 group by () having v.c) +-- from (values (false),(true)) v(c) order by v.c; +-- explain (costs off) +-- select v.c, (select count(*) from gstest2 group by () 
having v.c) +-- from (values (false),(true)) v(c) order by v.c; + +-- HAVING with GROUPING queries +select ten, grouping(ten) from onek +group by grouping sets(ten) having grouping(ten) >= 0 +order by 2,1; +select ten, grouping(ten) from onek +group by grouping sets(ten, four) having grouping(ten) > 0 +order by 2,1; +select ten, grouping(ten) from onek +group by rollup(ten) having grouping(ten) > 0 +order by 2,1; +select ten, grouping(ten) from onek +group by cube(ten) having grouping(ten) > 0 +order by 2,1; +-- [SPARK-29703] grouping() can only be used with GroupingSets/Cube/Rollup +-- select ten, grouping(ten) from onek +-- group by (ten) having grouping(ten) >= 0 +-- order by 2,1; + +-- FILTER queries +-- [SPARK-30276] Support Filter expression allows simultaneous use of DISTINCT +-- select ten, sum(distinct four) filter (where string(four) like '123') from onek a +-- group by rollup(ten); + +-- More rescan tests +-- [SPARK-27877] ANSI SQL: LATERAL derived table(T491) +-- select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; +-- [SPARK-27878] Support ARRAY(sub-SELECT) expressions +-- select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- [SPARK-29704] Support the combinations of grouping operations +-- Grouping on text columns +-- select sum(ten) from onek group by two, rollup(string(four)) order by 1; +-- select sum(ten) from onek group by rollup(string(four)), two order by 1; + +-- hashing support + +-- Ignore a PostgreSQL-specific option +-- set enable_hashagg = true; + +-- failure cases + +-- Since this test is implementation specific for plans, it passes in Spark +select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col); +-- [SPARK-27878] Support ARRAY(sub-SELECT) expressions +-- select array_agg(v order by v) from gstest4 group by 
grouping sets ((id,unsortable_col),(id)); + +-- simple cases + +-- [SPARK-29698] Support grouping function with multiple arguments +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,4,1,2 /* 3,1,2 */; +-- explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) +-- from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; + +-- [SPARK-29698] Support grouping function with multiple arguments +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,4,1,2 /* 3,1,2 */; +-- explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) +-- from gstest1 group by cube(a,b) order by 3,1,2; + +-- shouldn't try and hash +-- explain (costs off) +-- select a, b, grouping(a,b), array_agg(v order by v) +-- from gstest1 group by cube(a,b); + +-- unsortable cases +select unsortable_col, count(*) + from gstest4 group by grouping sets ((unsortable_col),(unsortable_col)) + order by string(unsortable_col); + +-- mixed hashable/sortable cases +-- [SPARK-29698] Support grouping function with multiple arguments +select unhashable_col, unsortable_col, + -- grouping(unhashable_col, unsortable_col), + grouping(unhashable_col), grouping(unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3, 4, 6 /* 3, 5 */; +-- explain (costs off) +-- select unhashable_col, unsortable_col, +-- grouping(unhashable_col, unsortable_col), +-- count(*), sum(v) +-- from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) +-- order by 3,5; + +-- [SPARK-29698] Support grouping function with multiple arguments +select unhashable_col, unsortable_col, + -- grouping(unhashable_col, unsortable_col), + grouping(unhashable_col), grouping(unsortable_col), + count(*), sum(v) + 
from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3, 4, 6 /* 3,5 */; +-- explain (costs off) +-- select unhashable_col, unsortable_col, +-- grouping(unhashable_col, unsortable_col), +-- count(*), sum(v) +-- from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) +-- order by 3,5; + +-- empty input: first is 0 rows, second 1, third 3 etc. +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +-- explain (costs off) +-- select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +-- [SPARK-29701] Different answers when empty input given in GROUPING SETS +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +-- explain (costs off) +-- select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +-- [SPARK-29701] Different answers when empty input given in GROUPING SETS +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); +-- explain (costs off) +-- select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + +-- [SPARK-29698] Support grouping function with multiple arguments +-- [SPARK-19842] Informational Referential Integrity Constraints Support in Spark +-- [SPARK-29702] Resolve group-by columns with functional dependencies +-- check that functionally dependent cols are not nulled +-- select a, d, grouping(a,b,c) +-- from gstest3 +-- group by grouping sets ((a,b), (a,c)); +-- explain (costs off) +-- select a, d, grouping(a,b,c) +-- from gstest3 +-- group by grouping sets ((a,b), (a,c)); + +-- simple rescan tests + +-- select a, b, sum(v.x) +-- from (values (1),(2)) v(x), gstest_data(v.x) +-- group by grouping sets (a,b) +-- order by 1, 2, 3; +-- explain (costs off) +-- select a, b, sum(v.x) +-- from (values (1),(2)) v(x), gstest_data(v.x) +-- group by grouping sets (a,b) 
+-- order by 3, 1, 2; +-- select * +-- from (values (1),(2)) v(x), +-- lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; +-- explain (costs off) +-- select * +-- from (values (1),(2)) v(x), +-- lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; + +-- Tests for chained aggregates +-- [SPARK-29698] Support grouping function with multiple arguments +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,4,7 /* 3,6 */; +-- explain (costs off) +-- select a, b, grouping(a,b), sum(v), count(*), max(v) +-- from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; +-- [SPARK-29699] Different answers in nested aggregates with window functions +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; +-- explain (costs off) +-- select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum +-- from gstest2 group by cube (a,b) order by rsum, a, b; +-- select a, b, sum(v.x) +-- from (values (1),(2)) v(x), gstest_data(v.x) +-- group by cube (a,b) order by a,b; +-- explain (costs off) +-- select a, b, sum(v.x) +-- from (values (1),(2)) v(x), gstest_data(v.x) +-- group by cube (a,b) order by a,b; + +-- Verify that we correctly handle the child node returning a +-- non-minimal slot, which happens if the input is pre-sorted, +-- e.g. due to an index scan. 
+-- BEGIN; +-- Ignore a PostgreSQL-specific option +-- SET LOCAL enable_hashagg = false; +-- EXPLAIN (COSTS OFF) SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; +SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; +-- Ignore a PostgreSQL-specific option +-- SET LOCAL enable_seqscan = false; +-- EXPLAIN (COSTS OFF) SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; +-- SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b; +-- COMMIT; + +-- More rescan tests +-- [SPARK-27877] ANSI SQL: LATERAL derived table(T491) +-- select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; +-- [SPARK-27878] Support ARRAY(sub-SELECT) expressions +-- select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- Rescan logic changes when there are no empty grouping sets, so test +-- that too: +-- [SPARK-27877] ANSI SQL: LATERAL derived table(T491) +-- select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten; +-- [SPARK-27878] Support ARRAY(sub-SELECT) expressions +-- select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- test the knapsack + +-- Ignore a PostgreSQL-specific option +-- set enable_indexscan = false; +-- set work_mem = '64kB'; +-- explain (costs off) +-- select unique1, +-- count(two), count(four), count(ten), +-- count(hundred), count(thousand), count(twothousand), +-- count(*) +-- from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); +-- explain (costs 
off) +-- select unique1, +-- count(two), count(four), count(ten), +-- count(hundred), count(thousand), count(twothousand), +-- count(*) +-- from tenk1 group by grouping sets (unique1,hundred,ten,four,two); + +-- Ignore a PostgreSQL-specific option +-- set work_mem = '384kB'; +-- explain (costs off) +-- select unique1, +-- count(two), count(four), count(ten), +-- count(hundred), count(thousand), count(twothousand), +-- count(*) +-- from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); + +-- check collation-sensitive matching between grouping expressions +-- (similar to a check for aggregates, but there are additional code +-- paths for GROUPING, so check again here) + +-- [SPARK-28382] Array Functions: unnest +select v||'a', case grouping(v||'a') when 1 then 1 else 0 end, count(*) + -- from unnest(array[1,1], array['a','b']) u(i,v) + from values (1, 'a'), (1, 'b') u(i,v) + group by rollup(i, v||'a') order by 1,3; +select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*) + -- from unnest(array[1,1], array['a','b']) u(i,v) + from values (1, 'a'), (1, 'b') u(i,v) + group by rollup(i, v||'a') order by 1,3; + +-- end + +DROP VIEW gstest1; +DROP TABLE gstest2; +DROP TABLE gstest3; +DROP TABLE gstest4; +DROP TABLE gstest_empty; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/insert.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/insert.sql new file mode 100644 index 0000000000000..6783dda9ff015 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/insert.sql @@ -0,0 +1,653 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- INSERT +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/insert.sql + +-- +-- insert with DEFAULT in the target_list +-- +-- [SPARK-19842] Informational Referential Integrity Constraints Support in Spark +-- [SPARK-29119] DEFAULT option is not supported in Spark +create table 
inserttest (col1 int, col2 int /* NOT NULL */, col3 string /* default 'testing' */) using parquet; +-- [SPARK-29119] DEFAULT option is not supported in Spark +-- [SPARK-20845] Support specification of column names in INSERT INTO +-- Skip a test below because the PK constraint is violated and the query fails in PostgreSQL +-- insert into inserttest (col1, col2, col3) values (DEFAULT, DEFAULT, DEFAULT); +-- insert into inserttest (col2, col3) values (3, DEFAULT); +insert into inserttest values (NULL, 3, 'testing'); +-- insert into inserttest (col1, col2, col3) values (DEFAULT, 5, DEFAULT); +insert into inserttest values (NULL, 5, 'testing'); +-- insert into inserttest values (DEFAULT, 5, 'test'); +insert into inserttest values (NULL, 5, 'test'); +-- insert into inserttest values (DEFAULT, 7); +insert into inserttest values (NULL, 7, 'testing'); + +select * from inserttest; + +-- +-- insert with similar expression / target_list values (all fail) +-- +-- [SPARK-20845] Support specification of column names in INSERT INTO +-- [SPARK-29119] DEFAULT option is not supported in Spark +-- insert into inserttest (col1, col2, col3) values (DEFAULT, DEFAULT); +-- insert into inserttest (col1, col2, col3) values (1, 2); +-- insert into inserttest (col1) values (1, 2); +-- insert into inserttest (col1) values (DEFAULT, DEFAULT); + +-- select * from inserttest; + +-- +-- VALUES test +-- +-- [SPARK-29119] DEFAULT option is not supported in Spark +-- [SPARK-29715] Support SELECT statements in VALUES of INSERT INTO +-- insert into inserttest values(10, 20, '40'), (-1, 2, DEFAULT), +-- ((select 2), (select i from (values(3)) as foo (i)), 'values are fun!'); + +-- select * from inserttest; + +-- +-- TOASTed value test +-- +insert into inserttest values(30, 50, repeat('x', 10000)); + +select col1, col2, char_length(col3) from inserttest; + +drop table inserttest; + +-- +-- check indirection (field/array assignment), cf bug #14265 +-- +-- these tests are aware that transformInsertStmt has 
3 separate code paths +-- + +-- [SPARK-29716] Support [CREATE|DROP] TYPE +-- create type insert_test_type as (if1 int, if2 array); + +-- create table inserttest (f1 int, f2 int[], +-- f3 insert_test_type, f4 insert_test_type[]); +-- +-- insert into inserttest (f2[1], f2[2]) values (1,2); +-- insert into inserttest (f2[1], f2[2]) values (3,4), (5,6); +-- insert into inserttest (f2[1], f2[2]) select 7,8; +-- insert into inserttest (f2[1], f2[2]) values (1,default); -- not supported +-- +-- insert into inserttest (f3.if1, f3.if2) values (1,array['foo']); +-- insert into inserttest (f3.if1, f3.if2) values (1,'{foo}'), (2,'{bar}'); +-- insert into inserttest (f3.if1, f3.if2) select 3, '{baz,quux}'; +-- insert into inserttest (f3.if1, f3.if2) values (1,default); -- not supported +-- +-- insert into inserttest (f3.if2[1], f3.if2[2]) values ('foo', 'bar'); +-- insert into inserttest (f3.if2[1], f3.if2[2]) values ('foo', 'bar'), ('baz', 'quux'); +-- insert into inserttest (f3.if2[1], f3.if2[2]) select 'bear', 'beer'; +-- +-- insert into inserttest (f4[1].if2[1], f4[1].if2[2]) values ('foo', 'bar'); +-- insert into inserttest (f4[1].if2[1], f4[1].if2[2]) values ('foo', 'bar'), ('baz', 'quux'); +-- insert into inserttest (f4[1].if2[1], f4[1].if2[2]) select 'bear', 'beer'; +-- +-- select * from inserttest; + +-- also check reverse-listing +-- create table inserttest2 (f1 bigint, f2 string); +-- [SPARK-29717] Support [CREATE|DROP] RULE - define a new plan rewrite rule +-- create rule irule1 as on insert to inserttest2 do also +-- insert into inserttest (f3.if2[1], f3.if2[2]) +-- values (new.f1,new.f2); +-- create rule irule2 as on insert to inserttest2 do also +-- insert into inserttest (f4[1].if1, f4[1].if2[2]) +-- values (1,'fool'),(new.f1,new.f2); +-- create rule irule3 as on insert to inserttest2 do also +-- insert into inserttest (f4[1].if1, f4[1].if2[2]) +-- select new.f1, new.f2; +-- \d+ inserttest2 + +-- drop table inserttest2; +-- drop table inserttest; +-- 
[SPARK-29716] Support [CREATE|DROP] TYPE +-- drop type insert_test_type; + +-- direct partition inserts should check partition bound constraint +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table range_parted ( +-- a string, +-- b int +-- ) partition by range (a, (b+0)); + +-- no partitions, so fail +-- insert into range_parted values ('a', 11); + +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table part1 partition of range_parted for values from ('a', 1) to ('a', 10); +-- create table part2 partition of range_parted for values from ('a', 10) to ('a', 20); +-- create table part3 partition of range_parted for values from ('b', 1) to ('b', 10); +-- create table part4 partition of range_parted for values from ('b', 10) to ('b', 20); + +-- fail +-- insert into part1 values ('a', 11); +-- insert into part1 values ('b', 1); +-- ok +-- insert into part1 values ('a', 1); +-- fail +-- insert into part4 values ('b', 21); +-- insert into part4 values ('a', 10); +-- ok +-- insert into part4 values ('b', 10); + +-- fail (partition key a has a NOT NULL constraint) +-- insert into part1 values (null); +-- fail (expression key (b+0) cannot be null either) +-- insert into part1 values (1); + +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table list_parted ( +-- a text, +-- b int +-- ) partition by list (lower(a)); +-- create table part_aa_bb partition of list_parted FOR VALUES IN ('aa', 'bb'); +-- create table part_cc_dd partition of list_parted FOR VALUES IN ('cc', 'dd'); +-- create table part_null partition of list_parted FOR VALUES IN (null); + +-- fail +-- insert into part_aa_bb values ('cc', 1); +-- insert into part_aa_bb values ('AAa', 1); +-- insert into part_aa_bb values (null); +-- ok +-- insert into part_cc_dd values ('cC', 1); +-- insert into part_null values (null, 0); + +-- check in case of multi-level 
partitioned table +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table part_ee_ff partition of list_parted for values in ('ee', 'ff') partition by range (b); +-- create table part_ee_ff1 partition of part_ee_ff for values from (1) to (10); +-- create table part_ee_ff2 partition of part_ee_ff for values from (10) to (20); + +-- test default partition +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table part_default partition of list_parted default; +-- Negative test: a row, which would fit in other partition, does not fit +-- default partition, even when inserted directly +-- insert into part_default values ('aa', 2); +-- insert into part_default values (null, 2); +-- ok +-- insert into part_default values ('Zz', 2); +-- test if default partition works as expected for multi-level partitioned +-- table as well as when default partition itself is further partitioned +-- drop table part_default; +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table part_xx_yy partition of list_parted for values in ('xx', 'yy') partition by list (a); +-- create table part_xx_yy_p1 partition of part_xx_yy for values in ('xx'); +-- create table part_xx_yy_defpart partition of part_xx_yy default; +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table part_default partition of list_parted default partition by range(b); +-- create table part_default_p1 partition of part_default for values from (20) to (30); +-- create table part_default_p2 partition of part_default for values from (30) to (40); + +-- fail +-- insert into part_ee_ff1 values ('EE', 11); +-- insert into part_default_p2 values ('gg', 43); +-- fail (even the parent's, ie, part_ee_ff's partition constraint applies) +-- insert into part_ee_ff1 values ('cc', 1); +-- insert into part_default values ('gg', 43); +-- ok +-- 
insert into part_ee_ff1 values ('ff', 1); +-- insert into part_ee_ff2 values ('ff', 11); +-- insert into part_default_p1 values ('cd', 25); +-- insert into part_default_p2 values ('de', 35); +-- insert into list_parted values ('ab', 21); +-- insert into list_parted values ('xx', 1); +-- insert into list_parted values ('yy', 2); +-- select tableoid::regclass, * from list_parted; + +-- Check tuple routing for partitioned tables + +-- fail +-- insert into range_parted values ('a', 0); +-- ok +-- insert into range_parted values ('a', 1); +-- insert into range_parted values ('a', 10); +-- fail +-- insert into range_parted values ('a', 20); +-- ok +-- insert into range_parted values ('b', 1); +-- insert into range_parted values ('b', 10); +-- fail (partition key (b+0) is null) +-- insert into range_parted values ('a'); + +-- Check default partition +-- create table part_def partition of range_parted default; +-- fail +-- insert into part_def values ('b', 10); +-- ok +-- insert into part_def values ('c', 10); +-- insert into range_parted values (null, null); +-- insert into range_parted values ('a', null); +-- insert into range_parted values (null, 19); +-- insert into range_parted values ('b', 20); + +-- select tableoid::regclass, * from range_parted; +-- ok +-- insert into list_parted values (null, 1); +-- insert into list_parted (a) values ('aA'); +-- fail (partition of part_ee_ff not found in both cases) +-- insert into list_parted values ('EE', 0); +-- insert into part_ee_ff values ('EE', 0); +-- ok +-- insert into list_parted values ('EE', 1); +-- insert into part_ee_ff values ('EE', 10); +-- select tableoid::regclass, * from list_parted; + +-- some more tests to exercise tuple-routing with multi-level partitioning +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table part_gg partition of list_parted for values in ('gg') partition by range (b); +-- create table part_gg1 partition of part_gg for values from 
(minvalue) to (1); +-- create table part_gg2 partition of part_gg for values from (1) to (10) partition by range (b); +-- create table part_gg2_1 partition of part_gg2 for values from (1) to (5); +-- create table part_gg2_2 partition of part_gg2 for values from (5) to (10); + +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table part_ee_ff3 partition of part_ee_ff for values from (20) to (30) partition by range (b); +-- create table part_ee_ff3_1 partition of part_ee_ff3 for values from (20) to (25); +-- create table part_ee_ff3_2 partition of part_ee_ff3 for values from (25) to (30); + +-- truncate list_parted; +-- insert into list_parted values ('aa'), ('cc'); +-- [SPARK-27767] Built-in function: generate_series +-- insert into list_parted select 'Ff', s.a from generate_series(1, 29) s(a); +-- insert into list_parted select 'gg', s.a from generate_series(1, 9) s(a); +-- insert into list_parted (b) values (1); +-- select tableoid::regclass::text, a, min(b) as min_b, max(b) as max_b from list_parted group by 1, 2 order by 1; + +-- direct partition inserts should check hash partition bound constraint + +-- Use hand-rolled hash functions and operator classes to get predictable +-- result on different machines. The hash function for int4 simply returns +-- the sum of the values passed to it and the one for text returns the length +-- of the non-empty string value passed to it or 0.
+ +-- create or replace function part_hashint4_noop(value int4, seed int8) +-- returns int8 as $$ +-- select value + seed; +-- $$ language sql immutable; + +-- create operator class part_test_int4_ops +-- for type int4 +-- using hash as +-- operator 1 =, +-- function 2 part_hashint4_noop(int4, int8); + +-- create or replace function part_hashtext_length(value text, seed int8) +-- RETURNS int8 AS $$ +-- select length(coalesce(value, ''))::int8 +-- $$ language sql immutable; + +-- create operator class part_test_text_ops +-- for type text +-- using hash as +-- operator 1 =, +-- function 2 part_hashtext_length(text, int8); + +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table hash_parted ( +-- a int +-- ) partition by hash (a part_test_int4_ops); +-- create table hpart0 partition of hash_parted for values with (modulus 4, remainder 0); +-- create table hpart1 partition of hash_parted for values with (modulus 4, remainder 1); +-- create table hpart2 partition of hash_parted for values with (modulus 4, remainder 2); +-- create table hpart3 partition of hash_parted for values with (modulus 4, remainder 3); + +-- [SPARK-27767] Built-in function: generate_series +-- insert into hash_parted values(generate_series(1,10)); + +-- direct insert of values divisible by 4 - ok; +-- insert into hpart0 values(12),(16); +-- fail; +-- insert into hpart0 values(11); +-- 11 % 4 -> 3 remainder i.e. 
valid data for hpart3 partition +-- insert into hpart3 values(11); + +-- view data +-- select tableoid::regclass as part, a, a%4 as "remainder = a % 4" +-- from hash_parted order by part; + +-- test \d+ output on a table which has both partitioned and unpartitioned +-- partitions +-- \d+ list_parted + +-- cleanup +-- drop table range_parted, list_parted; +-- drop table hash_parted; + +-- test that a default partition added as the first partition accepts any value +-- including null +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table list_parted (a int) partition by list (a); +-- create table part_default partition of list_parted default; +-- \d+ part_default +-- insert into part_default values (null); +-- insert into part_default values (1); +-- insert into part_default values (-1); +-- select tableoid::regclass, a from list_parted; +-- cleanup +-- drop table list_parted; + +-- more tests for certain multi-level partitioning scenarios +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table mlparted (a int, b int) partition by range (a, b); +-- create table mlparted1 (b int not null, a int not null) partition by range ((b+0)); +-- create table mlparted11 (like mlparted1); +-- alter table mlparted11 drop a; +-- alter table mlparted11 add a int; +-- alter table mlparted11 drop a; +-- alter table mlparted11 add a int not null; +-- attnum for key attribute 'a' is different in mlparted, mlparted1, and mlparted11 +-- select attrelid::regclass, attname, attnum +-- from pg_attribute +-- where attname = 'a' +-- and (attrelid = 'mlparted'::regclass +-- or attrelid = 'mlparted1'::regclass +-- or attrelid = 'mlparted11'::regclass) +-- order by attrelid::regclass::text; + +-- alter table mlparted1 attach partition mlparted11 for values from (2) to (5); +-- alter table mlparted attach partition mlparted1 for values from (1, 2) to (1, 10); + +-- check that "(1, 2)" is 
correctly routed to mlparted11. +-- insert into mlparted values (1, 2); +-- select tableoid::regclass, * from mlparted; + +-- check that proper message is shown after failure to route through mlparted1 +-- insert into mlparted (a, b) values (1, 5); + +-- truncate mlparted; +-- alter table mlparted add constraint check_b check (b = 3); + +-- have a BR trigger modify the row such that the check_b is violated +-- create function mlparted11_trig_fn() +-- returns trigger AS +-- $$ +-- begin +-- NEW.b := 4; +-- return NEW; +-- end; +-- $$ +-- language plpgsql; +-- create trigger mlparted11_trig before insert ON mlparted11 +-- for each row execute procedure mlparted11_trig_fn(); + +-- check that the correct row is shown when constraint check_b fails after +-- "(1, 2)" is routed to mlparted11 (actually "(1, 4)" would be shown due +-- to the BR trigger mlparted11_trig_fn) +-- insert into mlparted values (1, 2); +-- drop trigger mlparted11_trig on mlparted11; +-- drop function mlparted11_trig_fn(); + +-- check that inserting into an internal partition successfully results in +-- checking its partition constraint before inserting into the leaf partition +-- selected by tuple-routing +-- insert into mlparted1 (a, b) values (2, 3); + +-- check routing error through a list partitioned table when the key is null +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table lparted_nonullpart (a int, b char) partition by list (b); +-- create table lparted_nonullpart_a partition of lparted_nonullpart for values in ('a'); +-- insert into lparted_nonullpart values (1); +-- drop table lparted_nonullpart; + +-- check that RETURNING works correctly with tuple-routing +-- alter table mlparted drop constraint check_b; +-- create table mlparted12 partition of mlparted1 for values from (5) to (10); +-- create table mlparted2 (b int not null, a int not null); +-- alter table mlparted attach partition mlparted2 for values from (1, 10) to (1, 20); 
+-- create table mlparted3 partition of mlparted for values from (1, 20) to (1, 30); +-- create table mlparted4 (like mlparted); +-- alter table mlparted4 drop a; +-- alter table mlparted4 add a int not null; +-- alter table mlparted attach partition mlparted4 for values from (1, 30) to (1, 40); +-- [SPARK-27767] Built-in function: generate_series +-- with ins (a, b, c) as +-- (insert into mlparted (b, a) select s.a, 1 from generate_series(2, 39) s(a) returning tableoid::regclass, *) +-- select a, b, min(c), max(c) from ins group by a, b order by 1; + +-- alter table mlparted add c text; +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table mlparted5 (c text, a int not null, b int not null) partition by list (c); +-- create table mlparted5a (a int not null, c text, b int not null); +-- alter table mlparted5 attach partition mlparted5a for values in ('a'); +-- alter table mlparted attach partition mlparted5 for values from (1, 40) to (1, 50); +-- alter table mlparted add constraint check_b check (a = 1 and b < 45); +-- insert into mlparted values (1, 45, 'a'); +-- create function mlparted5abrtrig_func() returns trigger as $$ begin new.c = 'b'; return new; end; $$ language plpgsql; +-- create trigger mlparted5abrtrig before insert on mlparted5a for each row execute procedure mlparted5abrtrig_func(); +-- insert into mlparted5 (a, b, c) values (1, 40, 'a'); +-- drop table mlparted5; +-- alter table mlparted drop constraint check_b; + +-- Check multi-level default partition +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table mlparted_def partition of mlparted default partition by range(a); +-- create table mlparted_def1 partition of mlparted_def for values from (40) to (50); +-- create table mlparted_def2 partition of mlparted_def for values from (50) to (60); +-- insert into mlparted values (40, 100); +-- insert into mlparted_def1 values (42, 100); +-- insert 
into mlparted_def2 values (54, 50); +-- fail +-- insert into mlparted values (70, 100); +-- insert into mlparted_def1 values (52, 50); +-- insert into mlparted_def2 values (34, 50); +-- ok +-- create table mlparted_defd partition of mlparted_def default; +-- insert into mlparted values (70, 100); + +-- select tableoid::regclass, * from mlparted_def; + +-- Check multi-level tuple routing with attributes dropped from the +-- top-most parent. First remove the last attribute. +-- alter table mlparted add d int, add e int; +-- alter table mlparted drop e; +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table mlparted5 partition of mlparted +-- for values from (1, 40) to (1, 50) partition by range (c); +-- create table mlparted5_ab partition of mlparted5 +-- for values from ('a') to ('c') partition by list (c); +-- This partitioned table should remain with no partitions. +-- create table mlparted5_cd partition of mlparted5 +-- for values from ('c') to ('e') partition by list (c); +-- create table mlparted5_a partition of mlparted5_ab for values in ('a'); +-- create table mlparted5_b (d int, b int, c text, a int); +-- alter table mlparted5_ab attach partition mlparted5_b for values in ('b'); +-- truncate mlparted; +-- insert into mlparted values (1, 2, 'a', 1); +-- insert into mlparted values (1, 40, 'a', 1); -- goes to mlparted5_a +-- insert into mlparted values (1, 45, 'b', 1); -- goes to mlparted5_b +-- insert into mlparted values (1, 45, 'c', 1); -- goes to mlparted5_cd, fails +-- insert into mlparted values (1, 45, 'f', 1); -- goes to mlparted5, fails +-- select tableoid::regclass, * from mlparted order by a, b, c, d; +-- alter table mlparted drop d; +-- truncate mlparted; +-- Remove the before last attribute. 
+-- alter table mlparted add e int, add d int; +-- alter table mlparted drop e; +-- insert into mlparted values (1, 2, 'a', 1); +-- insert into mlparted values (1, 40, 'a', 1); -- goes to mlparted5_a +-- insert into mlparted values (1, 45, 'b', 1); -- goes to mlparted5_b +-- insert into mlparted values (1, 45, 'c', 1); -- goes to mlparted5_cd, fails +-- insert into mlparted values (1, 45, 'f', 1); -- goes to mlparted5, fails +-- select tableoid::regclass, * from mlparted order by a, b, c, d; +-- alter table mlparted drop d; +-- drop table mlparted5; + +-- check that message shown after failure to find a partition shows the +-- appropriate key description (or none) in various situations +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table key_desc (a int, b int) partition by list ((a+0)); +-- create table key_desc_1 partition of key_desc for values in (1) partition by range (b); + +-- create user regress_insert_other_user; +-- grant select (a) on key_desc_1 to regress_insert_other_user; +-- grant insert on key_desc to regress_insert_other_user; + +-- set role regress_insert_other_user; +-- no key description is shown +-- insert into key_desc values (1, 1); + +-- reset role; +-- grant select (b) on key_desc_1 to regress_insert_other_user; +-- set role regress_insert_other_user; +-- key description (b)=(1) is now shown +-- insert into key_desc values (1, 1); + +-- key description is not shown if key contains expression +-- insert into key_desc values (2, 1); +-- reset role; +-- revoke all on key_desc from regress_insert_other_user; +-- revoke all on key_desc_1 from regress_insert_other_user; +-- drop role regress_insert_other_user; +-- drop table key_desc, key_desc_1; + +-- test minvalue/maxvalue restrictions +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table mcrparted (a int, b int, c int) partition by range (a, abs(b), c); +-- create table mcrparted0 
partition of mcrparted for values from (minvalue, 0, 0) to (1, maxvalue, maxvalue); +-- create table mcrparted2 partition of mcrparted for values from (10, 6, minvalue) to (10, maxvalue, minvalue); +-- create table mcrparted4 partition of mcrparted for values from (21, minvalue, 0) to (30, 20, minvalue); + +-- check multi-column range partitioning expression enforces the same +-- constraint as what tuple-routing would determine it to be +-- create table mcrparted0 partition of mcrparted for values from (minvalue, minvalue, minvalue) to (1, maxvalue, maxvalue); +-- create table mcrparted1 partition of mcrparted for values from (2, 1, minvalue) to (10, 5, 10); +-- create table mcrparted2 partition of mcrparted for values from (10, 6, minvalue) to (10, maxvalue, maxvalue); +-- create table mcrparted3 partition of mcrparted for values from (11, 1, 1) to (20, 10, 10); +-- create table mcrparted4 partition of mcrparted for values from (21, minvalue, minvalue) to (30, 20, maxvalue); +-- create table mcrparted5 partition of mcrparted for values from (30, 21, 20) to (maxvalue, maxvalue, maxvalue); + +-- null not allowed in range partition +-- insert into mcrparted values (null, null, null); + +-- routed to mcrparted0 +-- insert into mcrparted values (0, 1, 1); +-- insert into mcrparted0 values (0, 1, 1); + +-- routed to mcrparted1 +-- insert into mcrparted values (9, 1000, 1); +-- insert into mcrparted1 values (9, 1000, 1); +-- insert into mcrparted values (10, 5, -1); +-- insert into mcrparted1 values (10, 5, -1); +-- insert into mcrparted values (2, 1, 0); +-- insert into mcrparted1 values (2, 1, 0); + +-- routed to mcrparted2 +-- insert into mcrparted values (10, 6, 1000); +-- insert into mcrparted2 values (10, 6, 1000); +-- insert into mcrparted values (10, 1000, 1000); +-- insert into mcrparted2 values (10, 1000, 1000); + +-- no partition exists, nor does mcrparted3 accept it +-- insert into mcrparted values (11, 1, -1); +-- insert into mcrparted3 values (11, 1, -1); +
+-- routed to mcrparted5 +-- insert into mcrparted values (30, 21, 20); +-- insert into mcrparted5 values (30, 21, 20); +-- insert into mcrparted4 values (30, 21, 20); -- error + +-- check rows +-- select tableoid::regclass::text, * from mcrparted order by 1; + +-- cleanup +-- drop table mcrparted; + +-- check that a BR constraint can't make partition contain violating rows +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table brtrigpartcon (a int, b text) partition by list (a); +-- create table brtrigpartcon1 partition of brtrigpartcon for values in (1); +-- create or replace function brtrigpartcon1trigf() returns trigger as $$begin new.a := 2; return new; end$$ language plpgsql; +-- create trigger brtrigpartcon1trig before insert on brtrigpartcon1 for each row execute procedure brtrigpartcon1trigf(); +-- insert into brtrigpartcon values (1, 'hi there'); +-- insert into brtrigpartcon1 values (1, 'hi there'); + +-- check that the message shows the appropriate column description in a +-- situation where the partitioned table is not the primary ModifyTable node +-- create table inserttest3 (f1 text default 'foo', f2 text default 'bar', f3 int); +-- create role regress_coldesc_role; +-- grant insert on inserttest3 to regress_coldesc_role; +-- grant insert on brtrigpartcon to regress_coldesc_role; +-- revoke select on brtrigpartcon from regress_coldesc_role; +-- set role regress_coldesc_role; +-- with result as (insert into brtrigpartcon values (1, 'hi there') returning 1) +-- insert into inserttest3 (f3) select * from result; +-- reset role; + +-- cleanup +-- revoke all on inserttest3 from regress_coldesc_role; +-- revoke all on brtrigpartcon from regress_coldesc_role; +-- drop role regress_coldesc_role; +-- drop table inserttest3; +-- drop table brtrigpartcon; +-- drop function brtrigpartcon1trigf(); + +-- check that "do nothing" BR triggers work with tuple-routing (this checks +-- that 
estate->es_result_relation_info is appropriately set/reset for each +-- routed tuple) +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table donothingbrtrig_test (a int, b text) partition by list (a); +-- create table donothingbrtrig_test1 (b text, a int); +-- create table donothingbrtrig_test2 (c text, b text, a int); +-- alter table donothingbrtrig_test2 drop column c; +-- create or replace function donothingbrtrig_func() returns trigger as $$begin raise notice 'b: %', new.b; return NULL; end$$ language plpgsql; +-- create trigger donothingbrtrig1 before insert on donothingbrtrig_test1 for each row execute procedure donothingbrtrig_func(); +-- create trigger donothingbrtrig2 before insert on donothingbrtrig_test2 for each row execute procedure donothingbrtrig_func(); +-- alter table donothingbrtrig_test attach partition donothingbrtrig_test1 for values in (1); +-- alter table donothingbrtrig_test attach partition donothingbrtrig_test2 for values in (2); +-- insert into donothingbrtrig_test values (1, 'foo'), (2, 'bar'); +-- [SPARK-29386] Copy data between a file and a table +-- copy donothingbrtrig_test from stdout; +-- 1 baz +-- 2 qux +-- \. 
+-- select tableoid::regclass, * from donothingbrtrig_test; + +-- cleanup +-- drop table donothingbrtrig_test; +-- drop function donothingbrtrig_func(); + +-- check multi-column range partitioning with minvalue/maxvalue constraints +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and PARTITION OF in CREATE TABLE +-- create table mcrparted (a text, b int) partition by range(a, b); +-- create table mcrparted1_lt_b partition of mcrparted for values from (minvalue, minvalue) to ('b', minvalue); +-- create table mcrparted2_b partition of mcrparted for values from ('b', minvalue) to ('c', minvalue); +-- create table mcrparted3_c_to_common partition of mcrparted for values from ('c', minvalue) to ('common', minvalue); +-- create table mcrparted4_common_lt_0 partition of mcrparted for values from ('common', minvalue) to ('common', 0); +-- create table mcrparted5_common_0_to_10 partition of mcrparted for values from ('common', 0) to ('common', 10); +-- create table mcrparted6_common_ge_10 partition of mcrparted for values from ('common', 10) to ('common', maxvalue); +-- create table mcrparted7_gt_common_lt_d partition of mcrparted for values from ('common', maxvalue) to ('d', minvalue); +-- create table mcrparted8_ge_d partition of mcrparted for values from ('d', minvalue) to (maxvalue, maxvalue); + +-- \d+ mcrparted +-- \d+ mcrparted1_lt_b +-- \d+ mcrparted2_b +-- \d+ mcrparted3_c_to_common +-- \d+ mcrparted4_common_lt_0 +-- \d+ mcrparted5_common_0_to_10 +-- \d+ mcrparted6_common_ge_10 +-- \d+ mcrparted7_gt_common_lt_d +-- \d+ mcrparted8_ge_d + +-- insert into mcrparted values ('aaa', 0), ('b', 0), ('bz', 10), ('c', -10), +-- ('comm', -10), ('common', -10), ('common', 0), ('common', 10), +-- ('commons', 0), ('d', -10), ('e', 0); +-- select tableoid::regclass, * from mcrparted order by a, b; +-- drop table mcrparted; + +-- check that wholerow vars in the RETURNING list work with partitioned tables +-- [SPARK-29718] Support PARTITION BY [RANGE|LIST|HASH] and 
PARTITION OF in CREATE TABLE +-- create table returningwrtest (a int) partition by list (a); +-- create table returningwrtest1 partition of returningwrtest for values in (1); +-- insert into returningwrtest values (1) returning returningwrtest; + +-- check also that the wholerow vars in RETURNING list are converted as needed +-- alter table returningwrtest add b text; +-- create table returningwrtest2 (b text, c int, a int); +-- alter table returningwrtest2 drop c; +-- alter table returningwrtest attach partition returningwrtest2 for values in (2); +-- insert into returningwrtest values (2, 'foo') returning returningwrtest; +-- drop table returningwrtest; diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/int2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int2.sql similarity index 87% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/int2.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int2.sql index f64ec5d75afcf..07f5976ca6d2f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/int2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int2.sql @@ -8,19 +8,23 @@ CREATE TABLE INT2_TBL(f1 smallint) USING parquet; -- [SPARK-28023] Trim the string when cast string type to other types -INSERT INTO INT2_TBL VALUES (trim('0 ')); +-- PostgreSQL implicitly casts string literals to data with integral types, but +-- Spark does not support that kind of implicit casts. 
+INSERT INTO INT2_TBL VALUES (smallint(trim('0 '))); -INSERT INTO INT2_TBL VALUES (trim(' 1234 ')); +INSERT INTO INT2_TBL VALUES (smallint(trim(' 1234 '))); -INSERT INTO INT2_TBL VALUES (trim(' -1234')); +INSERT INTO INT2_TBL VALUES (smallint(trim(' -1234'))); -- [SPARK-27923] Invalid input syntax for type short throws exception at PostgreSQL -- INSERT INTO INT2_TBL VALUES ('34.5'); -- largest and smallest values -INSERT INTO INT2_TBL VALUES ('32767'); +-- PostgreSQL implicitly casts string literals to data with integral types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO INT2_TBL VALUES (smallint('32767')); -INSERT INTO INT2_TBL VALUES ('-32767'); +INSERT INTO INT2_TBL VALUES (smallint('-32767')); -- bad input values -- should give errors -- INSERT INTO INT2_TBL VALUES ('100000'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/int4.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql similarity index 90% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/int4.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql index 1012db72e1873..3a409eea34837 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/int4.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int4.sql @@ -9,19 +9,23 @@ CREATE TABLE INT4_TBL(f1 int) USING parquet; -- [SPARK-28023] Trim the string when cast string type to other types -INSERT INTO INT4_TBL VALUES (trim(' 0 ')); +-- PostgreSQL implicitly casts string literals to data with integral types, but +-- Spark does not support that kind of implicit casts. 
+INSERT INTO INT4_TBL VALUES (int(trim(' 0 '))); -INSERT INTO INT4_TBL VALUES (trim('123456 ')); +INSERT INTO INT4_TBL VALUES (int(trim('123456 '))); -INSERT INTO INT4_TBL VALUES (trim(' -123456')); +INSERT INTO INT4_TBL VALUES (int(trim(' -123456'))); -- [SPARK-27923] Invalid input syntax for integer: "34.5" at PostgreSQL -- INSERT INTO INT4_TBL(f1) VALUES ('34.5'); -- largest and smallest values -INSERT INTO INT4_TBL VALUES ('2147483647'); +-- PostgreSQL implicitly casts string literals to data with integral types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO INT4_TBL VALUES (int('2147483647')); -INSERT INTO INT4_TBL VALUES ('-2147483647'); +INSERT INTO INT4_TBL VALUES (int('-2147483647')); -- [SPARK-27923] Spark SQL insert these bad inputs to NULL -- bad input values @@ -33,11 +37,6 @@ INSERT INTO INT4_TBL VALUES ('-2147483647'); -- INSERT INTO INT4_TBL(f1) VALUES ('123 5'); -- INSERT INTO INT4_TBL(f1) VALUES (''); --- We cannot test this when failOnOverFlow=true here --- because exception happens in the executors and the --- output stacktrace cannot have an exact match -set spark.sql.arithmeticOperations.failOnOverFlow=false; - SELECT '' AS five, * FROM INT4_TBL; SELECT '' AS four, i.* FROM INT4_TBL i WHERE i.f1 <> smallint('0'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/int8.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int8.sql similarity index 94% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/int8.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int8.sql index d29bf3bfad4ca..5fea758e73084 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/int8.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/int8.sql @@ -8,11 +8,13 @@ -- CREATE TABLE INT8_TBL(q1 bigint, q2 bigint) USING parquet; -INSERT INTO INT8_TBL VALUES(trim(' 123 '),trim(' 456')); -INSERT INTO INT8_TBL VALUES(trim('123 '),'4567890123456789'); -INSERT INTO INT8_TBL 
VALUES('4567890123456789','123'); -INSERT INTO INT8_TBL VALUES(+4567890123456789,'4567890123456789'); -INSERT INTO INT8_TBL VALUES('+4567890123456789','-4567890123456789'); +-- PostgreSQL implicitly casts string literals to data with integral types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO INT8_TBL VALUES(bigint(trim(' 123 ')),bigint(trim(' 456'))); +INSERT INTO INT8_TBL VALUES(bigint(trim('123 ')),bigint('4567890123456789')); +INSERT INTO INT8_TBL VALUES(bigint('4567890123456789'),bigint('123')); +INSERT INTO INT8_TBL VALUES(+4567890123456789,bigint('4567890123456789')); +INSERT INTO INT8_TBL VALUES(bigint('+4567890123456789'),bigint('-4567890123456789')); -- [SPARK-27923] Spark SQL insert there bad inputs to NULL -- bad inputs diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/interval.sql new file mode 100644 index 0000000000000..eb8cc34419519 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/interval.sql @@ -0,0 +1,344 @@ +-- +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- +-- INTERVAL +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/interval.sql + +-- [SPARK-28259] Date/Time Output Styles and Date Order Conventions +-- SET DATESTYLE = 'ISO'; +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle to postgres; + +-- check acceptance of "time zone style" +-- [SPARK-29369] Accept strings without `interval` prefix in casting to intervals +-- [SPARK-29370] Interval strings without explicit unit markings +-- SELECT INTERVAL '01:00' AS `One hour`; +-- SELECT INTERVAL '+02:00' AS `Two hours`; +-- SELECT INTERVAL '-08:00' AS `Eight hours`; +-- SELECT INTERVAL '-1 +02:03' AS `22 hours ago...`; +-- SELECT INTERVAL '-1 days +02:03' AS `22 hours ago...`; +-- [SPARK-29371] Support interval field values with fractional parts +-- SELECT INTERVAL '1.5 weeks' AS 
`Ten days twelve hours`; +-- SELECT INTERVAL '1.5 months' AS `One month 15 days`; +-- SELECT INTERVAL '10 years -11 month -12 days +13:14' AS `9 years...`; + +-- [SPARK-29382] Support writing `INTERVAL` type to datasource table +-- CREATE TABLE INTERVAL_TBL (f1 interval); + +-- [SPARK-29383] Support the optional prefix `@` in interval strings +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 1 minute'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 5 hour'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 10 day'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 34 year'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 3 months'); +-- [SPARK-29384] Support `ago` in interval strings +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 14 seconds ago'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('1 day 2 hours 3 minutes 4 seconds'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('6 years'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('5 months'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('5 months 12 hours'); + +-- badly formatted interval +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('badly formatted interval'); +-- INSERT INTO INTERVAL_TBL (f1) VALUES ('@ 30 eons ago'); + +-- test interval operators + +-- SELECT '' AS ten, * FROM INTERVAL_TBL; +-- [SPARK-29385] Make `INTERVAL` values comparable +-- SELECT '' AS nine, * FROM INTERVAL_TBL +-- WHERE INTERVAL_TBL.f1 <> interval '@ 10 days'; + +-- SELECT '' AS three, * FROM INTERVAL_TBL +-- WHERE INTERVAL_TBL.f1 <= interval '@ 5 hours'; + +-- SELECT '' AS three, * FROM INTERVAL_TBL +-- WHERE INTERVAL_TBL.f1 < interval '@ 1 day'; + +-- SELECT '' AS one, * FROM INTERVAL_TBL +-- WHERE INTERVAL_TBL.f1 = interval '@ 34 years'; + +-- SELECT '' AS five, * FROM INTERVAL_TBL +-- WHERE INTERVAL_TBL.f1 >= interval '@ 1 month'; + +-- SELECT '' AS nine, * FROM INTERVAL_TBL +-- WHERE INTERVAL_TBL.f1 > interval '@ 3 seconds ago'; + +-- SELECT '' AS fortyfive, r1.*, r2.* +-- FROM INTERVAL_TBL r1, INTERVAL_TBL r2 +-- WHERE r1.f1 > r2.f1 +-- ORDER BY r1.f1, r2.f1; + +-- 
Test intervals that are large enough to overflow 64 bits in comparisons +-- [SPARK-29369] Accept strings without `interval` prefix in casting to intervals +-- CREATE TEMP TABLE INTERVAL_TBL_OF (f1 interval); +-- INSERT INTO INTERVAL_TBL_OF (f1) VALUES +-- ('2147483647 days 2147483647 months'), +-- ('2147483647 days -2147483648 months'), +-- ('1 year'), +-- ('-2147483648 days 2147483647 months'), +-- ('-2147483648 days -2147483648 months'); +-- these should fail as out-of-range +-- INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483648 days'); +-- INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483649 days'); +-- INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483647 years'); +-- INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483648 years'); + +-- SELECT r1.*, r2.* +-- FROM INTERVAL_TBL_OF r1, INTERVAL_TBL_OF r2 +-- WHERE r1.f1 > r2.f1 +-- ORDER BY r1.f1, r2.f1; + +-- CREATE INDEX ON INTERVAL_TBL_OF USING btree (f1); +-- SET enable_seqscan TO false; +-- EXPLAIN (COSTS OFF) +-- SELECT f1 FROM INTERVAL_TBL_OF r1 ORDER BY f1; +-- SELECT f1 FROM INTERVAL_TBL_OF r1 ORDER BY f1; +-- RESET enable_seqscan; + +-- DROP TABLE INTERVAL_TBL_OF; + +-- Test multiplication and division with intervals. +-- Floating point arithmetic rounding errors can lead to unexpected results, +-- though the code attempts to do the right thing and round up to days and +-- minutes to avoid results such as '3 days 24:00 hours' or '14:20:60'. +-- Note that it is expected for some day components to be greater than 29 and +-- some time components be greater than 23:59:59 due to how intervals are +-- stored internally. +-- [SPARK-29386] Copy data between a file and a table +-- CREATE TABLE INTERVAL_MULDIV_TBL (span interval); +-- COPY INTERVAL_MULDIV_TBL FROM STDIN; +-- 41 mon 12 days 360:00 +-- -41 mon -12 days +360:00 +-- -12 days +-- 9 mon -27 days 12:34:56 +-- -3 years 482 days 76:54:32.189 +-- 4 mon +-- 14 mon +-- 999 mon 999 days +-- \. 
+-- [SPARK-29387] Support `*` and `\` operators for intervals +-- SELECT span * 0.3 AS product +-- FROM INTERVAL_MULDIV_TBL; + +-- SELECT span * 8.2 AS product +-- FROM INTERVAL_MULDIV_TBL; + +-- SELECT span / 10 AS quotient +-- FROM INTERVAL_MULDIV_TBL; + +-- SELECT span / 100 AS quotient +-- FROM INTERVAL_MULDIV_TBL; + +-- DROP TABLE INTERVAL_MULDIV_TBL; +-- [SPARK-28259] Date/Time Output Styles and Date Order Conventions +-- SET DATESTYLE = 'postgres'; +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle to postgres_verbose; + +-- SELECT '' AS ten, * FROM INTERVAL_TBL; + +-- test avg(interval), which is somewhat fragile since people have been +-- known to change the allowed input syntax for type interval without +-- updating pg_aggregate.agginitval + +-- select avg(f1) from interval_tbl; + +-- test long interval input +-- [SPARK-29388] Construct intervals from the `millenniums`, `centuries` or `decades` units +-- select '4 millenniums 5 centuries 4 decades 1 year 4 months 4 days 17 minutes 31 seconds'::interval; + +-- test long interval output +-- Note: the actual maximum length of the interval output is longer, +-- but we need the test to work for both integer and floating-point +-- timestamps. 
+-- [SPARK-29389] Support synonyms for interval units +-- select '100000000y 10mon -1000000000d -100000h -10min -10.000001s ago'::interval; + +-- test justify_hours() and justify_days() +-- [SPARK-29390] Add the justify_days(), justify_hours() and justify_interval() functions +-- SELECT justify_hours(interval '6 months 3 days 52 hours 3 minutes 2 seconds') as `6 mons 5 days 4 hours 3 mins 2 seconds`; +-- SELECT justify_days(interval '6 months 36 days 5 hours 4 minutes 3 seconds') as `7 mons 6 days 5 hours 4 mins 3 seconds`; + +-- test justify_interval() + +-- SELECT justify_interval(interval '1 month -1 hour') as `1 month -1 hour`; + +-- test fractional second input, and detection of duplicate units +-- [SPARK-28259] Date/Time Output Styles and Date Order Conventions +-- SET DATESTYLE = 'ISO'; +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle TO postgres; +-- [SPARK-29369] Accept strings without `interval` prefix in casting to intervals +-- SELECT '1 millisecond'::interval, '1 microsecond'::interval, +-- '500 seconds 99 milliseconds 51 microseconds'::interval; +-- SELECT '3 days 5 milliseconds'::interval; + +-- SELECT '1 second 2 seconds'::interval; -- error +-- SELECT '10 milliseconds 20 milliseconds'::interval; -- error +-- SELECT '5.5 seconds 3 milliseconds'::interval; -- error +-- SELECT '1:20:05 5 microseconds'::interval; -- error +-- SELECT '1 day 1 day'::interval; -- error +-- [SPARK-29391] Default year-month units +-- SELECT interval '1-2'; -- SQL year-month literal +SELECT interval '999' second; -- oversize leading field is ok +SELECT interval '999' minute; +SELECT interval '999' hour; +SELECT interval '999' day; +SELECT interval '999' month; + +-- test SQL-spec syntaxes for restricted field sets +SELECT interval '1' year; +SELECT interval '2' month; +SELECT interval '3' day; +SELECT interval '4' hour; +SELECT interval '5' minute; +SELECT interval '6' second; +-- [SPARK-29391] Default year-month units +-- SELECT interval '1' year to month; 
+SELECT interval '1-2' year to month; +-- [SPARK-29391] Default year-month units +-- SELECT interval '1 2' day to hour; +SELECT interval '1 2:03' day to hour; +SELECT interval '1 2:03:04' day to hour; +-- SELECT interval '1 2' day to minute; +SELECT interval '1 2:03' day to minute; +SELECT interval '1 2:03:04' day to minute; +-- SELECT interval '1 2' day to second; +SELECT interval '1 2:03' day to second; +SELECT interval '1 2:03:04' day to second; +-- SELECT interval '1 2' hour to minute; +SELECT interval '1 2:03' hour to minute; +SELECT interval '1 2:03:04' hour to minute; +-- SELECT interval '1 2' hour to second; +SELECT interval '1 2:03' hour to second; +SELECT interval '1 2:03:04' hour to second; +-- SELECT interval '1 2' minute to second; +SELECT interval '1 2:03' minute to second; +SELECT interval '1 2:03:04' minute to second; +-- [SPARK-29370] Interval strings without explicit unit markings +-- SELECT interval '1 +2:03' minute to second; +-- SELECT interval '1 +2:03:04' minute to second; +-- SELECT interval '1 -2:03' minute to second; +-- SELECT interval '1 -2:03:04' minute to second; +-- SELECT interval '123 11' day to hour; -- ok +-- SELECT interval '123 11' day; -- not ok +-- SELECT interval '123 11'; -- not ok, too ambiguous +-- SELECT interval '123 2:03 -2:04'; -- not ok, redundant hh:mm fields + +-- test syntaxes for restricted precision +-- [SPARK-29395] Precision of the interval type +-- SELECT interval(0) '1 day 01:23:45.6789'; +-- SELECT interval(2) '1 day 01:23:45.6789'; +-- SELECT interval '12:34.5678' minute to second(2); -- per SQL spec +-- SELECT interval '1.234' second; +-- SELECT interval '1.234' second(2); +-- SELECT interval '1 2.345' day to second(2); +-- SELECT interval '1 2:03' day to second(2); +-- SELECT interval '1 2:03.4567' day to second(2); +-- SELECT interval '1 2:03:04.5678' day to second(2); +-- SELECT interval '1 2.345' hour to second(2); +-- SELECT interval '1 2:03.45678' hour to second(2); +-- SELECT interval '1 
2:03:04.5678' hour to second(2); +-- SELECT interval '1 2.3456' minute to second(2); +-- SELECT interval '1 2:03.5678' minute to second(2); +-- SELECT interval '1 2:03:04.5678' minute to second(2); + +-- test casting to restricted precision (bug #14479) +-- SELECT f1, f1::INTERVAL DAY TO MINUTE AS `minutes`, +-- (f1 + INTERVAL '1 month')::INTERVAL MONTH::INTERVAL YEAR AS `years` +-- FROM interval_tbl; + +-- test inputting and outputting SQL standard interval literals +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle TO sql_standard; +-- [SPARK-29407] Support syntax for zero interval +-- SELECT interval '0' AS zero, +-- interval '1-2' year to month AS `year-month`, +-- interval '1 2:03:04' day to second AS `day-time`, +-- [SPARK-29408] Support interval literal with negative sign `-` +-- - interval '1-2' AS `negative year-month`, +-- - interval '1 2:03:04' AS `negative day-time`; + +-- test input of some not-quite-standard interval values in the sql style +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle TO postgres; +-- SELECT interval '+1 -1:00:00', +-- interval '-1 +1:00:00', +-- interval '+1-2 -3 +4:05:06.789', +-- interval '-1-2 +3 -4:05:06.789'; + +-- test output of couple non-standard interval values in the sql style +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle TO sql_standard; +-- SELECT interval '1 day -1 hours', +-- interval '-1 days +1 hours', +-- interval '1 years 2 months -3 days 4 hours 5 minutes 6.789 seconds', +-- - interval '1 years 2 months -3 days 4 hours 5 minutes 6.789 seconds'; + +-- test outputting iso8601 intervals +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle to iso_8601; +-- select interval '0' AS zero, +-- interval '1-2' AS `a year 2 months`, +-- interval '1 2:03:04' AS `a bit over a day`, +-- interval '2:03:04.45679' AS `a bit over 2 hours`, +-- (interval '1-2' + interval '3 4:05:06.7') AS `all fields`, +-- (interval '1-2' - interval '3 4:05:06.7') AS `mixed sign`, +-- (- 
interval '1-2' + interval '3 4:05:06.7') AS negative; + +-- test inputting ISO 8601 4.4.2.1 "Format With Time Unit Designators" +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle to sql_standard; +-- [SPARK-29394] Support ISO 8601 format for intervals +-- select interval 'P0Y' AS zero, +-- interval 'P1Y2M' AS `a year 2 months`, +-- interval 'P1W' AS `a week`, +-- interval 'P1DT2H3M4S' AS `a bit over a day`, +-- interval 'P1Y2M3DT4H5M6.7S' AS `all fields`, +-- interval 'P-1Y-2M-3DT-4H-5M-6.7S' AS negative, +-- interval 'PT-0.1S' AS `fractional second`; + +-- test inputting ISO 8601 4.4.2.2 "Alternative Format" +-- [SPARK-29406] Interval output styles +-- SET IntervalStyle to postgres; +-- select interval 'P00021015T103020' AS `ISO8601 Basic Format`, +-- interval 'P0002-10-15T10:30:20' AS `ISO8601 Extended Format`; + +-- Make sure optional ISO8601 alternative format fields are optional. +-- select interval 'P0002' AS `year only`, +-- interval 'P0002-10' AS `year month`, +-- interval 'P0002-10-15' AS `year month day`, +-- interval 'P0002T1S' AS `year only plus time`, +-- interval 'P0002-10T1S' AS `year month plus time`, +-- interval 'P0002-10-15T1S' AS `year month day plus time`, +-- interval 'PT10' AS `hour only`, +-- interval 'PT10:30' AS `hour minute`; + +-- test a couple rounding cases that changed since 8.3 w/ HAVE_INT64_TIMESTAMP. 
+-- [SPARK-29406] Interval output styles +-- SET IntervalStyle to postgres_verbose; +-- select interval '-10 mons -3 days +03:55:06.70'; +-- select interval '1 year 2 mons 3 days 04:05:06.699999'; +-- select interval '0:0:0.7', interval '@ 0.70 secs', interval '0.7 seconds'; + +-- check that '30 days' equals '1 month' according to the hash function +-- [SPARK-29385] Make `INTERVAL` values comparable +-- select '30 days'::interval = '1 month'::interval as t; +-- select interval_hash('30 days'::interval) = interval_hash('1 month'::interval) as t; + +-- numeric constructor +-- [SPARK-29393] Add the make_interval() function +-- select make_interval(years := 2); +-- select make_interval(years := 1, months := 6); +-- select make_interval(years := 1, months := -1, weeks := 5, days := -7, hours := 25, mins := -180); + +-- select make_interval() = make_interval(years := 0, months := 0, weeks := 0, days := 0, mins := 0, secs := 0.0); +-- select make_interval(hours := -2, mins := -10, secs := -25.3); + +-- select make_interval(years := 'inf'::float::int); +-- select make_interval(months := 'NaN'::float::int); +-- select make_interval(secs := 'inf'); +-- select make_interval(secs := 'NaN'); +-- select make_interval(secs := 7e12); diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/join.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/join.sql similarity index 98% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/join.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/join.sql index 08f54fe0a40e5..cc07b00cc3670 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/join.sql @@ -6,6 +6,19 @@ -- Test JOIN clauses -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/join.sql -- + +-- There are 2 dimensions we want to test +-- 1. run with broadcast hash join, sort merge join or shuffle hash join. +-- 2. 
run with whole-stage-codegen, operator codegen or no codegen. + +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=10485760 +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false + +--CONFIG_DIM2 spark.sql.codegen.wholeStage=true +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) AS v(f1); @@ -577,15 +590,15 @@ select count(*) from tenk1 a, tenk1 b -- regression test for 8.2 bug with improper re-ordering of left joins -- -DROP TABLE IF EXISTS tt3; -CREATE TABLE tt3(f1 int, f2 string) USING parquet; -INSERT INTO tt3 SELECT x.id, repeat('xyzzy', 100) FROM range(1,10001) x; +create or replace temporary view tt3 as select * from + (SELECT cast(x.id as int), repeat('xyzzy', 100) FROM range(1,10001) x) + as v(f1, f2); -- create index tt3i on tt3(f1); -- analyze tt3; -DROP TABLE IF EXISTS tt4; -CREATE TABLE tt4(f1 int) USING parquet; -INSERT INTO tt4 VALUES (0),(1),(9999); +create or replace temporary view tt4 as select * from + (values (0), (1), (9999)) + as v(f1); -- analyze tt4; SELECT a.f1 diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/limit.sql new file mode 100644 index 0000000000000..bc0b5d6dddc52 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/limit.sql @@ -0,0 +1,164 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- LIMIT +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/limit.sql + +SELECT '' AS two, unique1, unique2, stringu1 + FROM onek WHERE unique1 > 50 + ORDER BY unique1 LIMIT 2; +SELECT '' 
AS five, unique1, unique2, stringu1 + FROM onek WHERE unique1 > 60 + ORDER BY unique1 LIMIT 5; +SELECT '' AS two, unique1, unique2, stringu1 + FROM onek WHERE unique1 > 60 AND unique1 < 63 + ORDER BY unique1 LIMIT 5; +-- [SPARK-28330] ANSI SQL: Top-level in +-- SELECT '' AS three, unique1, unique2, stringu1 +-- FROM onek WHERE unique1 > 100 +-- ORDER BY unique1 LIMIT 3 OFFSET 20; +-- SELECT '' AS zero, unique1, unique2, stringu1 +-- FROM onek WHERE unique1 < 50 +-- ORDER BY unique1 DESC LIMIT 8 OFFSET 99; +-- SELECT '' AS eleven, unique1, unique2, stringu1 +-- FROM onek WHERE unique1 < 50 +-- ORDER BY unique1 DESC LIMIT 20 OFFSET 39; +-- SELECT '' AS ten, unique1, unique2, stringu1 +-- FROM onek +-- ORDER BY unique1 OFFSET 990; +-- SELECT '' AS five, unique1, unique2, stringu1 +-- FROM onek +-- ORDER BY unique1 OFFSET 990 LIMIT 5; +-- SELECT '' AS five, unique1, unique2, stringu1 +-- FROM onek +-- ORDER BY unique1 LIMIT 5 OFFSET 900; + +CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM + (VALUES + (123, 456), + (123, 4567890123456789), + (4567890123456789, 123), + (4567890123456789, 4567890123456789), + (4567890123456789, -4567890123456789)) + AS v(q1, q2); + +-- Test null limit and offset. 
The planner would discard a simple null +-- constant, so to ensure executor is exercised, do this: +-- [SPARK-29650] Discard a NULL constant in LIMIT +select * from int8_tbl limit (case when random() < 0.5 then bigint(null) end); +-- [SPARK-28330] ANSI SQL: Top-level in +-- select * from int8_tbl offset (case when random() < 0.5 then bigint(null) end); + +-- Test assorted cases involving backwards fetch from a LIMIT plan node +-- [SPARK-20965] Support PREPARE/EXECUTE/DECLARE/FETCH statements +-- begin; +-- +-- declare c1 cursor for select * from int8_tbl limit 10; +-- fetch all in c1; +-- fetch 1 in c1; +-- fetch backward 1 in c1; +-- fetch backward all in c1; +-- fetch backward 1 in c1; +-- fetch all in c1; +-- +-- declare c2 cursor for select * from int8_tbl limit 3; +-- fetch all in c2; +-- fetch 1 in c2; +-- fetch backward 1 in c2; +-- fetch backward all in c2; +-- fetch backward 1 in c2; +-- fetch all in c2; +-- +-- declare c3 cursor for select * from int8_tbl offset 3; +-- fetch all in c3; +-- fetch 1 in c3; +-- fetch backward 1 in c3; +-- fetch backward all in c3; +-- fetch backward 1 in c3; +-- fetch all in c3; +-- +-- declare c4 cursor for select * from int8_tbl offset 10; +-- fetch all in c4; +-- fetch 1 in c4; +-- fetch backward 1 in c4; +-- fetch backward all in c4; +-- fetch backward 1 in c4; +-- fetch all in c4; +-- +-- rollback; + +DROP VIEW INT8_TBL; + +-- Stress test for variable LIMIT in conjunction with bounded-heap sorting + +-- [SPARK-28330] ANSI SQL: Top-level in +-- SELECT +-- (SELECT n +-- FROM (VALUES (1)) AS x, +-- (SELECT n FROM generate_series(1,10) AS n +-- ORDER BY n LIMIT 1 OFFSET s-1) AS y) AS z +-- FROM generate_series(1,10) AS s; + +-- +-- Test behavior of volatile and set-returning functions in conjunction +-- with ORDER BY and LIMIT. 
+-- + +-- [SPARK-29631] Support ANSI SQL CREATE SEQUENCE +-- create temp sequence testseq; + +-- explain (verbose, costs off) +-- select unique1, unique2, nextval('testseq') +-- from tenk1 order by unique2 limit 10; + +-- select unique1, unique2, nextval('testseq') +-- from tenk1 order by unique2 limit 10; + +-- select currval('testseq'); + +-- explain (verbose, costs off) +-- select unique1, unique2, nextval('testseq') +-- from tenk1 order by tenthous limit 10; + +-- select unique1, unique2, nextval('testseq') +-- from tenk1 order by tenthous limit 10; + +-- select currval('testseq'); + +-- explain (verbose, costs off) +-- select unique1, unique2, generate_series(1,10) +-- from tenk1 order by unique2 limit 7; + +-- [SPARK-27767] Built-in function: generate_series +-- select unique1, unique2, generate_series(1,10) +-- from tenk1 order by unique2 limit 7; + +-- explain (verbose, costs off) +-- select unique1, unique2, generate_series(1,10) +-- from tenk1 order by tenthous limit 7; + +-- [SPARK-27767] Built-in function: generate_series +-- select unique1, unique2, generate_series(1,10) +-- from tenk1 order by tenthous limit 7; + +-- use of random() is to keep planner from folding the expressions together +-- explain (verbose, costs off) +-- select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2; + +-- [SPARK-27767] Built-in function: generate_series +-- select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2; + +-- explain (verbose, costs off) +-- select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2 +-- order by s2 desc; + +-- [SPARK-27767] Built-in function: generate_series +-- select generate_series(0,2) as s1, generate_series((random()*.1)::int,2) as s2 +-- order by s2 desc; + +-- test for failure to set all aggregates' aggtranstype +-- explain (verbose, costs off) +-- select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 +-- from tenk1 group by thousand order by thousand limit 3; + 
+select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 + from tenk1 group by thousand order by thousand limit 3; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql new file mode 100644 index 0000000000000..dbdb2cace0e0c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/numeric.sql @@ -0,0 +1,1150 @@ +-- +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- +-- NUMERIC +-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/numeric.sql +-- + +-- [SPARK-28318] Decimal can only support precision up to 38. We rewrite numeric(210,10) to decimal(38,10). +CREATE TABLE num_data (id int, val decimal(38,10)) USING parquet; +CREATE TABLE num_exp_add (id1 int, id2 int, expected decimal(38,10)) USING parquet; +CREATE TABLE num_exp_sub (id1 int, id2 int, expected decimal(38,10)) USING parquet; +CREATE TABLE num_exp_div (id1 int, id2 int, expected decimal(38,10)) USING parquet; +CREATE TABLE num_exp_mul (id1 int, id2 int, expected decimal(38,10)) USING parquet; +CREATE TABLE num_exp_sqrt (id int, expected decimal(38,10)) USING parquet; +CREATE TABLE num_exp_ln (id int, expected decimal(38,10)) USING parquet; +CREATE TABLE num_exp_log10 (id int, expected decimal(38,10)) USING parquet; +CREATE TABLE num_exp_power_10_ln (id int, expected decimal(38,10)) USING parquet; + +CREATE TABLE num_result (id1 int, id2 int, result decimal(38,10)) USING parquet; + + +-- ****************************** +-- * The following EXPECTED results are computed by bc(1) +-- * with a scale of 200 +-- ****************************** + +-- BEGIN TRANSACTION; +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. 
+INSERT INTO num_exp_add VALUES (0,0,0); +INSERT INTO num_exp_sub VALUES (0,0,0); +INSERT INTO num_exp_mul VALUES (0,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (0,0,double('NaN')); +INSERT INTO num_exp_add VALUES (0,1,0); +INSERT INTO num_exp_sub VALUES (0,1,0); +INSERT INTO num_exp_mul VALUES (0,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (0,1,double('NaN')); +INSERT INTO num_exp_add VALUES (0,2,-34338492.215397047); +INSERT INTO num_exp_sub VALUES (0,2,34338492.215397047); +INSERT INTO num_exp_mul VALUES (0,2,0); +INSERT INTO num_exp_div VALUES (0,2,0); +INSERT INTO num_exp_add VALUES (0,3,4.31); +INSERT INTO num_exp_sub VALUES (0,3,-4.31); +INSERT INTO num_exp_mul VALUES (0,3,0); +INSERT INTO num_exp_div VALUES (0,3,0); +INSERT INTO num_exp_add VALUES (0,4,7799461.4119); +INSERT INTO num_exp_sub VALUES (0,4,-7799461.4119); +INSERT INTO num_exp_mul VALUES (0,4,0); +INSERT INTO num_exp_div VALUES (0,4,0); +INSERT INTO num_exp_add VALUES (0,5,16397.038491); +INSERT INTO num_exp_sub VALUES (0,5,-16397.038491); +INSERT INTO num_exp_mul VALUES (0,5,0); +INSERT INTO num_exp_div VALUES (0,5,0); +INSERT INTO num_exp_add VALUES (0,6,93901.57763026); +INSERT INTO num_exp_sub VALUES (0,6,-93901.57763026); +INSERT INTO num_exp_mul VALUES (0,6,0); +INSERT INTO num_exp_div VALUES (0,6,0); +INSERT INTO num_exp_add VALUES (0,7,-83028485); +INSERT INTO num_exp_sub VALUES (0,7,83028485); +INSERT INTO num_exp_mul VALUES (0,7,0); +INSERT INTO num_exp_div VALUES (0,7,0); +INSERT INTO num_exp_add VALUES (0,8,74881); +INSERT INTO num_exp_sub VALUES (0,8,-74881); +INSERT INTO num_exp_mul VALUES (0,8,0); +INSERT INTO num_exp_div VALUES (0,8,0); +INSERT INTO num_exp_add VALUES (0,9,-24926804.045047420); +INSERT INTO num_exp_sub VALUES (0,9,24926804.045047420); +INSERT INTO num_exp_mul VALUES (0,9,0); +INSERT INTO num_exp_div VALUES (0,9,0); +INSERT INTO num_exp_add VALUES (1,0,0); +INSERT INTO 
num_exp_sub VALUES (1,0,0); +INSERT INTO num_exp_mul VALUES (1,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (1,0,double('NaN')); +INSERT INTO num_exp_add VALUES (1,1,0); +INSERT INTO num_exp_sub VALUES (1,1,0); +INSERT INTO num_exp_mul VALUES (1,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (1,1,double('NaN')); +INSERT INTO num_exp_add VALUES (1,2,-34338492.215397047); +INSERT INTO num_exp_sub VALUES (1,2,34338492.215397047); +INSERT INTO num_exp_mul VALUES (1,2,0); +INSERT INTO num_exp_div VALUES (1,2,0); +INSERT INTO num_exp_add VALUES (1,3,4.31); +INSERT INTO num_exp_sub VALUES (1,3,-4.31); +INSERT INTO num_exp_mul VALUES (1,3,0); +INSERT INTO num_exp_div VALUES (1,3,0); +INSERT INTO num_exp_add VALUES (1,4,7799461.4119); +INSERT INTO num_exp_sub VALUES (1,4,-7799461.4119); +INSERT INTO num_exp_mul VALUES (1,4,0); +INSERT INTO num_exp_div VALUES (1,4,0); +INSERT INTO num_exp_add VALUES (1,5,16397.038491); +INSERT INTO num_exp_sub VALUES (1,5,-16397.038491); +INSERT INTO num_exp_mul VALUES (1,5,0); +INSERT INTO num_exp_div VALUES (1,5,0); +INSERT INTO num_exp_add VALUES (1,6,93901.57763026); +INSERT INTO num_exp_sub VALUES (1,6,-93901.57763026); +INSERT INTO num_exp_mul VALUES (1,6,0); +INSERT INTO num_exp_div VALUES (1,6,0); +INSERT INTO num_exp_add VALUES (1,7,-83028485); +INSERT INTO num_exp_sub VALUES (1,7,83028485); +INSERT INTO num_exp_mul VALUES (1,7,0); +INSERT INTO num_exp_div VALUES (1,7,0); +INSERT INTO num_exp_add VALUES (1,8,74881); +INSERT INTO num_exp_sub VALUES (1,8,-74881); +INSERT INTO num_exp_mul VALUES (1,8,0); +INSERT INTO num_exp_div VALUES (1,8,0); +INSERT INTO num_exp_add VALUES (1,9,-24926804.045047420); +INSERT INTO num_exp_sub VALUES (1,9,24926804.045047420); +INSERT INTO num_exp_mul VALUES (1,9,0); +INSERT INTO num_exp_div VALUES (1,9,0); +INSERT INTO num_exp_add VALUES (2,0,-34338492.215397047); +INSERT INTO num_exp_sub VALUES 
(2,0,-34338492.215397047); +INSERT INTO num_exp_mul VALUES (2,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (2,0,double('NaN')); +INSERT INTO num_exp_add VALUES (2,1,-34338492.215397047); +INSERT INTO num_exp_sub VALUES (2,1,-34338492.215397047); +INSERT INTO num_exp_mul VALUES (2,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (2,1,double('NaN')); +INSERT INTO num_exp_add VALUES (2,2,-68676984.430794094); +INSERT INTO num_exp_sub VALUES (2,2,0); +INSERT INTO num_exp_mul VALUES (2,2,1179132047626883.596862135856320209); +INSERT INTO num_exp_div VALUES (2,2,1.00000000000000000000); +INSERT INTO num_exp_add VALUES (2,3,-34338487.905397047); +INSERT INTO num_exp_sub VALUES (2,3,-34338496.525397047); +INSERT INTO num_exp_mul VALUES (2,3,-147998901.44836127257); +INSERT INTO num_exp_div VALUES (2,3,-7967167.56737750510440835266); +INSERT INTO num_exp_add VALUES (2,4,-26539030.803497047); +INSERT INTO num_exp_sub VALUES (2,4,-42137953.627297047); +INSERT INTO num_exp_mul VALUES (2,4,-267821744976817.8111137106593); +INSERT INTO num_exp_div VALUES (2,4,-4.40267480046830116685); +INSERT INTO num_exp_add VALUES (2,5,-34322095.176906047); +INSERT INTO num_exp_sub VALUES (2,5,-34354889.253888047); +INSERT INTO num_exp_mul VALUES (2,5,-563049578578.769242506736077); +INSERT INTO num_exp_div VALUES (2,5,-2094.18866914563535496429); +INSERT INTO num_exp_add VALUES (2,6,-34244590.637766787); +INSERT INTO num_exp_sub VALUES (2,6,-34432393.793027307); +INSERT INTO num_exp_mul VALUES (2,6,-3224438592470.18449811926184222); +INSERT INTO num_exp_div VALUES (2,6,-365.68599891479766440940); +INSERT INTO num_exp_add VALUES (2,7,-117366977.215397047); +INSERT INTO num_exp_sub VALUES (2,7,48689992.784602953); +INSERT INTO num_exp_mul VALUES (2,7,2851072985828710.485883795); +INSERT INTO num_exp_div VALUES (2,7,.41357483778485235518); +INSERT INTO num_exp_add VALUES (2,8,-34263611.215397047); +INSERT 
INTO num_exp_sub VALUES (2,8,-34413373.215397047); +INSERT INTO num_exp_mul VALUES (2,8,-2571300635581.146276407); +INSERT INTO num_exp_div VALUES (2,8,-458.57416721727870888476); +INSERT INTO num_exp_add VALUES (2,9,-59265296.260444467); +INSERT INTO num_exp_sub VALUES (2,9,-9411688.170349627); +INSERT INTO num_exp_mul VALUES (2,9,855948866655588.453741509242968740); +INSERT INTO num_exp_div VALUES (2,9,1.37757299946438931811); +INSERT INTO num_exp_add VALUES (3,0,4.31); +INSERT INTO num_exp_sub VALUES (3,0,4.31); +INSERT INTO num_exp_mul VALUES (3,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (3,0,double('NaN')); +INSERT INTO num_exp_add VALUES (3,1,4.31); +INSERT INTO num_exp_sub VALUES (3,1,4.31); +INSERT INTO num_exp_mul VALUES (3,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (3,1,double('NaN')); +INSERT INTO num_exp_add VALUES (3,2,-34338487.905397047); +INSERT INTO num_exp_sub VALUES (3,2,34338496.525397047); +INSERT INTO num_exp_mul VALUES (3,2,-147998901.44836127257); +INSERT INTO num_exp_div VALUES (3,2,-.00000012551512084352); +INSERT INTO num_exp_add VALUES (3,3,8.62); +INSERT INTO num_exp_sub VALUES (3,3,0); +INSERT INTO num_exp_mul VALUES (3,3,18.5761); +INSERT INTO num_exp_div VALUES (3,3,1.00000000000000000000); +INSERT INTO num_exp_add VALUES (3,4,7799465.7219); +INSERT INTO num_exp_sub VALUES (3,4,-7799457.1019); +INSERT INTO num_exp_mul VALUES (3,4,33615678.685289); +INSERT INTO num_exp_div VALUES (3,4,.00000055260225961552); +INSERT INTO num_exp_add VALUES (3,5,16401.348491); +INSERT INTO num_exp_sub VALUES (3,5,-16392.728491); +INSERT INTO num_exp_mul VALUES (3,5,70671.23589621); +INSERT INTO num_exp_div VALUES (3,5,.00026285234387695504); +INSERT INTO num_exp_add VALUES (3,6,93905.88763026); +INSERT INTO num_exp_sub VALUES (3,6,-93897.26763026); +INSERT INTO num_exp_mul VALUES (3,6,404715.7995864206); +INSERT INTO num_exp_div VALUES 
(3,6,.00004589912234457595); +INSERT INTO num_exp_add VALUES (3,7,-83028480.69); +INSERT INTO num_exp_sub VALUES (3,7,83028489.31); +INSERT INTO num_exp_mul VALUES (3,7,-357852770.35); +INSERT INTO num_exp_div VALUES (3,7,-.00000005190989574240); +INSERT INTO num_exp_add VALUES (3,8,74885.31); +INSERT INTO num_exp_sub VALUES (3,8,-74876.69); +INSERT INTO num_exp_mul VALUES (3,8,322737.11); +INSERT INTO num_exp_div VALUES (3,8,.00005755799201399553); +INSERT INTO num_exp_add VALUES (3,9,-24926799.735047420); +INSERT INTO num_exp_sub VALUES (3,9,24926808.355047420); +INSERT INTO num_exp_mul VALUES (3,9,-107434525.43415438020); +INSERT INTO num_exp_div VALUES (3,9,-.00000017290624149854); +INSERT INTO num_exp_add VALUES (4,0,7799461.4119); +INSERT INTO num_exp_sub VALUES (4,0,7799461.4119); +INSERT INTO num_exp_mul VALUES (4,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (4,0,double('NaN')); +INSERT INTO num_exp_add VALUES (4,1,7799461.4119); +INSERT INTO num_exp_sub VALUES (4,1,7799461.4119); +INSERT INTO num_exp_mul VALUES (4,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (4,1,double('NaN')); +INSERT INTO num_exp_add VALUES (4,2,-26539030.803497047); +INSERT INTO num_exp_sub VALUES (4,2,42137953.627297047); +INSERT INTO num_exp_mul VALUES (4,2,-267821744976817.8111137106593); +INSERT INTO num_exp_div VALUES (4,2,-.22713465002993920385); +INSERT INTO num_exp_add VALUES (4,3,7799465.7219); +INSERT INTO num_exp_sub VALUES (4,3,7799457.1019); +INSERT INTO num_exp_mul VALUES (4,3,33615678.685289); +INSERT INTO num_exp_div VALUES (4,3,1809619.81714617169373549883); +INSERT INTO num_exp_add VALUES (4,4,15598922.8238); +INSERT INTO num_exp_sub VALUES (4,4,0); +INSERT INTO num_exp_mul VALUES (4,4,60831598315717.14146161); +INSERT INTO num_exp_div VALUES (4,4,1.00000000000000000000); +INSERT INTO num_exp_add VALUES (4,5,7815858.450391); +INSERT INTO num_exp_sub VALUES 
(4,5,7783064.373409); +INSERT INTO num_exp_mul VALUES (4,5,127888068979.9935054429); +INSERT INTO num_exp_div VALUES (4,5,475.66281046305802686061); +INSERT INTO num_exp_add VALUES (4,6,7893362.98953026); +INSERT INTO num_exp_sub VALUES (4,6,7705559.83426974); +INSERT INTO num_exp_mul VALUES (4,6,732381731243.745115764094); +INSERT INTO num_exp_div VALUES (4,6,83.05996138436129499606); +INSERT INTO num_exp_add VALUES (4,7,-75229023.5881); +INSERT INTO num_exp_sub VALUES (4,7,90827946.4119); +INSERT INTO num_exp_mul VALUES (4,7,-647577464846017.9715); +INSERT INTO num_exp_div VALUES (4,7,-.09393717604145131637); +INSERT INTO num_exp_add VALUES (4,8,7874342.4119); +INSERT INTO num_exp_sub VALUES (4,8,7724580.4119); +INSERT INTO num_exp_mul VALUES (4,8,584031469984.4839); +INSERT INTO num_exp_div VALUES (4,8,104.15808298366741897143); +INSERT INTO num_exp_add VALUES (4,9,-17127342.633147420); +INSERT INTO num_exp_sub VALUES (4,9,32726265.456947420); +INSERT INTO num_exp_mul VALUES (4,9,-194415646271340.1815956522980); +INSERT INTO num_exp_div VALUES (4,9,-.31289456112403769409); +INSERT INTO num_exp_add VALUES (5,0,16397.038491); +INSERT INTO num_exp_sub VALUES (5,0,16397.038491); +INSERT INTO num_exp_mul VALUES (5,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (5,0,double('NaN')); +INSERT INTO num_exp_add VALUES (5,1,16397.038491); +INSERT INTO num_exp_sub VALUES (5,1,16397.038491); +INSERT INTO num_exp_mul VALUES (5,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (5,1,double('NaN')); +INSERT INTO num_exp_add VALUES (5,2,-34322095.176906047); +INSERT INTO num_exp_sub VALUES (5,2,34354889.253888047); +INSERT INTO num_exp_mul VALUES (5,2,-563049578578.769242506736077); +INSERT INTO num_exp_div VALUES (5,2,-.00047751189505192446); +INSERT INTO num_exp_add VALUES (5,3,16401.348491); +INSERT INTO num_exp_sub VALUES (5,3,16392.728491); +INSERT INTO num_exp_mul VALUES 
(5,3,70671.23589621); +INSERT INTO num_exp_div VALUES (5,3,3804.41728329466357308584); +INSERT INTO num_exp_add VALUES (5,4,7815858.450391); +INSERT INTO num_exp_sub VALUES (5,4,-7783064.373409); +INSERT INTO num_exp_mul VALUES (5,4,127888068979.9935054429); +INSERT INTO num_exp_div VALUES (5,4,.00210232958726897192); +INSERT INTO num_exp_add VALUES (5,5,32794.076982); +INSERT INTO num_exp_sub VALUES (5,5,0); +INSERT INTO num_exp_mul VALUES (5,5,268862871.275335557081); +INSERT INTO num_exp_div VALUES (5,5,1.00000000000000000000); +INSERT INTO num_exp_add VALUES (5,6,110298.61612126); +INSERT INTO num_exp_sub VALUES (5,6,-77504.53913926); +INSERT INTO num_exp_mul VALUES (5,6,1539707782.76899778633766); +INSERT INTO num_exp_div VALUES (5,6,.17461941433576102689); +INSERT INTO num_exp_add VALUES (5,7,-83012087.961509); +INSERT INTO num_exp_sub VALUES (5,7,83044882.038491); +INSERT INTO num_exp_mul VALUES (5,7,-1361421264394.416135); +INSERT INTO num_exp_div VALUES (5,7,-.00019748690453643710); +INSERT INTO num_exp_add VALUES (5,8,91278.038491); +INSERT INTO num_exp_sub VALUES (5,8,-58483.961509); +INSERT INTO num_exp_mul VALUES (5,8,1227826639.244571); +INSERT INTO num_exp_div VALUES (5,8,.21897461960978085228); +INSERT INTO num_exp_add VALUES (5,9,-24910407.006556420); +INSERT INTO num_exp_sub VALUES (5,9,24943201.083538420); +INSERT INTO num_exp_mul VALUES (5,9,-408725765384.257043660243220); +INSERT INTO num_exp_div VALUES (5,9,-.00065780749354660427); +INSERT INTO num_exp_add VALUES (6,0,93901.57763026); +INSERT INTO num_exp_sub VALUES (6,0,93901.57763026); +INSERT INTO num_exp_mul VALUES (6,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (6,0,double('NaN')); +INSERT INTO num_exp_add VALUES (6,1,93901.57763026); +INSERT INTO num_exp_sub VALUES (6,1,93901.57763026); +INSERT INTO num_exp_mul VALUES (6,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (6,1,double('NaN')); +INSERT 
INTO num_exp_add VALUES (6,2,-34244590.637766787); +INSERT INTO num_exp_sub VALUES (6,2,34432393.793027307); +INSERT INTO num_exp_mul VALUES (6,2,-3224438592470.18449811926184222); +INSERT INTO num_exp_div VALUES (6,2,-.00273458651128995823); +INSERT INTO num_exp_add VALUES (6,3,93905.88763026); +INSERT INTO num_exp_sub VALUES (6,3,93897.26763026); +INSERT INTO num_exp_mul VALUES (6,3,404715.7995864206); +INSERT INTO num_exp_div VALUES (6,3,21786.90896293735498839907); +INSERT INTO num_exp_add VALUES (6,4,7893362.98953026); +INSERT INTO num_exp_sub VALUES (6,4,-7705559.83426974); +INSERT INTO num_exp_mul VALUES (6,4,732381731243.745115764094); +INSERT INTO num_exp_div VALUES (6,4,.01203949512295682469); +INSERT INTO num_exp_add VALUES (6,5,110298.61612126); +INSERT INTO num_exp_sub VALUES (6,5,77504.53913926); +INSERT INTO num_exp_mul VALUES (6,5,1539707782.76899778633766); +INSERT INTO num_exp_div VALUES (6,5,5.72674008674192359679); +INSERT INTO num_exp_add VALUES (6,6,187803.15526052); +INSERT INTO num_exp_sub VALUES (6,6,0); +INSERT INTO num_exp_mul VALUES (6,6,8817506281.4517452372676676); +INSERT INTO num_exp_div VALUES (6,6,1.00000000000000000000); +INSERT INTO num_exp_add VALUES (6,7,-82934583.42236974); +INSERT INTO num_exp_sub VALUES (6,7,83122386.57763026); +INSERT INTO num_exp_mul VALUES (6,7,-7796505729750.37795610); +INSERT INTO num_exp_div VALUES (6,7,-.00113095617281538980); +INSERT INTO num_exp_add VALUES (6,8,168782.57763026); +INSERT INTO num_exp_sub VALUES (6,8,19020.57763026); +INSERT INTO num_exp_mul VALUES (6,8,7031444034.53149906); +INSERT INTO num_exp_div VALUES (6,8,1.25401073209839612184); +INSERT INTO num_exp_add VALUES (6,9,-24832902.467417160); +INSERT INTO num_exp_sub VALUES (6,9,25020705.622677680); +INSERT INTO num_exp_mul VALUES (6,9,-2340666225110.29929521292692920); +INSERT INTO num_exp_div VALUES (6,9,-.00376709254265256789); +INSERT INTO num_exp_add VALUES (7,0,-83028485); +INSERT INTO num_exp_sub VALUES (7,0,-83028485); 
+INSERT INTO num_exp_mul VALUES (7,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (7,0,double('NaN')); +INSERT INTO num_exp_add VALUES (7,1,-83028485); +INSERT INTO num_exp_sub VALUES (7,1,-83028485); +INSERT INTO num_exp_mul VALUES (7,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (7,1,double('NaN')); +INSERT INTO num_exp_add VALUES (7,2,-117366977.215397047); +INSERT INTO num_exp_sub VALUES (7,2,-48689992.784602953); +INSERT INTO num_exp_mul VALUES (7,2,2851072985828710.485883795); +INSERT INTO num_exp_div VALUES (7,2,2.41794207151503385700); +INSERT INTO num_exp_add VALUES (7,3,-83028480.69); +INSERT INTO num_exp_sub VALUES (7,3,-83028489.31); +INSERT INTO num_exp_mul VALUES (7,3,-357852770.35); +INSERT INTO num_exp_div VALUES (7,3,-19264149.65197215777262180974); +INSERT INTO num_exp_add VALUES (7,4,-75229023.5881); +INSERT INTO num_exp_sub VALUES (7,4,-90827946.4119); +INSERT INTO num_exp_mul VALUES (7,4,-647577464846017.9715); +INSERT INTO num_exp_div VALUES (7,4,-10.64541262725136247686); +INSERT INTO num_exp_add VALUES (7,5,-83012087.961509); +INSERT INTO num_exp_sub VALUES (7,5,-83044882.038491); +INSERT INTO num_exp_mul VALUES (7,5,-1361421264394.416135); +INSERT INTO num_exp_div VALUES (7,5,-5063.62688881730941836574); +INSERT INTO num_exp_add VALUES (7,6,-82934583.42236974); +INSERT INTO num_exp_sub VALUES (7,6,-83122386.57763026); +INSERT INTO num_exp_mul VALUES (7,6,-7796505729750.37795610); +INSERT INTO num_exp_div VALUES (7,6,-884.20756174009028770294); +INSERT INTO num_exp_add VALUES (7,7,-166056970); +INSERT INTO num_exp_sub VALUES (7,7,0); +INSERT INTO num_exp_mul VALUES (7,7,6893729321395225); +INSERT INTO num_exp_div VALUES (7,7,1.00000000000000000000); +INSERT INTO num_exp_add VALUES (7,8,-82953604); +INSERT INTO num_exp_sub VALUES (7,8,-83103366); +INSERT INTO num_exp_mul VALUES (7,8,-6217255985285); +INSERT INTO num_exp_div VALUES 
(7,8,-1108.80577182462841041118); +INSERT INTO num_exp_add VALUES (7,9,-107955289.045047420); +INSERT INTO num_exp_sub VALUES (7,9,-58101680.954952580); +INSERT INTO num_exp_mul VALUES (7,9,2069634775752159.035758700); +INSERT INTO num_exp_div VALUES (7,9,3.33089171198810413382); +INSERT INTO num_exp_add VALUES (8,0,74881); +INSERT INTO num_exp_sub VALUES (8,0,74881); +INSERT INTO num_exp_mul VALUES (8,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (8,0,double('NaN')); +INSERT INTO num_exp_add VALUES (8,1,74881); +INSERT INTO num_exp_sub VALUES (8,1,74881); +INSERT INTO num_exp_mul VALUES (8,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (8,1,double('NaN')); +INSERT INTO num_exp_add VALUES (8,2,-34263611.215397047); +INSERT INTO num_exp_sub VALUES (8,2,34413373.215397047); +INSERT INTO num_exp_mul VALUES (8,2,-2571300635581.146276407); +INSERT INTO num_exp_div VALUES (8,2,-.00218067233500788615); +INSERT INTO num_exp_add VALUES (8,3,74885.31); +INSERT INTO num_exp_sub VALUES (8,3,74876.69); +INSERT INTO num_exp_mul VALUES (8,3,322737.11); +INSERT INTO num_exp_div VALUES (8,3,17373.78190255220417633410); +INSERT INTO num_exp_add VALUES (8,4,7874342.4119); +INSERT INTO num_exp_sub VALUES (8,4,-7724580.4119); +INSERT INTO num_exp_mul VALUES (8,4,584031469984.4839); +INSERT INTO num_exp_div VALUES (8,4,.00960079113741758956); +INSERT INTO num_exp_add VALUES (8,5,91278.038491); +INSERT INTO num_exp_sub VALUES (8,5,58483.961509); +INSERT INTO num_exp_mul VALUES (8,5,1227826639.244571); +INSERT INTO num_exp_div VALUES (8,5,4.56673929509287019456); +INSERT INTO num_exp_add VALUES (8,6,168782.57763026); +INSERT INTO num_exp_sub VALUES (8,6,-19020.57763026); +INSERT INTO num_exp_mul VALUES (8,6,7031444034.53149906); +INSERT INTO num_exp_div VALUES (8,6,.79744134113322314424); +INSERT INTO num_exp_add VALUES (8,7,-82953604); +INSERT INTO num_exp_sub VALUES (8,7,83103366); +INSERT INTO 
num_exp_mul VALUES (8,7,-6217255985285); +INSERT INTO num_exp_div VALUES (8,7,-.00090187120721280172); +INSERT INTO num_exp_add VALUES (8,8,149762); +INSERT INTO num_exp_sub VALUES (8,8,0); +INSERT INTO num_exp_mul VALUES (8,8,5607164161); +INSERT INTO num_exp_div VALUES (8,8,1.00000000000000000000); +INSERT INTO num_exp_add VALUES (8,9,-24851923.045047420); +INSERT INTO num_exp_sub VALUES (8,9,25001685.045047420); +INSERT INTO num_exp_mul VALUES (8,9,-1866544013697.195857020); +INSERT INTO num_exp_div VALUES (8,9,-.00300403532938582735); +INSERT INTO num_exp_add VALUES (9,0,-24926804.045047420); +INSERT INTO num_exp_sub VALUES (9,0,-24926804.045047420); +INSERT INTO num_exp_mul VALUES (9,0,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (9,0,double('NaN')); +INSERT INTO num_exp_add VALUES (9,1,-24926804.045047420); +INSERT INTO num_exp_sub VALUES (9,1,-24926804.045047420); +INSERT INTO num_exp_mul VALUES (9,1,0); +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_div VALUES (9,1,double('NaN')); +INSERT INTO num_exp_add VALUES (9,2,-59265296.260444467); +INSERT INTO num_exp_sub VALUES (9,2,9411688.170349627); +INSERT INTO num_exp_mul VALUES (9,2,855948866655588.453741509242968740); +INSERT INTO num_exp_div VALUES (9,2,.72591434384152961526); +INSERT INTO num_exp_add VALUES (9,3,-24926799.735047420); +INSERT INTO num_exp_sub VALUES (9,3,-24926808.355047420); +INSERT INTO num_exp_mul VALUES (9,3,-107434525.43415438020); +INSERT INTO num_exp_div VALUES (9,3,-5783481.21694835730858468677); +INSERT INTO num_exp_add VALUES (9,4,-17127342.633147420); +INSERT INTO num_exp_sub VALUES (9,4,-32726265.456947420); +INSERT INTO num_exp_mul VALUES (9,4,-194415646271340.1815956522980); +INSERT INTO num_exp_div VALUES (9,4,-3.19596478892958416484); +INSERT INTO num_exp_add VALUES (9,5,-24910407.006556420); +INSERT INTO num_exp_sub VALUES (9,5,-24943201.083538420); +INSERT INTO num_exp_mul VALUES 
(9,5,-408725765384.257043660243220); +INSERT INTO num_exp_div VALUES (9,5,-1520.20159364322004505807); +INSERT INTO num_exp_add VALUES (9,6,-24832902.467417160); +INSERT INTO num_exp_sub VALUES (9,6,-25020705.622677680); +INSERT INTO num_exp_mul VALUES (9,6,-2340666225110.29929521292692920); +INSERT INTO num_exp_div VALUES (9,6,-265.45671195426965751280); +INSERT INTO num_exp_add VALUES (9,7,-107955289.045047420); +INSERT INTO num_exp_sub VALUES (9,7,58101680.954952580); +INSERT INTO num_exp_mul VALUES (9,7,2069634775752159.035758700); +INSERT INTO num_exp_div VALUES (9,7,.30021990699995814689); +INSERT INTO num_exp_add VALUES (9,8,-24851923.045047420); +INSERT INTO num_exp_sub VALUES (9,8,-25001685.045047420); +INSERT INTO num_exp_mul VALUES (9,8,-1866544013697.195857020); +INSERT INTO num_exp_div VALUES (9,8,-332.88556569820675471748); +INSERT INTO num_exp_add VALUES (9,9,-49853608.090094840); +INSERT INTO num_exp_sub VALUES (9,9,0); +INSERT INTO num_exp_mul VALUES (9,9,621345559900192.420120630048656400); +INSERT INTO num_exp_div VALUES (9,9,1.00000000000000000000); +-- COMMIT TRANSACTION; +-- BEGIN TRANSACTION; +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. 
+INSERT INTO num_exp_sqrt VALUES (0,0); +INSERT INTO num_exp_sqrt VALUES (1,0); +INSERT INTO num_exp_sqrt VALUES (2,5859.90547836712524903505); +INSERT INTO num_exp_sqrt VALUES (3,2.07605394920266944396); +INSERT INTO num_exp_sqrt VALUES (4,2792.75158435189147418923); +INSERT INTO num_exp_sqrt VALUES (5,128.05092147657509145473); +INSERT INTO num_exp_sqrt VALUES (6,306.43364311096782703406); +INSERT INTO num_exp_sqrt VALUES (7,9111.99676251039939975230); +INSERT INTO num_exp_sqrt VALUES (8,273.64392922189960397542); +INSERT INTO num_exp_sqrt VALUES (9,4992.67503899937593364766); +-- COMMIT TRANSACTION; +-- BEGIN TRANSACTION; +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_ln VALUES (0,double('NaN')); +INSERT INTO num_exp_ln VALUES (1,double('NaN')); +INSERT INTO num_exp_ln VALUES (2,17.35177750493897715514); +INSERT INTO num_exp_ln VALUES (3,1.46093790411565641971); +INSERT INTO num_exp_ln VALUES (4,15.86956523951936572464); +INSERT INTO num_exp_ln VALUES (5,9.70485601768871834038); +INSERT INTO num_exp_ln VALUES (6,11.45000246622944403127); +INSERT INTO num_exp_ln VALUES (7,18.23469429965478772991); +INSERT INTO num_exp_ln VALUES (8,11.22365546576315513668); +INSERT INTO num_exp_ln VALUES (9,17.03145425013166006962); +-- COMMIT TRANSACTION; +-- BEGIN TRANSACTION; +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. 
+-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_log10 VALUES (0,double('NaN')); +INSERT INTO num_exp_log10 VALUES (1,double('NaN')); +INSERT INTO num_exp_log10 VALUES (2,7.53578122160797276459); +INSERT INTO num_exp_log10 VALUES (3,.63447727016073160075); +INSERT INTO num_exp_log10 VALUES (4,6.89206461372691743345); +INSERT INTO num_exp_log10 VALUES (5,4.21476541614777768626); +INSERT INTO num_exp_log10 VALUES (6,4.97267288886207207671); +INSERT INTO num_exp_log10 VALUES (7,7.91922711353275546914); +INSERT INTO num_exp_log10 VALUES (8,4.87437163556421004138); +INSERT INTO num_exp_log10 VALUES (9,7.39666659961986567059); +-- COMMIT TRANSACTION; +-- BEGIN TRANSACTION; +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. +-- [SPARK-28315] Decimal can not accept NaN as input +INSERT INTO num_exp_power_10_ln VALUES (0,double('NaN')); +INSERT INTO num_exp_power_10_ln VALUES (1,double('NaN')); +INSERT INTO num_exp_power_10_ln VALUES (2,224790267919917955.13261618583642653184); +INSERT INTO num_exp_power_10_ln VALUES (3,28.90266599445155957393); +INSERT INTO num_exp_power_10_ln VALUES (4,7405685069594999.07733999469386277636); +INSERT INTO num_exp_power_10_ln VALUES (5,5068226527.32127265408584640098); +INSERT INTO num_exp_power_10_ln VALUES (6,281839893606.99372343357047819067); +-- In Spark, decimal can only support precision up to 38 +INSERT INTO num_exp_power_10_ln VALUES (7,1716699575118597095.42330819910640247627); +INSERT INTO num_exp_power_10_ln VALUES (8,167361463828.07491320069016125952); +INSERT INTO num_exp_power_10_ln VALUES (9,107511333880052007.04141124673540337457); +-- COMMIT TRANSACTION; +-- BEGIN TRANSACTION; +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. 
To test all the INSERT queries below, +-- we rewrote them into the other typed literals. +INSERT INTO num_data VALUES (0, 0); +INSERT INTO num_data VALUES (1, 0); +INSERT INTO num_data VALUES (2, -34338492.215397047); +INSERT INTO num_data VALUES (3, 4.31); +INSERT INTO num_data VALUES (4, 7799461.4119); +INSERT INTO num_data VALUES (5, 16397.038491); +INSERT INTO num_data VALUES (6, 93901.57763026); +INSERT INTO num_data VALUES (7, -83028485); +INSERT INTO num_data VALUES (8, 74881); +INSERT INTO num_data VALUES (9, -24926804.045047420); +-- COMMIT TRANSACTION; + +SELECT * FROM num_data; + +-- ****************************** +-- * Create indices for faster checks +-- ****************************** + +-- CREATE UNIQUE INDEX num_exp_add_idx ON num_exp_add (id1, id2); +-- CREATE UNIQUE INDEX num_exp_sub_idx ON num_exp_sub (id1, id2); +-- CREATE UNIQUE INDEX num_exp_div_idx ON num_exp_div (id1, id2); +-- CREATE UNIQUE INDEX num_exp_mul_idx ON num_exp_mul (id1, id2); +-- CREATE UNIQUE INDEX num_exp_sqrt_idx ON num_exp_sqrt (id); +-- CREATE UNIQUE INDEX num_exp_ln_idx ON num_exp_ln (id); +-- CREATE UNIQUE INDEX num_exp_log10_idx ON num_exp_log10 (id); +-- CREATE UNIQUE INDEX num_exp_power_10_ln_idx ON num_exp_power_10_ln (id); + +-- VACUUM ANALYZE num_exp_add; +-- VACUUM ANALYZE num_exp_sub; +-- VACUUM ANALYZE num_exp_div; +-- VACUUM ANALYZE num_exp_mul; +-- VACUUM ANALYZE num_exp_sqrt; +-- VACUUM ANALYZE num_exp_ln; +-- VACUUM ANALYZE num_exp_log10; +-- VACUUM ANALYZE num_exp_power_10_ln; + +-- ****************************** +-- * Now check the behaviour of the NUMERIC type +-- ****************************** + +-- ****************************** +-- * Addition check +-- ****************************** +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, t2.id, t1.val + t2.val + FROM num_data t1, num_data t2; +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_add t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != 
t2.expected; + +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val + t2.val, 10) + FROM num_data t1, num_data t2; +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 10) as expected + FROM num_result t1, num_exp_add t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 10); + +-- ****************************** +-- * Subtraction check +-- ****************************** +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, t2.id, t1.val - t2.val + FROM num_data t1, num_data t2; +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_sub t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != t2.expected; + +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val - t2.val, 40) + FROM num_data t1, num_data t2; +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 40) + FROM num_result t1, num_exp_sub t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 40); + +-- ****************************** +-- * Multiply check +-- ****************************** +-- [SPARK-28316] Decimal precision issue +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, t2.id, t1.val, t2.val, t1.val * t2.val + FROM num_data t1, num_data t2; +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_mul t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != t2.expected; + +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val * t2.val, 30) + FROM num_data t1, num_data t2; +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 30) as expected + FROM num_result t1, num_exp_mul t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 30); + +-- ****************************** +-- * Division check +-- ****************************** +-- [SPARK-28316] Decimal precision issue +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, 
t2.id, t1.val / t2.val + FROM num_data t1, num_data t2 + WHERE t2.val != '0.0'; +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_div t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != t2.expected; + +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 80) + FROM num_data t1, num_data t2 + WHERE t2.val != '0.0'; +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 80) as expected + FROM num_result t1, num_exp_div t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 80); + +-- ****************************** +-- * Square root check +-- ****************************** +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT id, 0, SQRT(ABS(val)) + FROM num_data; +SELECT t1.id1, t1.result, t2.expected + FROM num_result t1, num_exp_sqrt t2 + WHERE t1.id1 = t2.id + AND t1.result != t2.expected; + +-- ****************************** +-- * Natural logarithm check +-- ****************************** +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT id, 0, LN(ABS(val)) + FROM num_data + WHERE val != '0.0'; +SELECT t1.id1, t1.result, t2.expected + FROM num_result t1, num_exp_ln t2 + WHERE t1.id1 = t2.id + AND t1.result != t2.expected; + +-- ****************************** +-- * Logarithm base 10 check +-- ****************************** +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT id, 0, LOG(cast('10' as decimal(38, 18)), ABS(val)) + FROM num_data + WHERE val != '0.0'; +SELECT t1.id1, t1.result, t2.expected + FROM num_result t1, num_exp_log10 t2 + WHERE t1.id1 = t2.id + AND t1.result != t2.expected; + +-- ****************************** +-- * POWER(10, LN(value)) check +-- ****************************** +-- [SPARK-28316] Decimal precision issue +TRUNCATE TABLE num_result; +INSERT INTO num_result SELECT id, 0, POWER(cast('10' as decimal(38, 18)), LN(ABS(round(val,200)))) + FROM num_data + WHERE val != '0.0'; +SELECT t1.id1, t1.result, 
t2.expected + FROM num_result t1, num_exp_power_10_ln t2 + WHERE t1.id1 = t2.id + AND t1.result != t2.expected; + +-- ****************************** +-- * miscellaneous checks for things that have been broken in the past... +-- ****************************** +-- numeric AVG used to fail on some platforms +SELECT AVG(val) FROM num_data; +-- [SPARK-28316] STDDEV and VARIANCE returns double type +-- Skip it because: Expected "2.779120328758835[]E7", but got "2.779120328758835[4]E7" +-- SELECT STDDEV(val) FROM num_data; +-- Skip it because: Expected "7.72350980172061[8]E14", but got "7.72350980172061[6]E14" +-- SELECT VARIANCE(val) FROM num_data; + +-- Check for appropriate rounding and overflow +CREATE TABLE fract_only (id int, val decimal(4,4)) USING parquet; +INSERT INTO fract_only VALUES (1, 0.0); +INSERT INTO fract_only VALUES (2, 0.1); +-- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL +-- INSERT INTO fract_only VALUES (3, '1.0'); -- should fail +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. +INSERT INTO fract_only VALUES (4, -0.9999); +INSERT INTO fract_only VALUES (5, 0.99994); +-- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL +-- INSERT INTO fract_only VALUES (6, '0.99995'); -- should fail +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. 
+INSERT INTO fract_only VALUES (7, 0.00001); +INSERT INTO fract_only VALUES (8, 0.00017); +SELECT * FROM fract_only; +DROP TABLE fract_only; + +-- [SPARK-28315] Decimal can not accept NaN as input +-- [SPARK-27923] Decimal type can not accept Infinity and -Infinity +-- Check inf/nan conversion behavior +SELECT decimal(double('NaN')); +SELECT decimal(double('Infinity')); +SELECT decimal(double('-Infinity')); +SELECT decimal(float('NaN')); +SELECT decimal(float('Infinity')); +SELECT decimal(float('-Infinity')); + +-- Simple check that ceil(), floor(), and round() work correctly +CREATE TABLE ceil_floor_round (a decimal(38, 18)) USING parquet; +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. +INSERT INTO ceil_floor_round VALUES (-5.5); +INSERT INTO ceil_floor_round VALUES (-5.499999); +INSERT INTO ceil_floor_round VALUES (9.5); +INSERT INTO ceil_floor_round VALUES (9.4999999); +INSERT INTO ceil_floor_round VALUES (0.0); +INSERT INTO ceil_floor_round VALUES (0.0000001); +INSERT INTO ceil_floor_round VALUES (-0.000001); +SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round; +DROP TABLE ceil_floor_round; + +-- [SPARK-28007] Caret operator (^) means bitwise XOR in Spark and exponentiation in Postgres +-- Check rounding, it should round ties away from zero. +-- SELECT i as pow, +-- round((-2.5 * 10 ^ i)::numeric, -i), +-- round((-1.5 * 10 ^ i)::numeric, -i), +-- round((-0.5 * 10 ^ i)::numeric, -i), +-- round((0.5 * 10 ^ i)::numeric, -i), +-- round((1.5 * 10 ^ i)::numeric, -i), +-- round((2.5 * 10 ^ i)::numeric, -i) +-- FROM generate_series(-5,5) AS t(i); + +-- [SPARK-21117] Built-in SQL Function Support - WIDTH_BUCKET +-- Testing for width_bucket(). For convenience, we test both the +-- numeric and float8 versions of the function in this file. 
+ +-- errors +-- SELECT width_bucket(5.0, 3.0, 4.0, 0); +-- SELECT width_bucket(5.0, 3.0, 4.0, -5); +-- SELECT width_bucket(3.5, 3.0, 3.0, 888); +-- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0); +-- SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5); +-- SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888); +-- SELECT width_bucket('NaN', 3.0, 4.0, 888); +-- SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888); + +-- normal operation +-- CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8); + +-- COPY width_bucket_test (operand_num) FROM stdin; +-- -5.2 +-- -0.0000000001 +-- 0.000000000001 +-- 1 +-- 1.99999999999999 +-- 2 +-- 2.00000000000001 +-- 3 +-- 4 +-- 4.5 +-- 5 +-- 5.5 +-- 6 +-- 7 +-- 8 +-- 9 +-- 9.99999999999999 +-- 10 +-- 10.0000000000001 +-- \. + +-- UPDATE width_bucket_test SET operand_f8 = operand_num::float8; + +-- SELECT +-- operand_num, +-- width_bucket(operand_num, 0, 10, 5) AS wb_1, +-- width_bucket(operand_f8, 0, 10, 5) AS wb_1f, +-- width_bucket(operand_num, 10, 0, 5) AS wb_2, +-- width_bucket(operand_f8, 10, 0, 5) AS wb_2f, +-- width_bucket(operand_num, 2, 8, 4) AS wb_3, +-- width_bucket(operand_f8, 2, 8, 4) AS wb_3f, +-- width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4, +-- width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f, +-- width_bucket(operand_num, -25, 25, 10) AS wb_5, +-- width_bucket(operand_f8, -25, 25, 10) AS wb_5f +-- FROM width_bucket_test; + +-- for float8 only, check positive and negative infinity: we require +-- finite bucket bounds, but allow an infinite operand +-- SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error +-- SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error +-- SELECT width_bucket('Infinity'::float8, 1, 10, 10), +-- width_bucket('-Infinity'::float8, 1, 10, 10); + +-- DROP TABLE width_bucket_test; + +-- [SPARK-28137] Missing Data Type Formatting Functions: TO_CHAR +-- TO_CHAR() +-- +-- SELECT '' AS to_char_1, to_char(val, 
'9G999G999G999G999G999') +-- FROM num_data; + +-- SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999') +-- FROM num_data; + +-- SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR') +-- FROM num_data; + +-- SELECT '' AS to_char_4, to_char(val, '9999999999999999.999999999999999S') +-- FROM num_data; + +-- SELECT '' AS to_char_5, to_char(val, 'MI9999999999999999.999999999999999') FROM num_data; +-- SELECT '' AS to_char_6, to_char(val, 'FMS9999999999999999.999999999999999') FROM num_data; +-- SELECT '' AS to_char_7, to_char(val, 'FM9999999999999999.999999999999999THPR') FROM num_data; +-- SELECT '' AS to_char_8, to_char(val, 'SG9999999999999999.999999999999999th') FROM num_data; +-- SELECT '' AS to_char_9, to_char(val, '0999999999999999.999999999999999') FROM num_data; +-- SELECT '' AS to_char_10, to_char(val, 'S0999999999999999.999999999999999') FROM num_data; +-- SELECT '' AS to_char_11, to_char(val, 'FM0999999999999999.999999999999999') FROM num_data; +-- SELECT '' AS to_char_12, to_char(val, 'FM9999999999999999.099999999999999') FROM num_data; +-- SELECT '' AS to_char_13, to_char(val, 'FM9999999999990999.990999999999999') FROM num_data; +-- SELECT '' AS to_char_14, to_char(val, 'FM0999999999999999.999909999999999') FROM num_data; +-- SELECT '' AS to_char_15, to_char(val, 'FM9999999990999999.099999999999999') FROM num_data; +-- SELECT '' AS to_char_16, to_char(val, 'L9999999999999999.099999999999999') FROM num_data; +-- SELECT '' AS to_char_17, to_char(val, 'FM9999999999999999.99999999999999') FROM num_data; +-- SELECT '' AS to_char_18, to_char(val, 'S 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9') FROM num_data; +-- SELECT '' AS to_char_19, to_char(val, 'FMS 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 . 
9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9') FROM num_data; +-- SELECT '' AS to_char_20, to_char(val, E'99999 "text" 9999 "9999" 999 "\\"text between quote marks\\"" 9999') FROM num_data; +-- SELECT '' AS to_char_21, to_char(val, '999999SG9999999999') FROM num_data; +-- SELECT '' AS to_char_22, to_char(val, 'FM9999999999999999.999999999999999') FROM num_data; +-- SELECT '' AS to_char_23, to_char(val, '9.999EEEE') FROM num_data; + +-- SELECT '' AS to_char_24, to_char('100'::numeric, 'FM999.9'); +-- SELECT '' AS to_char_25, to_char('100'::numeric, 'FM999.'); +-- SELECT '' AS to_char_26, to_char('100'::numeric, 'FM999'); + +-- Check parsing of literal text in a format string +-- SELECT '' AS to_char_27, to_char('100'::numeric, 'foo999'); +-- SELECT '' AS to_char_28, to_char('100'::numeric, 'f\oo999'); +-- SELECT '' AS to_char_29, to_char('100'::numeric, 'f\\oo999'); +-- SELECT '' AS to_char_30, to_char('100'::numeric, 'f\"oo999'); +-- SELECT '' AS to_char_31, to_char('100'::numeric, 'f\\"oo999'); +-- SELECT '' AS to_char_32, to_char('100'::numeric, 'f"ool"999'); +-- SELECT '' AS to_char_33, to_char('100'::numeric, 'f"\ool"999'); +-- SELECT '' AS to_char_34, to_char('100'::numeric, 'f"\\ool"999'); +-- SELECT '' AS to_char_35, to_char('100'::numeric, 'f"ool\"999'); +-- SELECT '' AS to_char_36, to_char('100'::numeric, 'f"ool\\"999'); + +-- [SPARK-28137] Missing Data Type Formatting Functions: TO_NUMBER +-- TO_NUMBER() +-- +-- SET lc_numeric = 'C'; +-- SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999'); +-- SELECT '' AS to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999'); +-- SELECT '' AS to_number_3, to_number('<564646.654564>', '999999.999999PR'); +-- SELECT '' AS to_number_4, to_number('0.00001-', '9.999999S'); +-- SELECT '' AS to_number_5, to_number('5.01-', 'FM9.999999S'); +-- SELECT '' AS to_number_5, to_number('5.01-', 'FM9.999999MI'); +-- SELECT '' AS to_number_7, to_number('5 4 4 4 4 8 . 7 8', '9 9 9 9 9 9 . 
9 9'); +-- SELECT '' AS to_number_8, to_number('.01', 'FM9.99'); +-- SELECT '' AS to_number_9, to_number('.0', '99999999.99999999'); +-- SELECT '' AS to_number_10, to_number('0', '99.99'); +-- SELECT '' AS to_number_11, to_number('.-01', 'S99.99'); +-- SELECT '' AS to_number_12, to_number('.01-', '99.99S'); +-- SELECT '' AS to_number_13, to_number(' . 0 1-', ' 9 9 . 9 9 S'); +-- SELECT '' AS to_number_14, to_number('34,50','999,99'); +-- SELECT '' AS to_number_15, to_number('123,000','999G'); +-- SELECT '' AS to_number_16, to_number('123456','999G999'); +-- SELECT '' AS to_number_17, to_number('$1234.56','L9,999.99'); +-- SELECT '' AS to_number_18, to_number('$1234.56','L99,999.99'); +-- SELECT '' AS to_number_19, to_number('$1,234.56','L99,999.99'); +-- SELECT '' AS to_number_20, to_number('1234.56','L99,999.99'); +-- SELECT '' AS to_number_21, to_number('1,234.56','L99,999.99'); +-- SELECT '' AS to_number_22, to_number('42nd', '99th'); +-- RESET lc_numeric; + +-- +-- Input syntax +-- + +CREATE TABLE num_input_test (n1 decimal(38, 18)) USING parquet; + +-- good inputs +-- PostgreSQL implicitly casts string literals to data with decimal types, but +-- Spark does not support that kind of implicit casts. To test all the INSERT queries below, +-- we rewrote them into the other typed literals. 
+INSERT INTO num_input_test VALUES (double(trim(' 123'))); +INSERT INTO num_input_test VALUES (double(trim(' 3245874 '))); +INSERT INTO num_input_test VALUES (double(trim(' -93853'))); +INSERT INTO num_input_test VALUES (555.50); +INSERT INTO num_input_test VALUES (-555.50); +-- [SPARK-28315] Decimal can not accept NaN as input +-- INSERT INTO num_input_test VALUES (trim('NaN ')); +-- INSERT INTO num_input_test VALUES (trim(' nan')); + +-- [SPARK-27923] Spark SQL accept bad inputs to NULL +-- bad inputs +-- INSERT INTO num_input_test VALUES (' '); +-- INSERT INTO num_input_test VALUES (' 1234 %'); +-- INSERT INTO num_input_test VALUES ('xyz'); +-- INSERT INTO num_input_test VALUES ('- 1234'); +-- INSERT INTO num_input_test VALUES ('5 . 0'); +-- INSERT INTO num_input_test VALUES ('5. 0 '); +-- INSERT INTO num_input_test VALUES (''); +-- INSERT INTO num_input_test VALUES (' N aN '); + +SELECT * FROM num_input_test; + +-- [SPARK-28318] Decimal can only support precision up to 38 +-- +-- Test some corner cases for multiplication +-- + +-- select 4790999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; + +-- select 4789999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; + +-- select 4770999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; + +-- select 4769999999999999999999999999999999999999999999999999999999999999999999999999999999999999 * 9999999999999999999999999999999999999999999999999999999999999999999999999999999999999999; + +-- +-- Test some corner cases for division +-- +-- 999999999999999999999 is overflow for SYSTEM_DEFAULT(decimal(38, 18)), we use BigIntDecimal(decimal(38, 0)). 
+select cast(999999999999999999999 as decimal(38, 0))/1000000000000000000000; + +select div(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000); +select mod(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000); +select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000); +select mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000); +select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000)*1000000000000000000000 + mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000); +select mod (70.0,70) ; +select div (70.0,70) ; +select 70.0 / 70 ; +select 12345678901234567890 % 123; +-- [SPARK-2659] HiveQL: Division operator should always perform fractional division +-- select 12345678901234567890 DIV 123; +-- select div(12345678901234567890, 123); +-- select div(12345678901234567890, 123) * 123 + 12345678901234567890 % 123; + +-- [SPARK-28007] Caret operator (^) means bitwise XOR in Spark and exponentiation in Postgres +-- +-- Test code path for raising to integer powers +-- + +-- select 10.0 ^ -2147483648 as rounds_to_zero; +-- select 10.0 ^ -2147483647 as rounds_to_zero; +-- select 10.0 ^ 2147483647 as overflows; +-- select 117743296169.0 ^ 1000000000 as overflows; + +-- cases that used to return inaccurate results +-- select 3.789 ^ 21; +-- select 3.789 ^ 35; +-- select 1.2 ^ 345; +-- select 0.12 ^ (-20); + +-- cases that used to error out +-- select 0.12 ^ (-25); +-- select 0.5678 ^ (-85); + +-- +-- Tests for raising to non-integer powers +-- + +-- special cases +-- select 0.0 ^ 0.0; +-- select (-12.34) ^ 0.0; +-- select 12.34 ^ 0.0; +-- select 0.0 ^ 12.34; + +-- NaNs +-- select 'NaN'::numeric ^ 'NaN'::numeric; +-- select 'NaN'::numeric ^ 0; +-- select 'NaN'::numeric ^ 1; +-- select 0 ^ 'NaN'::numeric; +-- select 1 ^ 'NaN'::numeric; + +-- invalid inputs +-- select 0.0 ^ (-12.34); +-- select (-12.34) ^ 1.2; + +-- cases that used to generate 
inaccurate results +-- select 32.1 ^ 9.8; +-- select 32.1 ^ (-9.8); +-- select 12.3 ^ 45.6; +-- select 12.3 ^ (-45.6); + +-- big test +-- select 1.234 ^ 5678; + +-- +-- Tests for EXP() +-- + +-- special cases +select exp(0.0); +select exp(1.0); +-- [SPARK-28316] EXP returns double type for decimal input +-- [SPARK-28318] Decimal can only support precision up to 38 +-- select exp(1.0::numeric(71,70)); + +-- cases that used to generate inaccurate results +select exp(32.999); +select exp(-32.999); +select exp(123.456); +select exp(-123.456); + +-- big test +select exp(1234.5678); + +-- +-- Tests for generate_series +-- +select * from range(cast(0.0 as decimal(38, 18)), cast(4.0 as decimal(38, 18))); +select * from range(cast(0.1 as decimal(38, 18)), cast(4.0 as decimal(38, 18)), cast(1.3 as decimal(38, 18))); +select * from range(cast(4.0 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), cast(-2.2 as decimal(38, 18))); +-- Trigger errors +-- select * from generate_series(-100::numeric, 100::numeric, 0::numeric); +-- select * from generate_series(-100::numeric, 100::numeric, 'nan'::numeric); +-- select * from generate_series('nan'::numeric, 100::numeric, 10::numeric); +-- select * from generate_series(0::numeric, 'nan'::numeric, 10::numeric); +-- [SPARK-28007] Caret operator (^) means bitwise XOR in Spark and exponentiation in Postgres +-- Checks maximum, output is truncated +-- select (i / (10::numeric ^ 131071))::numeric(1,0) +-- from generate_series(6 * (10::numeric ^ 131071), +-- 9 * (10::numeric ^ 131071), +-- 10::numeric ^ 131071) as a(i); +-- Check usage with variables +-- select * from generate_series(1::numeric, 3::numeric) i, generate_series(i,3) j; +-- select * from generate_series(1::numeric, 3::numeric) i, generate_series(1,i) j; +-- select * from generate_series(1::numeric, 3::numeric) i, generate_series(1,5,i) j; + +-- +-- Tests for LN() +-- + +-- [SPARK-27923] Invalid inputs for LN throws exception at PostgreSQL +-- Invalid inputs +-- select 
ln(-12.34); +-- select ln(0.0); + +-- Some random tests +select ln(1.2345678e-28); +select ln(0.0456789); +-- [SPARK-28318] Decimal can only support precision up to 38 +-- select ln(0.349873948359354029493948309745709580730482050975); +select ln(0.99949452); +select ln(1.00049687395); +select ln(1234.567890123456789); +select ln(5.80397490724e5); +select ln(9.342536355e34); + +-- +-- Tests for LOG() (base 10) +-- + +-- [SPARK-27923] Invalid inputs for LOG throws exception at PostgreSQL +-- invalid inputs +-- select log(-12.34); +-- select log(0.0); + +-- some random tests +-- [SPARK-28318] Decimal can only support precision up to 38 +-- select log(1.234567e-89); +-- [SPARK-28324] The LOG function using 10 as the base, but Spark using E +select log(3.4634998359873254962349856073435545); +select log(9.999999999999999999); +select log(10.00000000000000000); +select log(10.00000000000000001); +select log(590489.45235237); + +-- +-- Tests for LOG() (arbitrary base) +-- + +-- [SPARK-27923] Invalid inputs for LOG throws exception at PostgreSQL +-- invalid inputs +-- select log(-12.34, 56.78); +-- select log(-12.34, -56.78); +-- select log(12.34, -56.78); +-- select log(0.0, 12.34); +-- select log(12.34, 0.0); +-- select log(1.0, 12.34); + +-- some random tests +-- [SPARK-28318] Decimal can only support precision up to 38 +-- select log(1.23e-89, 6.4689e45); +select log(0.99923, 4.58934e34); +select log(1.000016, 8.452010e18); +-- [SPARK-28318] Decimal can only support precision up to 38 +-- select log(3.1954752e47, 9.4792021e-73); + +-- [SPARK-28317] Built-in Mathematical Functions: SCALE +-- +-- Tests for scale() +-- + +-- select scale(numeric 'NaN'); +-- select scale(NULL::numeric); +-- select scale(1.12); +-- select scale(0); +-- select scale(0.00); +-- select scale(1.12345); +-- select scale(110123.12475871856128); +-- select scale(-1123.12471856128); +-- select scale(-13.000000000000000); + +-- +-- Tests for SUM() +-- + +-- cases that need carry propagation +SELECT 
SUM(decimal(9999)) FROM range(1, 100001); +SELECT SUM(decimal(-9999)) FROM range(1, 100001); + +DROP TABLE num_data; +DROP TABLE num_exp_add; +DROP TABLE num_exp_sub; +DROP TABLE num_exp_div; +DROP TABLE num_exp_mul; +DROP TABLE num_exp_sqrt; +DROP TABLE num_exp_ln; +DROP TABLE num_exp_log10; +DROP TABLE num_exp_power_10_ln; +DROP TABLE num_result; +DROP TABLE num_input_test; diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/select.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_distinct.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_distinct.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_distinct.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_distinct.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_having.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_implicit.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/select_implicit.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_implicit.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/strings.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/strings.sql similarity index 92% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/strings.sql rename to 
sql/core/src/test/resources/sql-tests/inputs/postgreSQL/strings.sql index 05841af27dd2e..541ff0bdad745 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/strings.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/strings.sql @@ -270,55 +270,54 @@ SELECT 'indio' NOT LIKE 'in__o' AS `false`; SELECT 'indio' LIKE 'in_o' AS `false`; SELECT 'indio' NOT LIKE 'in_o' AS `true`; --- [SPARK-28083] ANSI SQL: LIKE predicate: ESCAPE clause -- unused escape character --- SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS "true"; --- SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS "false"; +SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS `true`; +SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS `false`; --- SELECT 'indio' LIKE 'ind_o' ESCAPE '$' AS "true"; --- SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS "false"; +SELECT 'indio' LIKE 'ind_o' ESCAPE '$' AS `true`; +SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS `false`; -- escape character -- E061-05 like predicate with escape clause --- SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS "true"; --- SELECT 'h%' NOT LIKE 'h#%' ESCAPE '#' AS "false"; +SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS `true`; +SELECT 'h%' NOT LIKE 'h#%' ESCAPE '#' AS `false`; --- SELECT 'h%wkeye' LIKE 'h#%' ESCAPE '#' AS "false"; --- SELECT 'h%wkeye' NOT LIKE 'h#%' ESCAPE '#' AS "true"; +SELECT 'h%wkeye' LIKE 'h#%' ESCAPE '#' AS `false`; +SELECT 'h%wkeye' NOT LIKE 'h#%' ESCAPE '#' AS `true`; --- SELECT 'h%wkeye' LIKE 'h#%%' ESCAPE '#' AS "true"; --- SELECT 'h%wkeye' NOT LIKE 'h#%%' ESCAPE '#' AS "false"; +SELECT 'h%wkeye' LIKE 'h#%%' ESCAPE '#' AS `true`; +SELECT 'h%wkeye' NOT LIKE 'h#%%' ESCAPE '#' AS `false`; --- SELECT 'h%awkeye' LIKE 'h#%a%k%e' ESCAPE '#' AS "true"; --- SELECT 'h%awkeye' NOT LIKE 'h#%a%k%e' ESCAPE '#' AS "false"; +SELECT 'h%awkeye' LIKE 'h#%a%k%e' ESCAPE '#' AS `true`; +SELECT 'h%awkeye' NOT LIKE 'h#%a%k%e' ESCAPE '#' AS `false`; --- SELECT 'indio' LIKE '_ndio' ESCAPE '$' AS "true"; --- SELECT 'indio' NOT LIKE '_ndio' ESCAPE '$' AS "false"; +SELECT 
'indio' LIKE '_ndio' ESCAPE '$' AS `true`; +SELECT 'indio' NOT LIKE '_ndio' ESCAPE '$' AS `false`; --- SELECT 'i_dio' LIKE 'i$_d_o' ESCAPE '$' AS "true"; --- SELECT 'i_dio' NOT LIKE 'i$_d_o' ESCAPE '$' AS "false"; +SELECT 'i_dio' LIKE 'i$_d_o' ESCAPE '$' AS `true`; +SELECT 'i_dio' NOT LIKE 'i$_d_o' ESCAPE '$' AS `false`; --- SELECT 'i_dio' LIKE 'i$_nd_o' ESCAPE '$' AS "false"; --- SELECT 'i_dio' NOT LIKE 'i$_nd_o' ESCAPE '$' AS "true"; +SELECT 'i_dio' LIKE 'i$_nd_o' ESCAPE '$' AS `false`; +SELECT 'i_dio' NOT LIKE 'i$_nd_o' ESCAPE '$' AS `true`; --- SELECT 'i_dio' LIKE 'i$_d%o' ESCAPE '$' AS "true"; --- SELECT 'i_dio' NOT LIKE 'i$_d%o' ESCAPE '$' AS "false"; +SELECT 'i_dio' LIKE 'i$_d%o' ESCAPE '$' AS `true`; +SELECT 'i_dio' NOT LIKE 'i$_d%o' ESCAPE '$' AS `false`; -- escape character same as pattern character --- SELECT 'maca' LIKE 'm%aca' ESCAPE '%' AS "true"; --- SELECT 'maca' NOT LIKE 'm%aca' ESCAPE '%' AS "false"; +SELECT 'maca' LIKE 'm%aca' ESCAPE '%' AS `true`; +SELECT 'maca' NOT LIKE 'm%aca' ESCAPE '%' AS `false`; --- SELECT 'ma%a' LIKE 'm%a%%a' ESCAPE '%' AS "true"; --- SELECT 'ma%a' NOT LIKE 'm%a%%a' ESCAPE '%' AS "false"; +SELECT 'ma%a' LIKE 'm%a%%a' ESCAPE '%' AS `true`; +SELECT 'ma%a' NOT LIKE 'm%a%%a' ESCAPE '%' AS `false`; --- SELECT 'bear' LIKE 'b_ear' ESCAPE '_' AS "true"; --- SELECT 'bear' NOT LIKE 'b_ear' ESCAPE '_' AS "false"; +SELECT 'bear' LIKE 'b_ear' ESCAPE '_' AS `true`; +SELECT 'bear' NOT LIKE 'b_ear' ESCAPE '_' AS `false`; --- SELECT 'be_r' LIKE 'b_e__r' ESCAPE '_' AS "true"; --- SELECT 'be_r' NOT LIKE 'b_e__r' ESCAPE '_' AS "false"; +SELECT 'be_r' LIKE 'b_e__r' ESCAPE '_' AS `true`; +SELECT 'be_r' NOT LIKE 'b_e__r' ESCAPE '_' AS `false`; --- SELECT 'be_r' LIKE '__e__r' ESCAPE '_' AS "false"; --- SELECT 'be_r' NOT LIKE '__e__r' ESCAPE '_' AS "true"; +SELECT 'be_r' LIKE '__e__r' ESCAPE '_' AS `false`; +SELECT 'be_r' NOT LIKE '__e__r' ESCAPE '_' AS `true`; -- [SPARK-28448] Implement ILIKE operator -- diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/text.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql similarity index 96% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/text.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql index 04d3acc145e95..05953123da86f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/text.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/text.sql @@ -44,11 +44,7 @@ select concat_ws(',',10,20,null,30); select concat_ws('',10,20,null,30); select concat_ws(NULL,10,20,null,30) is null; select reverse('abcde'); --- [SPARK-28036] Built-in udf left/right has inconsistent behavior --- [SPARK-28479] Parser error when enabling ANSI mode -set spark.sql.parser.ansi.enabled=false; select i, left('ahoj', i), right('ahoj', i) from range(-5, 6) t(i) order by i; -set spark.sql.parser.ansi.enabled=true; -- [SPARK-28037] Add built-in String Functions: quote_literal -- select quote_literal(''); -- select quote_literal('abc'''); diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/timestamp.sql similarity index 76% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/timestamp.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/timestamp.sql index 2b974816766bd..bf69da295a960 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/timestamp.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/timestamp.sql @@ -7,7 +7,6 @@ CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet; --- [SPARK-28141] Timestamp type can not accept special values -- Test shorthand input values -- We can't just "select" the results since they aren't constants; test for -- equality instead. 
We can do that by running the test inside a transaction @@ -17,22 +16,28 @@ CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet; -- block is entered exactly at local midnight; then 'now' and 'today' have -- the same values and the counts will come out different. --- INSERT INTO TIMESTAMP_TBL VALUES ('now'); +-- PostgreSQL implicitly casts string literals to data with timestamp types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('now')); -- SELECT pg_sleep(0.1); -- BEGIN; --- INSERT INTO TIMESTAMP_TBL VALUES ('now'); --- INSERT INTO TIMESTAMP_TBL VALUES ('today'); --- INSERT INTO TIMESTAMP_TBL VALUES ('yesterday'); --- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow'); +-- PostgreSQL implicitly casts string literals to data with timestamp types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('now')); +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('today')); +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('yesterday')); +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('tomorrow')); -- time zone should be ignored by this data type --- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow EST'); --- INSERT INTO TIMESTAMP_TBL VALUES ('tomorrow zulu'); - --- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'today'; --- SELECT count(*) AS Three FROM TIMESTAMP_TBL WHERE d1 = timestamp 'tomorrow'; --- SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'yesterday'; +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('tomorrow EST')); +-- [SPARK-29024] Ignore case while resolving time zones +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('tomorrow Zulu')); + +SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'today'; +SELECT count(*) AS Three FROM TIMESTAMP_TBL WHERE d1 = timestamp 'tomorrow'; +SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'yesterday'; +-- [SPARK-29025] Support seconds precision by the timestamp type -- SELECT 
count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp(2) 'now'; -- COMMIT; @@ -48,12 +53,14 @@ CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet; -- SELECT count(*) AS two FROM TIMESTAMP_TBL WHERE d1 = timestamp(2) 'now'; -- COMMIT; --- TRUNCATE TIMESTAMP_TBL; +TRUNCATE TABLE TIMESTAMP_TBL; -- Special values -- INSERT INTO TIMESTAMP_TBL VALUES ('-infinity'); -- INSERT INTO TIMESTAMP_TBL VALUES ('infinity'); --- INSERT INTO TIMESTAMP_TBL VALUES ('epoch'); +-- PostgreSQL implicitly casts string literals to data with timestamp types, but +-- Spark does not support that kind of implicit casts. +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('epoch')); -- [SPARK-27923] Spark SQL insert there obsolete special values to NULL -- Obsolete special values -- INSERT INTO TIMESTAMP_TBL VALUES ('invalid'); @@ -72,14 +79,16 @@ CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet; -- INSERT INTO TIMESTAMP_TBL VALUES ('Mon Feb 10 17:32:01.6 1997 PST'); -- ISO 8601 format -INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02'); -INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02 03:04:05'); -INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01-08'); +-- PostgreSQL implicitly casts string literals to data with timestamp types, but +-- Spark does not support that kind of implicit casts. 
+INSERT INTO TIMESTAMP_TBL VALUES (timestamp('1997-01-02')); +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('1997-01-02 03:04:05')); +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('1997-02-10 17:32:01-08')); -- INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01-0800'); -- INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01 -08:00'); -- INSERT INTO TIMESTAMP_TBL VALUES ('19970210 173201 -0800'); -- INSERT INTO TIMESTAMP_TBL VALUES ('1997-06-10 17:32:01 -07:00'); -INSERT INTO TIMESTAMP_TBL VALUES ('2001-09-22T18:19:20'); +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('2001-09-22T18:19:20')); -- POSIX format (note that the timezone abbrev is just decoration here) -- INSERT INTO TIMESTAMP_TBL VALUES ('2000-03-15 08:14:01 GMT+8'); @@ -174,35 +183,32 @@ SELECT '' AS `16`, d1 FROM TIMESTAMP_TBL SELECT '' AS `49`, d1 FROM TIMESTAMP_TBL WHERE d1 >= timestamp '1997-01-02'; --- [SPARK-28425] Add more Date/Time Operators --- SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff --- FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; +SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; SELECT '' AS date_trunc_week, date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc; --- [SPARK-28425] Add more Date/Time Operators -- Test casting within a BETWEEN qualifier --- SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff --- FROM TIMESTAMP_TBL --- WHERE d1 BETWEEN timestamp '1902-01-01' --- AND timestamp '2038-01-01'; - --- [SPARK-28420] Date/Time Functions: date_part --- SELECT '' AS "54", d1 as "timestamp", --- date_part( 'year', d1) AS year, date_part( 'month', d1) AS month, --- date_part( 'day', d1) AS day, date_part( 'hour', d1) AS hour, --- date_part( 'minute', d1) AS minute, date_part( 'second', d1) AS second --- FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; - --- SELECT '' AS "54", d1 as "timestamp", --- date_part( 'quarter', d1) AS quarter, 
date_part( 'msec', d1) AS msec, --- date_part( 'usec', d1) AS usec --- FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; - --- SELECT '' AS "54", d1 as "timestamp", --- date_part( 'isoyear', d1) AS isoyear, date_part( 'week', d1) AS week, --- date_part( 'dow', d1) AS dow --- FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; +SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff + FROM TIMESTAMP_TBL + WHERE d1 BETWEEN timestamp '1902-01-01' + AND timestamp '2038-01-01'; + +SELECT '' AS `54`, d1 as `timestamp`, + date_part( 'year', d1) AS `year`, date_part( 'month', d1) AS `month`, + date_part( 'day', d1) AS `day`, date_part( 'hour', d1) AS `hour`, + date_part( 'minute', d1) AS `minute`, date_part( 'second', d1) AS `second` + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; + +SELECT '' AS `54`, d1 as `timestamp`, + date_part( 'quarter', d1) AS quarter, date_part( 'msec', d1) AS msec, + date_part( 'usec', d1) AS usec + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; + +SELECT '' AS `54`, d1 as `timestamp`, + date_part( 'isoyear', d1) AS isoyear, date_part( 'week', d1) AS week, + date_part( 'dow', d1) AS dow + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01'; -- [SPARK-28137] Data Type Formatting Functions -- TO_CHAR() diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/union.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/union.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/union.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/union.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql new file mode 100644 index 0000000000000..087d7a5befd19 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql @@ -0,0 +1,357 @@ +-- Portions Copyright (c) 
1996-2019, PostgreSQL Global Development Group +-- +-- Window Functions Testing +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L1-L319 + +-- Test window operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + +CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1; + +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- CREATE TABLE empsalary ( +-- depname string, +-- empno integer, +-- salary int, +-- enroll_date date +-- ) USING parquet; + +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- INSERT INTO empsalary VALUES ('develop', 10, 5200, '2007-08-01'); +-- INSERT INTO empsalary VALUES ('sales', 1, 5000, '2006-10-01'); +-- INSERT INTO empsalary VALUES ('personnel', 5, 3500, '2007-12-10'); +-- INSERT INTO empsalary VALUES ('sales', 4, 4800, '2007-08-08'); +-- INSERT INTO empsalary VALUES ('personnel', 2, 3900, '2006-12-23'); +-- INSERT INTO empsalary VALUES ('develop', 7, 4200, '2008-01-01'); +-- INSERT INTO empsalary VALUES ('develop', 9, 4500, '2008-01-01'); +-- INSERT INTO empsalary VALUES ('sales', 3, 4800, '2007-08-01'); +-- INSERT INTO empsalary VALUES ('develop', 8, 6000, '2006-10-01'); +-- INSERT INTO empsalary VALUES ('develop', 11, 5200, '2007-08-15'); + +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; + +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; + +-- with GROUP BY +SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1 +GROUP BY four, ten ORDER BY four, ten; + +-- [SPARK-29540] Thrift in 
some cases can't parse string to date +-- SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); + +-- [SPARK-28064] Order by does not accept a call to rank() +-- SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; + +-- empty window specification +SELECT COUNT(*) OVER () FROM tenk1 WHERE unique2 < 10; + +SELECT COUNT(*) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS (); + +-- no window operation +SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten); + +-- cumulative aggregate +SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10; + +SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT ntile(3) OVER (ORDER BY ten, four), ten, four FROM tenk1 WHERE unique2 < 10; + +-- [SPARK-28065] ntile does not accept NULL as input +-- SELECT ntile(NULL) OVER (ORDER BY ten, four), ten, four FROM tenk1 LIMIT 2; + +SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +-- [SPARK-28068] `lag` second argument must be a literal in Spark +-- SELECT lag(ten, four) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +-- [SPARK-28068] `lag` second argument must be a literal in Spark +-- SELECT lag(ten, four, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 
< 10; + +SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT first(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +-- last returns the last row of the frame, which is CURRENT ROW in ORDER BY window. +SELECT last(four) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; + +SELECT last(ten) OVER (PARTITION BY four), ten, four FROM +(SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s +ORDER BY four, ten; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- SELECT nth_value(ten, four + 1) OVER (PARTITION BY four), ten, four +-- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s; + +SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER (PARTITION BY two ORDER BY ten) AS wsum +FROM tenk1 GROUP BY ten, two; + +SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE two = 1)s WHERE unique2 < 10; + +SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) + + sum(hundred) OVER (PARTITION BY four ORDER BY ten)) AS cntsum + FROM tenk1 WHERE unique2 < 10; + +-- opexpr with different windows evaluation. 
+SELECT * FROM( + SELECT count(*) OVER (PARTITION BY four ORDER BY ten) + + sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS total, + count(*) OVER (PARTITION BY four ORDER BY ten) AS fourcount, + sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS twosum + FROM tenk1 +)sub WHERE total <> fourcount + twosum; + +SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHERE unique2 < 10; + +SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum +FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten); + +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- more than one window with GROUP BY +-- SELECT sum(salary), +-- row_number() OVER (ORDER BY depname), +-- sum(sum(salary)) OVER (ORDER BY depname DESC) +-- FROM empsalary GROUP BY depname; + +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- identical windows with different names +-- SELECT sum(salary) OVER w1, count(*) OVER w2 +-- FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); + +-- subplan +-- [SPARK-28379] Correlated scalar subqueries must be aggregated +-- SELECT lead(ten, (SELECT two FROM tenk1 WHERE s.unique2 = unique2)) OVER (PARTITION BY four ORDER BY ten) +-- FROM tenk1 s WHERE unique2 < 10; + +-- empty table +SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s; + +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- mixture of agg/wfunc in the same window +-- SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); + +-- Cannot safely cast 'enroll_date': StringType to DateType; +-- SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( +-- SELECT *, +-- CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(year FROM enroll_date) END * 500 AS bonus, +-- CASE WHEN +-- AVG(salary) OVER (PARTITION BY depname) < salary +-- THEN 200 END 
AS depadj FROM empsalary +-- )s; + +create temporary view int4_tbl as select * from values + (0), + (123456), + (-123456), + (2147483647), + (-2147483647) + as int4_tbl(f1); + +-- window function over ungrouped agg over empty row set (bug before 9.1) +SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42; + +-- window function with ORDER BY an expression involving aggregates (9.1 bug) +select ten, + sum(unique1) + sum(unique2) as res, + rank() over (order by sum(unique1) + sum(unique2)) as rank +from tenk1 +group by ten order by ten; + +-- window and aggregate with GROUP BY expression (9.2 bug) +-- explain +-- select first(max(x)) over (), y +-- from (select unique1 as x, ten+four as y from tenk1) ss +-- group by y; + +-- test non-default frame specifications +SELECT four, ten, +sum(ten) over (partition by four order by ten), +last(ten) over (partition by four order by ten) +FROM (select distinct ten, four from tenk1) ss; + +SELECT four, ten, +sum(ten) over (partition by four order by ten range between unbounded preceding and current row), +last(ten) over (partition by four order by ten range between unbounded preceding and current row) +FROM (select distinct ten, four from tenk1) ss; + +SELECT four, ten, +sum(ten) over (partition by four order by ten range between unbounded preceding and unbounded following), +last(ten) over (partition by four order by ten range between unbounded preceding and unbounded following) +FROM (select distinct ten, four from tenk1) ss; + +-- [SPARK-29451] Some queries with divisions in SQL windows are failling in Thrift +-- SELECT four, ten/4 as two, +-- sum(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row), +-- last(ten/4) over (partition by four order by ten/4 range between unbounded preceding and current row) +-- FROM (select distinct ten, four from tenk1) ss; + +-- [SPARK-29451] Some queries with divisions in SQL windows are failling in Thrift +-- SELECT four, ten/4 as two, +-- 
sum(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row), +-- last(ten/4) over (partition by four order by ten/4 rows between unbounded preceding and current row) +-- FROM (select distinct ten, four from tenk1) ss; + +SELECT sum(unique1) over (order by four range between current row and unbounded following), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (rows between current row and unbounded following), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (rows between 2 preceding and 2 following), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude current row), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude ties), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT first(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT first(unique1) over (ORDER BY four rows between 
current row and 2 following exclude ties), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT last(unique1) over (ORDER BY four rows between current row and 2 following exclude ties), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (rows between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (rows between 1 following and 3 following), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (rows between unbounded preceding and 1 following), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (w range between current row and unbounded following), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four); + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude current row), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four); + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four); + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (w range between unbounded preceding and current row exclude ties), +-- unique1, four +-- FROM tenk1 
WHERE unique1 < 10 WINDOW w AS (order by four); + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- SELECT first_value(unique1) over w, +-- nth_value(unique1, 2) over w AS nth_2, +-- last_value(unique1) over w, unique1, four +-- FROM tenk1 WHERE unique1 < 10 +-- WINDOW w AS (order by four range between current row and unbounded following); + +-- [SPARK-28501] Frame bound value must be a literal. +-- SELECT sum(unique1) over +-- (order by unique1 +-- rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING), +-- unique1 +-- FROM tenk1 WHERE unique1 < 10; + +CREATE TEMP VIEW v_window AS +SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following) as sum_rows +FROM range(1, 11) i; + +SELECT * FROM v_window; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- CREATE OR REPLACE TEMP VIEW v_window AS +-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following +-- exclude current row) as sum_rows FROM range(1, 10) i; + +-- SELECT * FROM v_window; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- CREATE OR REPLACE TEMP VIEW v_window AS +-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following +-- exclude group) as sum_rows FROM range(1, 10) i; +-- SELECT * FROM v_window; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- CREATE OR REPLACE TEMP VIEW v_window AS +-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following +-- exclude ties) as sum_rows FROM generate_series(1, 10) i; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- CREATE OR REPLACE TEMP VIEW v_window AS +-- SELECT i, sum(i) over (order by i rows between 1 preceding and 1 following +-- exclude no others) as sum_rows FROM generate_series(1, 10) i; +-- SELECT * FROM v_window; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- CREATE OR REPLACE TEMP VIEW v_window AS +-- SELECT i.id, sum(i.id) over (order by i.id groups between 1 preceding and 1 
following) as sum_rows FROM range(1, 11) i; +-- SELECT * FROM v_window; + +DROP VIEW v_window; +-- [SPARK-29540] Thrift in some cases can't parse string to date +-- DROP TABLE empsalary; +DROP VIEW tenk2; +DROP VIEW int4_tbl; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql new file mode 100644 index 0000000000000..ba1acc9f56b4a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql @@ -0,0 +1,303 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- Window Functions Testing +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L320-562 + +-- Test window operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + +CREATE TABLE empsalary ( + depname string, + empno integer, + salary int, + enroll_date date +) USING parquet; + +INSERT INTO empsalary VALUES + ('develop', 10, 5200, date '2007-08-01'), + ('sales', 1, 5000, date '2006-10-01'), + ('personnel', 5, 3500, date '2007-12-10'), + ('sales', 4, 4800, date '2007-08-08'), + ('personnel', 2, 3900, date '2006-12-23'), + ('develop', 7, 4200, date '2008-01-01'), + ('develop', 9, 4500, date '2008-01-01'), + ('sales', 3, 4800, date '2007-08-01'), + ('develop', 8, 6000, date '2006-10-01'), + ('develop', 11, 5200, date '2007-08-15'); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- CREATE TEMP VIEW v_window AS +-- SELECT i, min(i) over (order by i range between '1 day' preceding and '10 days' following) as min_i +-- FROM range(now(), now()+'100 days', '1 hour') i; + +-- RANGE offset PRECEDING/FOLLOWING tests + +SELECT sum(unique1) over (order by four range 
between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (order by four desc range between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude no others), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude current row), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding exclude ties), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 following exclude ties), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 6 following exclude group), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +SELECT sum(unique1) over (partition by four order by unique1 range between 5 preceding and 6 following), +unique1, four +FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (partition by four order by unique1 range between 5 preceding and 6 following +-- exclude current row),unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- 
select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select sum(salary) over (order by enroll_date desc range between '1 year' preceding and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select sum(salary) over (order by enroll_date desc range between '1 year' following and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following +-- exclude current row), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following +-- exclude group), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- select first_value(salary) over(order by salary range between 1000 preceding and 1000 following), +-- lead(salary) over(order by salary range between 1000 preceding and 1000 following), +-- nth_value(salary, 1) over(order by salary range between 1000 preceding and 1000 following), +-- salary from empsalary; + +-- [SPARK-30734] AnalysisException that window RangeFrame not match RowFrame +-- select last(salary) over(order by salary range between 1000 preceding and 1000 following), +-- lag(salary) over(order by salary range between 1000 preceding and 1000 following), +-- salary from empsalary; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function 
+-- select first_value(salary) over(order by salary range between 1000 following and 3000 following +-- exclude current row), +-- lead(salary) over(order by salary range between 1000 following and 3000 following exclude ties), +-- nth_value(salary, 1) over(order by salary range between 1000 following and 3000 following +-- exclude ties), +-- salary from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select last(salary) over(order by salary range between 1000 following and 3000 following +-- exclude group), +-- lag(salary) over(order by salary range between 1000 following and 3000 following exclude group), +-- salary from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude ties), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude ties), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude ties), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude group), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude group), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select first(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude current row), +-- last(salary) over(order by enroll_date range between unbounded preceding and '1 year' following +-- exclude current 
row), +-- salary, enroll_date from empsalary; + +-- RANGE offset PRECEDING/FOLLOWING with null values +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls first range between 2 preceding and 2 following); + +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls last range between 2 preceding and 2 following); + +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls first range between 2 preceding and 2 following); + +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls last range between 2 preceding and 2 following); + +-- Check overflow behavior for various integer sizes + +select x.id, last(x.id) over (order by x.id range between current row and 2147450884 following) +from range(32764, 32767) x; + +select x.id, last(x.id) over (order by x.id desc range between current row and 2147450885 following) +from range(-32766, -32765) x; + +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(2147483644, 2147483647) x; + +select x.id, last(x.id) over (order by x.id desc range between current row and 5 following) +from range(-2147483646, -2147483645) x; + +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(9223372036854775804, 9223372036854775807) x; + +select x.id, last(x.id) over (order by x.id desc range between current row and 5 
following) +from range(-9223372036854775806, -9223372036854775805) x; + +-- Test in_range for other numeric datatypes + +create table numerics ( + id int, + f_float4 float, + f_float8 float, + f_numeric int +) using parquet; + +insert into numerics values +(1, -3, -3, -3), +(2, -1, -1, -1), +(3, 0, 0, 0), +(4, 1.1, 1.1, 1.1), +(5, 1.12, 1.12, 1.12), +(6, 2, 2, 2), +(7, 100, 100, 100); +-- (8, 'infinity', 'infinity', '1000'), +-- (9, 'NaN', 'NaN', 'NaN'), +-- (0, '-infinity', '-infinity', '-1000'); -- numeric type lacks infinities + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1 following); + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1.1 following); + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 'inf' preceding and 'inf' following); + +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1.1 preceding and 'NaN' following); -- error, NaN disallowed + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1 following); + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1.1 following); + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 'inf' preceding and 'inf' following); + +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1.1 preceding and 'NaN' following); -- error, NaN disallowed + +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1 following); + +select id, 
f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following); + +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following); -- currently unsupported + +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1.1 preceding and 'NaN' following); -- error, NaN disallowed + +drop table empsalary; +drop table numerics; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql new file mode 100644 index 0000000000000..cd3b74b3aa03f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql @@ -0,0 +1,456 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- Window Functions Testing +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L564-L911 + +-- Test window operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + +CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1; + +CREATE TABLE empsalary ( + depname string, + empno integer, + salary int, + enroll_date date +) USING parquet; + +INSERT INTO empsalary VALUES + ('develop', 10, 5200, date '2007-08-01'), + ('sales', 1, 5000, date '2006-10-01'), + ('personnel', 5, 3500, date '2007-12-10'), + ('sales', 4, 4800, date '2007-08-08'), + ('personnel', 2, 3900, date '2006-12-23'), + ('develop', 7, 4200, date '2008-01-01'), + ('develop', 9, 4500, date '2008-01-01'), + ('sales', 3, 4800, date '2007-08-01'), + ('develop', 8, 6000, date '2006-10-01'), + ('develop', 11, 5200, date '2007-08-15'); + +-- Test in_range for other datetime datatypes + +-- Spark only supports timestamp +-- [SPARK-29636] Spark can't parse '11:00 BST' or '2000-10-19 10:23:54+01' signatures to timestamp +create table datetimes ( + id int, + f_time timestamp, + f_timetz timestamp, + f_interval timestamp, + f_timestamptz timestamp, + f_timestamp timestamp +) using parquet; + +-- Spark cannot safely cast StringType to TimestampType +-- [SPARK-29636] Spark can't parse '11:00 BST' or '2000-10-19 10:23:54+01' signatures to timestamp +insert into datetimes values +(1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), cast ('2000-10-19 10:23:54+01' as timestamp), timestamp '2000-10-19 10:23:54'), +(2, timestamp '12:00', cast ('12:00 BST' as timestamp), cast ('2 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), timestamp '2001-10-19 10:23:54'), +(3, timestamp '13:00', cast ('13:00 BST' as timestamp), cast ('3 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), timestamp '2001-10-19 10:23:54'), +(4, timestamp '14:00', cast ('14:00 BST' as timestamp), cast ('4 years' as timestamp), 
cast ('2002-10-19 10:23:54+01' as timestamp), timestamp '2002-10-19 10:23:54'), +(5, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2003-10-19 10:23:54+01' as timestamp), timestamp '2003-10-19 10:23:54'), +(6, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2004-10-19 10:23:54+01' as timestamp), timestamp '2004-10-19 10:23:54'), +(7, timestamp '17:00', cast ('17:00 BST' as timestamp), cast ('7 years' as timestamp), cast ('2005-10-19 10:23:54+01' as timestamp), timestamp '2005-10-19 10:23:54'), +(8, timestamp '18:00', cast ('18:00 BST' as timestamp), cast ('8 years' as timestamp), cast ('2006-10-19 10:23:54+01' as timestamp), timestamp '2006-10-19 10:23:54'), +(9, timestamp '19:00', cast ('19:00 BST' as timestamp), cast ('9 years' as timestamp), cast ('2007-10-19 10:23:54+01' as timestamp), timestamp '2007-10-19 10:23:54'), +(10, timestamp '20:00', cast ('20:00 BST' as timestamp), cast ('10 years' as timestamp), cast ('2008-10-19 10:23:54+01' as timestamp), timestamp '2008-10-19 10:23:54'); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_time, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_time range between +-- '70 min' preceding and '2 hours' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_time, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_time desc range between +-- '70 min' preceding and '2 hours' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_timetz, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_timetz range between +-- '70 min' preceding and '2 hours' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, 
f_timetz, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_timetz desc range between +-- '70 min' preceding and '2 hours' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_interval, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_interval range between +-- '1 year' preceding and '1 year' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_interval, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_interval desc range between +-- '1 year' preceding and '1 year' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_timestamptz, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_timestamptz range between +-- '1 year' preceding and '1 year' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_timestamptz, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_timestamptz desc range between +-- '1 year' preceding and '1 year' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_timestamp, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_timestamp range between +-- '1 year' preceding and '1 year' following); + +-- [SPARK-28429] SQL Datetime util function being casted to double instead of timestamp +-- select id, f_timestamp, first(id) over w, last(id) over w +-- from datetimes +-- window w as (order by f_timestamp desc range between +-- '1 year' preceding and '1 year' following); + +-- RANGE offset PRECEDING/FOLLOWING error cases +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by enroll_date, salary range between '1 
year' preceding and '2 years' following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (range between '1 year' preceding and '2 years' following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select sum(salary) over (order by depname range between '1 year' preceding and '2 years' following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select max(enroll_date) over (order by enroll_date range between 1 preceding and 2 following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select max(enroll_date) over (order by salary range between -1 preceding and 2 following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select max(enroll_date) over (order by salary range between 1 preceding and -2 following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select max(enroll_date) over (order by salary range between '1 year' preceding and '2 years' following +-- exclude ties), salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select max(enroll_date) over (order by enroll_date range between '1 year' preceding and '-2 years' following +-- exclude ties), salary, enroll_date from empsalary; + +-- GROUPS tests + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between unbounded preceding and current row), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between unbounded preceding and unbounded following), 
+-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between current row and unbounded following), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between 1 preceding and unbounded following), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between 1 following and unbounded following), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between unbounded preceding and 2 following), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between 2 preceding and 1 preceding), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between 2 preceding and 1 following), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (order by four groups between 0 preceding and 0 following), +-- unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four groups between 2 preceding and 1 following +-- exclude current row), unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 following +-- exclude group), unique1, four +-- FROM 
tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- SELECT sum(unique1) over (order by four range between 2 preceding and 1 following +-- exclude ties), unique1, four +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (partition by ten +-- order by four groups between 0 preceding and 0 following),unique1, four, ten +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (partition by ten +-- order by four groups between 0 preceding and 0 following exclude current row), unique1, four, ten +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (partition by ten +-- order by four groups between 0 preceding and 0 following exclude group), unique1, four, ten +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- SELECT sum(unique1) over (partition by ten +-- order by four groups between 0 preceding and 0 following exclude ties), unique1, four, ten +-- FROM tenk1 WHERE unique1 < 10; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- select first_value(salary) over(order by enroll_date groups between 1 preceding and 1 following), +-- lead(salary) over(order by enroll_date groups between 1 preceding and 1 following), +-- nth_value(salary, 1) over(order by enroll_date groups between 1 preceding and 1 following), +-- salary, enroll_date from empsalary; + +-- [SPARK-28508] Support for range frame+row frame in the same query +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- select 
last(salary) over(order by enroll_date groups between 1 preceding and 1 following), +-- lag(salary) over(order by enroll_date groups between 1 preceding and 1 following), +-- salary, enroll_date from empsalary; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- select first_value(salary) over(order by enroll_date groups between 1 following and 3 following +-- exclude current row), +-- lead(salary) over(order by enroll_date groups between 1 following and 3 following exclude ties), +-- nth_value(salary, 1) over(order by enroll_date groups between 1 following and 3 following +-- exclude ties), +-- salary, enroll_date from empsalary; + +-- [SPARK-28428] Spark `exclude` always expecting `()` +-- select last(salary) over(order by enroll_date groups between 1 following and 3 following +-- exclude group), +-- lag(salary) over(order by enroll_date groups between 1 following and 3 following exclude group), +-- salary, enroll_date from empsalary; + +-- Show differences in offset interpretation between ROWS, RANGE, and GROUPS +WITH cte (x) AS ( + SELECT * FROM range(1, 36, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following); + +WITH cte (x) AS ( + SELECT * FROM range(1, 36, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY x range between 1 preceding and 1 following); + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- WITH cte (x) AS ( +-- SELECT * FROM range(1, 36, 2) +-- ) +-- SELECT x, (sum(x) over w) +-- FROM cte +-- WINDOW w AS (ORDER BY x groups between 1 preceding and 1 following); + +WITH cte (x) AS ( + select 1 union all select 1 union all select 1 union all + SELECT * FROM range(5, 50, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following); + +WITH cte (x) AS ( + select 1 union all select 1 union all select 1 union all + SELECT * FROM range(5, 50, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY 
x range between 1 preceding and 1 following); + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- WITH cte (x) AS ( +-- select 1 union all select 1 union all select 1 union all +-- SELECT * FROM range(5, 50, 2) +-- ) +-- SELECT x, (sum(x) over w) +-- FROM cte +-- WINDOW w AS (ORDER BY x groups between 1 preceding and 1 following); + +-- with UNION +SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk2)s LIMIT 0; + +-- check some degenerate cases +create table t1 (f1 int, f2 int) using parquet; +insert into t1 values (1,1),(1,2),(2,2); + +select f1, sum(f1) over (partition by f1 + range between 1 preceding and 1 following) +from t1 where f1 = f2; -- error, must have order by + +-- Since EXPLAIN clause rely on host physical location, it is commented out +-- explain +-- select f1, sum(f1) over (partition by f1 order by f2 +-- range between 1 preceding and 1 following) +-- from t1 where f1 = f2; + +select f1, sum(f1) over (partition by f1 order by f2 +range between 1 preceding and 1 following) +from t1 where f1 = f2; + +select f1, sum(f1) over (partition by f1, f1 order by f2 +range between 2 preceding and 1 preceding) +from t1 where f1 = f2; + +select f1, sum(f1) over (partition by f1, f2 order by f2 +range between 1 following and 2 following) +from t1 where f1 = f2; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- select f1, sum(f1) over (partition by f1, +-- groups between 1 preceding and 1 following) +-- from t1 where f1 = f2; + +-- Since EXPLAIN clause rely on host physical location, it is commented out +-- explain +-- select f1, sum(f1) over (partition by f1 order by f2 +-- range between 1 preceding and 1 following) +-- from t1 where f1 = f2; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- select f1, sum(f1) over (partition by f1 order by f2 +-- groups between 1 preceding and 1 following) +-- from t1 where f1 = f2; + +-- [SPARK-28648] Adds 
support to `groups` unit type in window clauses +-- select f1, sum(f1) over (partition by f1, f1 order by f2 +-- groups between 2 preceding and 1 preceding) +-- from t1 where f1 = f2; + +-- [SPARK-28648] Adds support to `groups` unit type in window clauses +-- select f1, sum(f1) over (partition by f1, f2 order by f2 +-- groups between 1 following and 2 following) +-- from t1 where f1 = f2; + +-- ordering by a non-integer constant is allowed +SELECT rank() OVER (ORDER BY length('abc')); + +-- can't order by another window function +-- [SPARK-28566] window functions should not be allowed in window definitions +-- SELECT rank() OVER (ORDER BY rank() OVER (ORDER BY random())); + +-- some other errors +SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10; + +SELECT * FROM empsalary INNER JOIN tenk1 ON row_number() OVER (ORDER BY salary) < 10; + +SELECT rank() OVER (ORDER BY 1), count(*) FROM empsalary GROUP BY 1; + +SELECT * FROM rank() OVER (ORDER BY random()); + +-- Original query: DELETE FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10; +SELECT * FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10; + +-- Original query: DELETE FROM empsalary RETURNING rank() OVER (ORDER BY random()); +SELECT * FROM empsalary WHERE rank() OVER (ORDER BY random()); + +-- [SPARK-28645] Throw an error on window redefinition +-- select count(*) OVER w FROM tenk1 WINDOW w AS (ORDER BY unique1), w AS (ORDER BY unique1); + +select rank() OVER (PARTITION BY four, ORDER BY ten) FROM tenk1; + +-- [SPARK-28646] Allow usage of `count` only for parameterless aggregate function +-- select count() OVER () FROM tenk1; + +-- The output is the expected one: `range` is not a window or aggregate function. 
+SELECT range(1, 100) OVER () FROM empsalary; + +SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; + +-- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; + +-- filter + +-- [SPARK-30182] Support nested aggregates +-- SELECT sum(salary), row_number() OVER (ORDER BY depname), sum( +-- sum(salary) FILTER (WHERE enroll_date > '2007-01-01') +-- ) +-- FROM empsalary GROUP BY depname; + +-- Test pushdown of quals into a subquery containing window functions + +-- pushdown is safe because all PARTITION BY clauses include depname: +-- Since EXPLAIN clause rely on host physical location, it is commented out +-- EXPLAIN +-- SELECT * FROM +-- (SELECT depname, +-- sum(salary) OVER (PARTITION BY depname) depsalary, +-- min(salary) OVER (PARTITION BY depname || 'A', depname) depminsalary +-- FROM empsalary) emp +-- WHERE depname = 'sales'; + +-- pushdown is unsafe because there's a PARTITION BY clause without depname: +-- Since EXPLAIN clause rely on host physical location, it is commented out +-- EXPLAIN +-- SELECT * FROM +-- (SELECT depname, +-- sum(salary) OVER (PARTITION BY enroll_date) enroll_salary, +-- min(salary) OVER (PARTITION BY depname) depminsalary +-- FROM empsalary) emp +-- WHERE depname = 'sales'; + +-- Test Sort node collapsing +-- Since EXPLAIN clause rely on host physical location, it is commented out +-- EXPLAIN +-- SELECT * FROM +-- (SELECT depname, +-- sum(salary) OVER (PARTITION BY depname order by empno) depsalary, +-- min(salary) OVER (PARTITION BY depname, empno order by enroll_date) depminsalary +-- FROM empsalary) emp +-- WHERE depname = 'sales'; + +-- Test Sort node reordering +-- Since EXPLAIN clause rely on host physical location, it is commented out +-- EXPLAIN +-- SELECT +-- lead(1) OVER (PARTITION BY depname ORDER BY salary, enroll_date), +-- lag(1) OVER (PARTITION BY depname ORDER BY salary,enroll_date,empno) +-- FROM empsalary; + +-- cleanup +DROP TABLE empsalary; +DROP 
TABLE datetimes; +DROP TABLE t1; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part4.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part4.sql new file mode 100644 index 0000000000000..64ba8e3b7a5ad --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part4.sql @@ -0,0 +1,404 @@ +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- Window Functions Testing +-- https://github.com/postgres/postgres/blob/REL_12_STABLE/src/test/regress/sql/window.sql#L913-L1278 + +-- Test window operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + +-- Spark doesn't handle UDFs in SQL +-- test user-defined window function with named args and default args +-- CREATE FUNCTION nth_value_def(val anyelement, n integer = 1) RETURNS anyelement +-- LANGUAGE internal WINDOW IMMUTABLE STRICT AS 'window_nth_value'; + +-- Spark doesn't handle UDFs in SQL +-- SELECT nth_value_def(n := 2, val := ten) OVER (PARTITION BY four), ten, four +-- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten) s; + +-- Spark doesn't handle UDFs in SQL +-- SELECT nth_value_def(ten) OVER (PARTITION BY four), ten, four +-- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten) s; + +-- +-- Test the basic moving-aggregate machinery +-- + +-- create aggregates that record the series of transform calls (these are +-- intentionally not true inverses) + +-- Spark doesn't handle UDFs in SQL +-- CREATE FUNCTION logging_sfunc_nonstrict(text, anyelement) RETURNS text AS +-- $$ SELECT COALESCE($1, '') || '*' || quote_nullable($2) $$ +-- LANGUAGE SQL IMMUTABLE; + +-- Spark doesn't handle UDFs in SQL +-- CREATE FUNCTION logging_msfunc_nonstrict(text, anyelement) RETURNS text AS +-- $$ SELECT 
COALESCE($1, '') || '+' || quote_nullable($2) $$ +-- LANGUAGE SQL IMMUTABLE; + +-- Spark doesn't handle UDFs in SQL +-- CREATE FUNCTION logging_minvfunc_nonstrict(text, anyelement) RETURNS text AS +-- $$ SELECT $1 || '-' || quote_nullable($2) $$ +-- LANGUAGE SQL IMMUTABLE; + +-- Spark doesn't handle UDFs in SQL +-- CREATE AGGREGATE logging_agg_nonstrict (anyelement) +-- ( +-- stype = text, +-- sfunc = logging_sfunc_nonstrict, +-- mstype = text, +-- msfunc = logging_msfunc_nonstrict, +-- minvfunc = logging_minvfunc_nonstrict +-- ); + +-- Spark doesn't handle UDFs in SQL +-- CREATE AGGREGATE logging_agg_nonstrict_initcond (anyelement) +-- ( +-- stype = text, +-- sfunc = logging_sfunc_nonstrict, +-- mstype = text, +-- msfunc = logging_msfunc_nonstrict, +-- minvfunc = logging_minvfunc_nonstrict, +-- initcond = 'I', +-- minitcond = 'MI' +-- ); + +-- Spark doesn't handle UDFs in SQL +-- CREATE FUNCTION logging_sfunc_strict(text, anyelement) RETURNS text AS +-- $$ SELECT $1 || '*' || quote_nullable($2) $$ +-- LANGUAGE SQL STRICT IMMUTABLE; + +-- Spark doesn't handle UDFs in SQL +-- CREATE FUNCTION logging_msfunc_strict(text, anyelement) RETURNS text AS +-- $$ SELECT $1 || '+' || quote_nullable($2) $$ +-- LANGUAGE SQL STRICT IMMUTABLE; + +-- Spark doesn't handle UDFs in SQL +-- CREATE FUNCTION logging_minvfunc_strict(text, anyelement) RETURNS text AS +-- $$ SELECT $1 || '-' || quote_nullable($2) $$ +-- LANGUAGE SQL STRICT IMMUTABLE; + +-- Spark doesn't handle UDFs in SQL +-- CREATE AGGREGATE logging_agg_strict (text) +-- ( +-- stype = text, +-- sfunc = logging_sfunc_strict, +-- mstype = text, +-- msfunc = logging_msfunc_strict, +-- minvfunc = logging_minvfunc_strict +-- ); + +-- Spark doesn't handle UDFs in SQL +-- CREATE AGGREGATE logging_agg_strict_initcond (anyelement) +-- ( +-- stype = text, +-- sfunc = logging_sfunc_strict, +-- mstype = text, +-- msfunc = logging_msfunc_strict, +-- minvfunc = logging_minvfunc_strict, +-- initcond = 'I', +-- minitcond = 'MI' +-- ); + 
+-- Spark doesn't handle UDFs in SQL +-- test strict and non-strict cases +-- SELECT +-- p::text || ',' || i::text || ':' || COALESCE(v::text, 'NULL') AS row, +-- logging_agg_nonstrict(v) over wnd as nstrict, +-- logging_agg_nonstrict_initcond(v) over wnd as nstrict_init, +-- logging_agg_strict(v::text) over wnd as strict, +-- logging_agg_strict_initcond(v) over wnd as strict_init +-- FROM (VALUES +-- (1, 1, NULL), +-- (1, 2, 'a'), +-- (1, 3, 'b'), +-- (1, 4, NULL), +-- (1, 5, NULL), +-- (1, 6, 'c'), +-- (2, 1, NULL), +-- (2, 2, 'x'), +-- (3, 1, 'z') +-- ) AS t(p, i, v) +-- WINDOW wnd AS (PARTITION BY P ORDER BY i ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) +-- ORDER BY p, i; + +-- Spark doesn't handle UDFs in SQL +-- and again, but with filter +-- SELECT +-- p::text || ',' || i::text || ':' || +-- CASE WHEN f THEN COALESCE(v::text, 'NULL') ELSE '-' END as row, +-- logging_agg_nonstrict(v) filter(where f) over wnd as nstrict_filt, +-- logging_agg_nonstrict_initcond(v) filter(where f) over wnd as nstrict_init_filt, +-- logging_agg_strict(v::text) filter(where f) over wnd as strict_filt, +-- logging_agg_strict_initcond(v) filter(where f) over wnd as strict_init_filt +-- FROM (VALUES +-- (1, 1, true, NULL), +-- (1, 2, false, 'a'), +-- (1, 3, true, 'b'), +-- (1, 4, false, NULL), +-- (1, 5, false, NULL), +-- (1, 6, false, 'c'), +-- (2, 1, false, NULL), +-- (2, 2, true, 'x'), +-- (3, 1, true, 'z') +-- ) AS t(p, i, f, v) +-- WINDOW wnd AS (PARTITION BY p ORDER BY i ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) +-- ORDER BY p, i; + +-- Spark doesn't handle UDFs in SQL +-- test that volatile arguments disable moving-aggregate mode +-- SELECT +-- i::text || ':' || COALESCE(v::text, 'NULL') as row, +-- logging_agg_strict(v::text) +-- over wnd as inverse, +-- logging_agg_strict(v::text || CASE WHEN random() < 0 then '?' 
ELSE '' END) +-- over wnd as noinverse +-- FROM (VALUES +-- (1, 'a'), +-- (2, 'b'), +-- (3, 'c') +-- ) AS t(i, v) +-- WINDOW wnd AS (ORDER BY i ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) +-- ORDER BY i; + +-- Spark doesn't handle UDFs in SQL +-- SELECT +-- i::text || ':' || COALESCE(v::text, 'NULL') as row, +-- logging_agg_strict(v::text) filter(where true) +-- over wnd as inverse, +-- logging_agg_strict(v::text) filter(where random() >= 0) +-- over wnd as noinverse +-- FROM (VALUES +-- (1, 'a'), +-- (2, 'b'), +-- (3, 'c') +-- ) AS t(i, v) +-- WINDOW wnd AS (ORDER BY i ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) +-- ORDER BY i; + +-- Spark doesn't handle UDFs in SQL +-- test that non-overlapping windows don't use inverse transitions +-- SELECT +-- logging_agg_strict(v::text) OVER wnd +-- FROM (VALUES +-- (1, 'a'), +-- (2, 'b'), +-- (3, 'c') +-- ) AS t(i, v) +-- WINDOW wnd AS (ORDER BY i ROWS BETWEEN CURRENT ROW AND CURRENT ROW) +-- ORDER BY i; + +-- Spark doesn't handle UDFs in SQL +-- test that returning NULL from the inverse transition functions +-- restarts the aggregation from scratch. The second aggregate is supposed +-- to test cases where only some aggregates restart, the third one checks +-- that one aggregate restarting doesn't cause others to restart. 
+ +-- Spark doesn't handle UDFs in SQL +-- CREATE FUNCTION sum_int_randrestart_minvfunc(int4, int4) RETURNS int4 AS +-- $$ SELECT CASE WHEN random() < 0.2 THEN NULL ELSE $1 - $2 END $$ +-- LANGUAGE SQL STRICT; + +-- Spark doesn't handle UDFs in SQL +-- CREATE AGGREGATE sum_int_randomrestart (int4) +-- ( +-- stype = int4, +-- sfunc = int4pl, +-- mstype = int4, +-- msfunc = int4pl, +-- minvfunc = sum_int_randrestart_minvfunc +-- ); + +-- Spark doesn't handle UDFs in SQL +-- WITH +-- vs AS ( +-- SELECT i, (random() * 100)::int4 AS v +-- FROM generate_series(1, 100) AS i +-- ), +-- sum_following AS ( +-- SELECT i, SUM(v) OVER +-- (ORDER BY i DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS s +-- FROM vs +-- ) +-- SELECT DISTINCT +-- sum_following.s = sum_int_randomrestart(v) OVER fwd AS eq1, +-- -sum_following.s = sum_int_randomrestart(-v) OVER fwd AS eq2, +-- 100*3+(vs.i-1)*3 = length(logging_agg_nonstrict(''::text) OVER fwd) AS eq3 +-- FROM vs +-- JOIN sum_following ON sum_following.i = vs.i +-- WINDOW fwd AS ( +-- ORDER BY vs.i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING +-- ); + +-- +-- Test various built-in aggregates that have moving-aggregate support +-- + +-- test inverse transition functions handle NULLs properly +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1.5),(2,2.5),(3,NULL),(4,NULL)) t(i,v); + +-- [SPARK-28602] Spark does not recognize 'interval' type as 'numeric' +-- SELECT i,AVG(v::interval) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +-- FROM 
(VALUES(1,'1 sec'),(2,'2 sec'),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +-- The cast syntax is present in PgSQL for legacy reasons and Spark will not recognize a money field +-- SELECT i,SUM(v::money) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +-- FROM (VALUES(1,'1.10'),(2,'2.20'),(3,NULL),(4,NULL)) t(i,v); + +-- [SPARK-28602] Spark does not recognize 'interval' type as 'numeric' +-- SELECT i,SUM(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +-- FROM (VALUES(1,'1 sec'),(2,'2 sec'),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1.1),(2,2.2),(3,NULL),(4,NULL)) t(i,v); + +SELECT SUM(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1.01),(2,2),(3,3)) v(i,n); + +SELECT i,COUNT(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,COUNT(*) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT 
VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM 
(VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +-- For the following queries Spark result differs from PgSQL: +-- Spark handles division by zero as 'NaN' instead of 'NULL', which is the PgSQL behaviour +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n); + +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n); + +-- test that inverse transition functions work with various frame options +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND CURRENT ROW) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v); + +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,3),(4,4)) t(i,v); + +-- [SPARK-29638] Spark 
handles 'NaN' as 0 in sums +-- ensure aggregate over numeric properly recovers from NaN values +SELECT a, b, + SUM(b) OVER(ORDER BY A ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) +FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b); + +-- It might be tempting for someone to add an inverse trans function for +-- float and double precision. This should not be done as it can give incorrect +-- results. This test should fail if anyone ever does this without thinking too +-- hard about it. +-- [SPARK-28516] adds `to_char` +-- SELECT to_char(SUM(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING),'999999999999999999999D9') +-- FROM (VALUES(1,1e20),(2,1)) n(i,n); + +-- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) +-- SELECT i, b, bool_and(b) OVER w, bool_or(b) OVER w +-- FROM (VALUES (1,true), (2,true), (3,false), (4,false), (5,true)) v(i,b) +-- WINDOW w AS (ORDER BY i ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING); + +-- Tests for problems with failure to walk or mutate expressions +-- within window frame clauses. 
+ +-- [SPARK-27974] Add built-in Aggregate Function: array_agg +-- test walker (fails with collation error if expressions are not walked) +-- SELECT array_agg(i) OVER w +-- FROM range(1,6) i +-- WINDOW w AS (ORDER BY i ROWS BETWEEN (('foo' < 'foobar')::integer) PRECEDING AND CURRENT ROW); + +-- Spark doesn't handle UDFs in SQL +-- test mutator (fails when inlined if expressions are not mutated) +-- CREATE FUNCTION pg_temp.f(group_size BIGINT) RETURNS SETOF integer[] +-- AS $$ +-- SELECT array_agg(s) OVER w +-- FROM generate_series(1,5) s +-- WINDOW w AS (ORDER BY s ROWS BETWEEN CURRENT ROW AND GROUP_SIZE FOLLOWING) +-- $$ LANGUAGE SQL STABLE; diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/with.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/with.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/pgSQL/with.sql rename to sql/core/src/test/resources/sql-tests/inputs/postgreSQL/with.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql new file mode 100644 index 0000000000000..c0827a3cba39b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql @@ -0,0 +1,9 @@ +-- regexp_extract +SELECT regexp_extract('1a 2b 14m', '\\d+'); +SELECT regexp_extract('1a 2b 14m', '\\d+', 0); +SELECT regexp_extract('1a 2b 14m', '\\d+', 1); +SELECT regexp_extract('1a 2b 14m', '\\d+', 2); +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)'); +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 0); +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 1); +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 2); diff --git a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql index 852bfbd63847d..dc77f87d9743a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/show-create-table.sql @@ -59,3 +59,46 @@ TBLPROPERTIES ('a' = '1'); SHOW CREATE TABLE tbl; DROP TABLE tbl; + +-- float alias real and decimal alias numeric +CREATE TABLE tbl (a REAL, b NUMERIC, c NUMERIC(10), d NUMERIC(10,1)) USING parquet; +SHOW CREATE TABLE tbl; +DROP TABLE tbl; + + +-- show create table for view +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet; + +-- simple +CREATE VIEW view_SPARK_30302 (aaa, bbb) +AS SELECT a, b FROM tbl; + +SHOW CREATE TABLE view_SPARK_30302 AS SERDE; +DROP VIEW view_SPARK_30302; + + +-- comment +CREATE VIEW view_SPARK_30302 (aaa COMMENT 'comment with \'quoted text\' for aaa', bbb) +COMMENT 'This is a comment with \'quoted text\' for view' +AS SELECT a, b FROM tbl; + +SHOW CREATE TABLE view_SPARK_30302 AS SERDE; +DROP VIEW view_SPARK_30302; + + +-- tblproperties +CREATE VIEW view_SPARK_30302 (aaa, bbb) +TBLPROPERTIES ('a' = '1', 'b' = '2') +AS SELECT a, b FROM tbl; + +SHOW CREATE TABLE view_SPARK_30302 AS SERDE; +DROP VIEW view_SPARK_30302; + +-- SHOW CREATE TABLE does not support view +CREATE VIEW view_SPARK_30302 (aaa, bbb) +AS SELECT a, b FROM tbl; + +SHOW CREATE TABLE view_SPARK_30302; +DROP VIEW view_SPARK_30302; + +DROP TABLE tbl; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql index b5f458f2cb184..ae6a9641aae66 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-aggregate.sql @@ -1,5 +1,10 @@ -- Tests aggregate expressions in outer query and EXISTS subquery. +-- Test aggregate operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql index cefc3fe6272ab..667573b30d265 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql @@ -1,9 +1,17 @@ -- Tests EXISTS subquery support. Tests Exists subquery -- used in Joins (Both when joins occurs in outer and suquery blocks) --- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false + +-- There are 2 dimensions we want to test +-- 1. run with broadcast hash join, sort merge join or shuffle hash join. +-- 2. run with whole-stage-codegen, operator codegen or no codegen. 
+ +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=10485760 +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false + +--CONFIG_DIM2 spark.sql.codegen.wholeStage=true +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql index 19fc18833760c..580fc1d4162eb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-orderby-limit.sql @@ -1,5 +1,10 @@ -- Tests EXISTS subquery support with ORDER BY and LIMIT clauses. +-- Test sort operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql index b1d96b32c2478..496285e3514ea 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-group-by.sql @@ -1,50 +1,55 @@ -- A test suite for GROUP BY in parent side, subquery, and both predicate subquery -- It includes correlated cases. +-- Test aggregate operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + create temporary view t1 as select * from values - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("t1e", 10S, null, 25L, float(17.0), 25D, 
26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("t1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("t1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); create temporary view t2 as select * from values - ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, 
timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("t2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("t1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("t1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("t2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("t1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date 
'2014-06-04'), + ("t1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("t1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("t1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); create temporary view t3 as select * from values - ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("t3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("t3a", 6S, 12, 10L, float(15), 20D, 20E2BD, 
timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("t1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("t3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("t3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("t1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("t3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); -- correlated IN subquery diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql index 22f3eafd6a02d..200a71ebbb622 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql @@ -1,9 +1,17 @@ -- A test suite for IN JOINS in parent side, subquery, and both predicate subquery -- It includes correlated cases. 
--- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false + +-- There are 2 dimensions we want to test +-- 1. run with broadcast hash join, sort merge join or shuffle hash join. +-- 2. run with whole-stage-codegen, operator codegen or no codegen. + +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=10485760 +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true +--CONFIG_DIM1 spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false + +--CONFIG_DIM2 spark.sql.codegen.wholeStage=true +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM2 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), @@ -51,6 +59,18 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); +create temporary view s1 as select * from values + (1), (3), (5), (7), (9) + as s1(id); + +create temporary view s2 as select * from values + (1), (3), (4), (6), (9) + as s2(id); + +create temporary view s3 as select * from values + (3), (4), (6), (9) + as s3(id); + -- correlated IN subquery -- different JOIN in parent side -- TC 01.01 @@ -83,7 +103,7 @@ GROUP BY t1a, t3a, t3b, t3c -ORDER BY t1a DESC, t3b DESC; +ORDER BY t1a DESC, t3b DESC, t3c ASC; -- TC 01.03 SELECT Count(DISTINCT(t1a)) @@ -272,3 +292,101 @@ Group By t1a, t1b, t1c, t2a, t2b, t2c HAVING t2c IS NOT NULL ORDER By t2b DESC nulls last; + +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id 
+AND s1.id IN (SELECT 9); + + +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT 9); + + +-- IN with Subquery ON INNER JOIN +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON LEFT SEMI JOIN +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON LEFT ANTI JOIN +SELECT s1.id as id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON LEFT OUTER JOIN +SELECT s1.id, s2.id as id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON RIGHT OUTER JOIN +SELECT s1.id, s2.id as id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- IN with Subquery ON FULL OUTER JOIN +SELECT s1.id, s2.id AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON INNER JOIN +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON LEFT SEMI JOIN +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON LEFT ANTI JOIN +SELECT s1.id AS id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON LEFT OUTER JOIN +SELECT s1.id, s2.id AS id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON RIGHT OUTER JOIN +SELECT s1.id, s2.id AS id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +-- NOT IN with Subquery ON FULL OUTER JOIN +SELECT s1.id, s2.id AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3); + + +DROP VIEW s1; + +DROP VIEW s2; + +DROP VIEW s3; diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql index a40ee082ba3b9..481b5e8cc7700 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-limit.sql @@ -2,49 +2,49 @@ -- It includes correlated cases. create temporary view t1 as select * from values - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1a", 16S, 12, 21L, float(15.0), 20D, 
20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("val1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("val1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); create temporary view t2 as select * from values - ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 
25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("val2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("val1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("val2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); create temporary view t3 as select * from values - ("val3a", 6S, 12, 110L, float(15), 20D, 
20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("val3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("val3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("val3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("val3c", 17S, 16, 19L, float(17), 25D, 26E2BD, 
timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("val1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); -- correlated IN subquery diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql index 892e39ff47c1f..001c49c460b06 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-order-by.sql @@ -1,50 +1,55 @@ -- A test suite for ORDER BY in parent side, subquery, and both predicate subquery -- It includes correlated cases. +-- Test sort operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + create temporary view t1 as select * from values - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 8S, 16, 
19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("val1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("val1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); create temporary view t2 as select * from values - ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("val1e", 8S, null, 
19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("val2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("val1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("val2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); create temporary view t3 as select * from values - ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 219L, 
float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("val3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("val3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("val3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("val3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("val1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("val1b", null, 16, 19L, float(17), 25D, 
26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); -- correlated IN subquery diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql index 5c371d2305ac8..b81dd7dce7ff5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-set-operations.sql @@ -2,49 +2,49 @@ -- It includes correlated cases. create temporary view t1 as select * from values - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', 
date '2014-04-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("val1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("val1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); create temporary view t2 as select * from values - ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp 
'2017-05-04 01:01:00.000', null), - ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("val2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("val1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("val2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp 
'2014-09-04 01:01:00.000', date '2014-09-04'), + ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); create temporary view t3 as select * from values - ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("val3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("val3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', 
date '2014-05-04'), + ("val1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("val3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("val3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("val1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); -- correlated IN subquery diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql index 58cf109e136c5..54b74534c1162 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-group-by.sql @@ -1,6 +1,11 @@ -- A test suite for NOT IN GROUP BY in parent side, subquery, and both predicate subquery -- It includes correlated cases. +-- Test aggregate operator with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql index 4f8ca8bfb27c1..fcdb667ad4523 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql @@ -1,9 +1,5 @@ -- A test suite for not-in-joins in parent side, subquery, and both predicate subquery -- It includes correlated cases. 
--- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), @@ -128,7 +124,7 @@ GROUP BY t1b, HAVING t1d NOT IN (SELECT t2d FROM t2 WHERE t1d = t2d) -ORDER BY t1b DESC; +ORDER BY t1b DESC, t1d ASC; -- TC 01.05 SELECT COUNT(DISTINCT(t1a)), diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql index f19567d2fac20..2748a959cbef8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/simple-in.sql @@ -2,49 +2,49 @@ -- It includes correlated cases. 
create temporary view t1 as select * from values - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("t1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("t1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', 
null), + ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); create temporary view t2 as select * from values - ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("t2a", 6S, 12, 14L, 
float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("t1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("t1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("t2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("t1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("t1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("t1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("t1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); create temporary view t3 as select * from values - ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp 
'2014-07-04 01:02:00.000', date '2014-07-04'), - ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("t3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("t3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("t1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("t3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("t3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("t1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("t3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); -- correlated IN subquery diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql index 95b115a8dd094..98ce1354a1355 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/negative-cases/subq-input-typecheck.sql @@ -18,7 +18,7 @@ CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES AS t1(t4a, t4b, t4c); CREATE TEMPORARY VIEW t5 AS SELECT * FROM VALUES - (CAST(1 AS DECIMAL(18, 0)), CAST(2 AS STRING), CAST(3 AS BIGINT)) + (CAST('2011-01-01 01:01:01' AS TIMESTAMP), CAST(2 AS STRING), CAST(3 AS BIGINT)) AS t1(t5a, t5b, t5c); -- TC 01.01 diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql index 1661209093fc4..17e44a96492b8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-predicate.sql @@ -22,49 +22,49 @@ AND c.cv = (SELECT max(avg) GROUP BY c1.cv)); create temporary view t1 as select * from values - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), - ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ('val1d', null, 16, 19L, 
float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), + ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ('val1d', null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ('val1d', null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); create temporary view t2 as select * from values - 
('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ('val1b', null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, 
null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); create temporary view t3 as select * from values - ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val3b', 8S, null, 19L, float(17), 
25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ('val3a', 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ('val3a', 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ('val3c', 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ('val3c', 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ('val1b', null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ('val3b', 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val3b', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); -- Group 1: scalar subquery in predicate context diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf.sql index 58613a1325dfa..0374d98feb6e6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udaf.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udaf.sql @@ -1,3 +1,8 @@ +-- Test aggregate operator and UDAF with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1), (2), (3), (4) as t1(int_col1); diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql similarity index 98% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part1.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql index d829a5c1159fd..24bc25a3fd1c5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part1.sql @@ -8,7 +8,7 @@ -- avoid bit-exact output here because operations may not be bit-exact. -- SET extra_float_digits = 0; --- This test file was converted from pgSQL/aggregates_part1.sql. +-- This test file was converted from postgreSQL/aggregates_part1.sql. 
SELECT avg(udf(four)) AS avg_1 FROM onek; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql similarity index 75% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part2.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql index 5636537398a86..b4054850062b7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part2.sql @@ -5,7 +5,7 @@ -- AGGREGATES [Part 2] -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L145-L350 -- --- This test file was converted from pgSQL/aggregates_part2.sql. +-- This test file was converted from postgreSQL/aggregates_part2.sql. create temporary view int4_tbl as select * from values (0), @@ -43,42 +43,28 @@ create temporary view int4_tbl as select * from values -- -- test for bitwise integer aggregates -- --- CREATE TEMPORARY TABLE bitwise_test( --- i2 INT2, --- i4 INT4, --- i8 INT8, --- i INTEGER, --- x INT2, --- y BIT(4) --- ); +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (3, 3, 3, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4); -- empty case --- SELECT --- BIT_AND(i2) AS "?", --- BIT_OR(i4) AS "?" --- FROM bitwise_test; - --- COPY bitwise_test FROM STDIN NULL 'null'; --- 1 1 1 1 1 B0101 --- 3 3 3 null 2 B0100 --- 7 7 7 3 4 B1100 --- \. 
- --- SELECT --- BIT_AND(i2) AS "1", --- BIT_AND(i4) AS "1", --- BIT_AND(i8) AS "1", --- BIT_AND(i) AS "?", --- BIT_AND(x) AS "0", --- BIT_AND(y) AS "0100", --- --- BIT_OR(i2) AS "7", --- BIT_OR(i4) AS "7", --- BIT_OR(i8) AS "7", --- BIT_OR(i) AS "?", --- BIT_OR(x) AS "7", --- BIT_OR(y) AS "1101" --- FROM bitwise_test; +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0; + +-- null case +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null; + + +SELECT + BIT_AND(cast(b1 as tinyint)) AS a1, + BIT_AND(cast(b2 as smallint)) AS b1, + BIT_AND(b3) AS c1, + BIT_AND(b4) AS d1, + BIT_OR(cast(b1 as tinyint)) AS e7, + BIT_OR(cast(b2 as smallint)) AS f7, + BIT_OR(b3) AS g7, + BIT_OR(b4) AS h3 +FROM bitwise_test; -- -- test boolean aggregates @@ -116,50 +102,40 @@ SELECT NOT (FALSE OR FALSE) AS `t`; -- [SPARK-27880] Implement boolean aggregates(BOOL_AND, BOOL_OR and EVERY) --- CREATE TEMPORARY TABLE bool_test( --- b1 BOOL, --- b2 BOOL, --- b3 BOOL, --- b4 BOOL); +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4); -- empty case --- SELECT --- BOOL_AND(b1) AS "n", --- BOOL_OR(b3) AS "n" --- FROM bool_test; - --- COPY bool_test FROM STDIN NULL 'null'; --- TRUE null FALSE null --- FALSE TRUE null null --- null TRUE FALSE null --- \. 
- --- SELECT --- BOOL_AND(b1) AS "f", --- BOOL_AND(b2) AS "t", --- BOOL_AND(b3) AS "f", --- BOOL_AND(b4) AS "n", --- BOOL_AND(NOT b2) AS "f", --- BOOL_AND(NOT b3) AS "t" --- FROM bool_test; - --- SELECT --- EVERY(b1) AS "f", --- EVERY(b2) AS "t", --- EVERY(b3) AS "f", --- EVERY(b4) AS "n", --- EVERY(NOT b2) AS "f", --- EVERY(NOT b3) AS "t" --- FROM bool_test; - --- SELECT --- BOOL_OR(b1) AS "t", --- BOOL_OR(b2) AS "t", --- BOOL_OR(b3) AS "f", --- BOOL_OR(b4) AS "n", --- BOOL_OR(NOT b2) AS "f", --- BOOL_OR(NOT b3) AS "t" --- FROM bool_test; +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0; + +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test; + +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test; + +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test; -- -- Test cases that should be optimized into indexscans instead of diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part3.sql similarity index 98% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part3.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part3.sql index 1c58620d1c11a..b11c8c05f3103 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part3.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part3.sql @@ -5,7 +5,7 @@ -- AGGREGATES [Part 3] -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L352-L605 --- This test file was converted from pgSQL/aggregates_part3.sql. 
+-- This test file was converted from postgreSQL/aggregates_part3.sql. -- [SPARK-28865] Table inheritance -- try it on an inheritance tree @@ -229,7 +229,6 @@ select udf(max(min(unique1))) from tenk1; -- drop table bytea_test_table; --- [SPARK-27986] Support Aggregate Expressions with filter -- FILTER tests -- select min(unique1) filter (where unique1 > 100) from tenk1; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part4.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part4.sql similarity index 99% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part4.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part4.sql index 7c7777362de8e..8aea00073eee8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-aggregates_part4.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part4.sql @@ -5,7 +5,7 @@ -- AGGREGATES [Part 4] -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L607-L997 --- This test file was converted from pgSQL/aggregates_part4.sql. +-- This test file was converted from postgreSQL/aggregates_part4.sql. 
-- [SPARK-27980] Ordered-Set Aggregate Functions -- ordered-set aggregates diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql similarity index 99% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql index 1865ee94ec1f9..8fa3c0a6dfec9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-case.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql @@ -6,7 +6,7 @@ -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/case.sql -- Test the CASE statement -- --- This test file was converted from pgSQL/case.sql. +-- This test file was converted from postgreSQL/case.sql. CREATE TABLE CASE_TBL ( i integer, diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-join.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-join.sql similarity index 99% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-join.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-join.sql index c05aa156a13bf..e6fe1078b0d24 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-join.sql @@ -6,7 +6,7 @@ -- Test JOIN clauses -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/join.sql -- --- This test file was converted from pgSQL/join.sql. +-- This test file was converted from postgreSQL/join.sql. 
CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql similarity index 96% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_having.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql index c8e4346cedb89..412d45b49a184 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql @@ -5,7 +5,7 @@ -- SELECT_HAVING -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_having.sql -- --- This test file was converted from inputs/pgSQL/select_having.sql +-- This test file was converted from inputs/postgreSQL/select_having.sql -- TODO: We should add UDFs in GROUP BY clause when [SPARK-28386] and [SPARK-26741] is resolved. 
-- load test data diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_implicit.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_implicit.sql similarity index 98% rename from sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_implicit.sql rename to sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_implicit.sql index 373896ccd1674..1cbd77c6cf86d 100755 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_implicit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_implicit.sql @@ -9,7 +9,7 @@ -- - thomas 1998-07-09 -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_implicit.sql -- --- This test file was converted from pgSQL/select_implicit.sql +-- This test file was converted from postgreSQL/select_implicit.sql -- load test data CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-join-empty-relation.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-join-empty-relation.sql index 47fb70d02394b..b46206d4530ed 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-join-empty-relation.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-join-empty-relation.sql @@ -1,8 +1,3 @@ --- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false - -- This test file was converted from join-empty-relation.sql. 
CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a); diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-natural-join.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-natural-join.sql index e5eb812d69a1c..7cf080ea1b4eb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-natural-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-natural-join.sql @@ -1,8 +1,3 @@ --- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false - -- This test file was converted from natural-join.sql. create temporary view nt1 as select * from values diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql index 4eb0805c9cc67..4b09bcb988d25 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql @@ -1,8 +1,4 @@ -- This test file was converted from outer-join.sql. 
--- List of configuration the test suite is run against: ---SET spark.sql.autoBroadcastJoinThreshold=10485760 ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true ---SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false -- SPARK-17099: Incorrect result when HAVING clause is added to group by query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index faab4c61c8640..3d05dfda6c3fa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -1,3 +1,8 @@ +-- Test window operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + -- Test data. 
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (null, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"), @@ -115,3 +120,8 @@ SELECT cate, sum(val) OVER (w) FROM testData WHERE val is not null WINDOW w AS (PARTITION BY cate ORDER BY val); + +-- with filter predicate +SELECT val, cate, +count(val) FILTER (WHERE val > 1) OVER(PARTITION BY cate) +FROM testData ORDER BY cate, val; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out new file mode 100644 index 0000000000000..ce53e1c2863e0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -0,0 +1,138 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 16 + + +-- !query +create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123), + (3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789) +-- !query schema +struct<> +-- !query output + + + +-- !query +select id, a*10, b/10 from decimals_test order by id +-- !query schema +struct +-- !query output +1 1000.000000000000000 99.900000000000000000 +2 123451.230000000000000 1234.512300000000000000 +3 1.234567891011000 123.410000000000000000 +4 1234567891234567890.000000000000000 0.112345678912345679 + + +-- !query +select 10.3 * 3.0 +-- !query schema +struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)> +-- !query output +30.90 + + +-- !query +select 10.3000 * 3.0 +-- !query schema +struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)> +-- !query output +30.90000 + + +-- !query +select 10.30000 * 30.0 +-- !query schema +struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS 
DECIMAL(7,5))):decimal(11,6)> +-- !query output +309.000000 + + +-- !query +select 10.300000000000000000 * 3.000000000000000000 +-- !query schema +struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,34)> +-- !query output +30.9000000000000000000000000000000000 + + +-- !query +select 10.300000000000000000 * 3.0000000000000000000 +-- !query schema +struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,34)> +-- !query output +30.9000000000000000000000000000000000 + + +-- !query +select (5e36BD + 0.1) + 5e36BD +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Decimal(expanded,10000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1). + + +-- !query +select (-4e36BD - 0.1) - 7e36BD +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Decimal(expanded,-11000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1). + + +-- !query +select 12345678901234567890.0 * 12345678901234567890.0 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Decimal(expanded,152415787532388367501905199875019052100,39,0}) cannot be represented as Decimal(38, 2). + + +-- !query +select 1e35BD / 0.1 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Decimal(expanded,1000000000000000000000000000000000000,37,0}) cannot be represented as Decimal(38, 6). 
+ + +-- !query +select 123456789123456789.1234567890 * 1.123456789123456789 +-- !query schema +struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)> +-- !query output +138698367904130467.654320988515622621 + + +-- !query +select 123456789123456789.1234567890 * 1.123456789123456789 +-- !query schema +struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)> +-- !query output +138698367904130467.654320988515622621 + + +-- !query +select 12345678912345.123456789123 / 0.000000012345678 +-- !query schema +struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,9)> +-- !query output +1000000073899961059796.725866332 + + +-- !query +drop table decimals_test +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out new file mode 100644 index 0000000000000..7bef1bad4507e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out @@ -0,0 +1,284 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 29 + + +-- !query +create or replace temporary view nested as values + (1, array(32, 97), array(array(12, 99), array(123, 42), array(1))), + (2, array(77, -76), array(array(6, 96, 65), array(-1, -2))), + (3, array(12), array(array(17))) + as t(x, ys, zs) +-- !query schema +struct<> +-- !query output + + + +-- !query +select upper(x -> x) as v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +A lambda function should only be used in a higher order function. 
However, its class is org.apache.spark.sql.catalyst.expressions.Upper, which is not a higher order function.; line 1 pos 7 + + +-- !query +select transform(zs, z -> z) as v from nested +-- !query schema +struct>> +-- !query output +[[12,99],[123,42],[1]] +[[17]] +[[6,96,65],[-1,-2]] + + +-- !query +select transform(ys, y -> y * y) as v from nested +-- !query schema +struct> +-- !query output +[1024,9409] +[144] +[5929,5776] + + +-- !query +select transform(ys, (y, i) -> y + i) as v from nested +-- !query schema +struct> +-- !query output +[12] +[32,98] +[77,-75] + + +-- !query +select transform(zs, z -> concat(ys, z)) as v from nested +-- !query schema +struct>> +-- !query output +[[12,17]] +[[32,97,12,99],[32,97,123,42],[32,97,1]] +[[77,-76,6,96,65],[77,-76,-1,-2]] + + +-- !query +select transform(ys, 0) as v from nested +-- !query schema +struct> +-- !query output +[0,0] +[0,0] +[0] + + +-- !query +select transform(cast(null as array), x -> x + 1) as v +-- !query schema +struct> +-- !query output +NULL + + +-- !query +select filter(ys, y -> y > 30) as v from nested +-- !query schema +struct> +-- !query output +[32,97] +[77] +[] + + +-- !query +select filter(cast(null as array), y -> true) as v +-- !query schema +struct> +-- !query output +NULL + + +-- !query +select transform(zs, z -> filter(z, zz -> zz > 50)) as v from nested +-- !query schema +struct>> +-- !query output +[[96,65],[]] +[[99],[123],[]] +[[]] + + +-- !query +select aggregate(ys, 0, (y, a) -> y + a + x) as v from nested +-- !query schema +struct +-- !query output +131 +15 +5 + + +-- !query +select aggregate(ys, (0 as sum, 0 as n), (acc, x) -> (acc.sum + x, acc.n + 1), acc -> acc.sum / acc.n) as v from nested +-- !query schema +struct +-- !query output +0.5 +12.0 +64.5 + + +-- !query +select transform(zs, z -> aggregate(z, 1, (acc, val) -> acc * val * size(z))) as v from nested +-- !query schema +struct> +-- !query output +[1010880,8] +[17] +[4752,20664,1] + + +-- !query +select aggregate(cast(null 
as array), 0, (a, y) -> a + y + 1, a -> a + 2) as v +-- !query schema +struct +-- !query output +NULL + + +-- !query +select exists(ys, y -> y > 30) as v from nested +-- !query schema +struct +-- !query output +false +true +true + + +-- !query +select exists(cast(null as array), y -> y > 30) as v +-- !query schema +struct +-- !query output +NULL + + +-- !query +select zip_with(ys, zs, (a, b) -> a + size(b)) as v from nested +-- !query schema +struct> +-- !query output +[13] +[34,99,null] +[80,-74] + + +-- !query +select zip_with(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) -> concat(x, y)) as v +-- !query schema +struct> +-- !query output +["ad","be","cf"] + + +-- !query +select zip_with(array('a'), array('d', null, 'f'), (x, y) -> coalesce(x, y)) as v +-- !query schema +struct> +-- !query output +["a",null,"f"] + + +-- !query +create or replace temporary view nested as values + (1, map(1, 1, 2, 2, 3, 3)), + (2, map(4, 4, 5, 5, 6, 6)) + as t(x, ys) +-- !query schema +struct<> +-- !query output + + + +-- !query +select transform_keys(ys, (k, v) -> k) as v from nested +-- !query schema +struct> +-- !query output +{1:1,2:2,3:3} +{4:4,5:5,6:6} + + +-- !query +select transform_keys(ys, (k, v) -> k + 1) as v from nested +-- !query schema +struct> +-- !query output +{2:1,3:2,4:3} +{5:4,6:5,7:6} + + +-- !query +select transform_keys(ys, (k, v) -> k + v) as v from nested +-- !query schema +struct> +-- !query output +{10:5,12:6,8:4} +{2:1,4:2,6:3} + + +-- !query +select transform_values(ys, (k, v) -> v) as v from nested +-- !query schema +struct> +-- !query output +{1:1,2:2,3:3} +{4:4,5:5,6:6} + + +-- !query +select transform_values(ys, (k, v) -> v + 1) as v from nested +-- !query schema +struct> +-- !query output +{1:2,2:3,3:4} +{4:5,5:6,6:7} + + +-- !query +select transform_values(ys, (k, v) -> k + v) as v from nested +-- !query schema +struct> +-- !query output +{1:2,2:4,3:6} +{4:8,5:10,6:12} + + +-- !query +select transform(ys, all -> all * all) as v from values 
(array(32, 97)) as t(ys) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'all'(line 1, pos 21) + +== SQL == +select transform(ys, all -> all * all) as v from values (array(32, 97)) as t(ys) +---------------------^^^ + + +-- !query +select transform(ys, (all, i) -> all + i) as v from values (array(32, 97)) as t(ys) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'all'(line 1, pos 22) + +== SQL == +select transform(ys, (all, i) -> all + i) as v from values (array(32, 97)) as t(ys) +----------------------^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 13f72614f5778..7fdb4c53d1dcb 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1,439 +1,1011 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 35 +-- Number of queries: 99 --- !query 0 -SET spark.sql.parser.ansi.enabled=true --- !query 0 schema -struct --- !query 0 output -spark.sql.parser.ansi.enabled true +-- !query +select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15') +-- !query schema +struct +-- !query output +30 hours 33 minutes 36.003006 seconds --- !query 1 -select - '1' second, - 2 seconds, - '1' minute, - 2 minutes, - '1' hour, - 2 hours, - '1' day, - 2 days, - '1' month, - 2 months, - '1' year, - 2 years --- !query 1 schema -struct --- !query 1 output -interval 1 seconds interval 2 seconds interval 1 minutes interval 2 minutes interval 1 hours interval 2 hours interval 1 days interval 2 days interval 1 months interval 2 months interval 1 years interval 2 years - - --- !query 2 -select - interval '10-11' year to month, - interval '10' year, - interval '11' month --- !query 2 schema 
-struct --- !query 2 output -interval 10 years 11 months interval 10 years interval 11 months +-- !query +select interval 4 month 2 weeks 3 microseconds * 1.5 +-- !query schema +struct +-- !query output +6 months 21 days 0.000005 seconds --- !query 3 -select - '10-11' year to month, - '10' year, - '11' month --- !query 3 schema -struct --- !query 3 output -interval 10 years 11 months interval 10 years interval 11 months +-- !query +select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5 +-- !query schema +struct +-- !query output +16 hours --- !query 4 -select - interval '10 9:8:7.987654321' day to second, - interval '10' day, - interval '11' hour, - interval '12' minute, - interval '13' second, - interval '13.123456789' second --- !query 4 schema -struct --- !query 4 output -interval 1 weeks 3 days 9 hours 8 minutes 7 seconds 987 milliseconds 654 microseconds interval 1 weeks 3 days interval 11 hours interval 12 minutes interval 13 seconds interval 13 seconds 123 milliseconds 456 microseconds - - --- !query 5 -select - '10 9:8:7.987654321' day to second, - '10' day, - '11' hour, - '12' minute, - '13' second, - '13.123456789' second --- !query 5 schema -struct --- !query 5 output -interval 1 weeks 3 days 9 hours 8 minutes 7 seconds 987 milliseconds 654 microseconds interval 1 weeks 3 days interval 11 hours interval 12 minutes interval 13 seconds interval 13 seconds 123 milliseconds 456 microseconds - - --- !query 6 -select map(1, interval 1 day, 2, interval 3 week) --- !query 6 schema -struct> --- !query 6 output -{1:interval 1 days,2:interval 3 weeks} +-- !query +select interval '2 seconds' / 0 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +divide by zero --- !query 7 -select map(1, 1 day, 2, 3 week) --- !query 7 schema -struct> --- !query 7 output -{1:interval 1 days,2:interval 3 weeks} +-- !query +select interval '2 seconds' / null +-- !query schema +struct +-- !query output +NULL --- !query 8 -create temporary view 
interval_arithmetic as - select CAST(dateval AS date), CAST(tsval AS timestamp) from values - ('2012-01-01', '2012-01-01') - as interval_arithmetic(dateval, tsval) --- !query 8 schema +-- !query +select interval '2 seconds' * null +-- !query schema +struct +-- !query output +NULL + + +-- !query +select null * interval '2 seconds' +-- !query schema +struct +-- !query output +NULL + + +-- !query +select -interval '-1 month 1 day -1 second' +-- !query schema +struct<(- INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +1 months -1 days 1 seconds + + +-- !query +select -interval -1 month 1 day -1 second +-- !query schema +struct<(- INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +1 months -1 days 1 seconds + + +-- !query +select +interval '-1 month 1 day -1 second' +-- !query schema +struct<(+ INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +-1 months 1 days -1 seconds + + +-- !query +select +interval -1 month 1 day -1 second +-- !query schema +struct<(+ INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +-1 months 1 days -1 seconds + + +-- !query +select make_interval(1) +-- !query schema +struct +-- !query output +1 years + + +-- !query +select make_interval(1, 2) +-- !query schema +struct +-- !query output +1 years 2 months + + +-- !query +select make_interval(1, 2, 3) +-- !query schema +struct +-- !query output +1 years 2 months 21 days + + +-- !query +select make_interval(1, 2, 3, 4) +-- !query schema +struct +-- !query output +1 years 2 months 25 days + + +-- !query +select make_interval(1, 2, 3, 4, 5) +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 hours + + +-- !query +select make_interval(1, 2, 3, 4, 5, 6) +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 hours 6 minutes + + +-- !query +select make_interval(1, 2, 3, 4, 5, 6, 7.008009) +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + 
+-- !query +select cast('1 second' as interval) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select cast('+1 second' as interval) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select cast('-1 second' as interval) +-- !query schema +struct +-- !query output +-1 seconds + + +-- !query +select cast('+ 1 second' as interval) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select cast('- 1 second' as interval) +-- !query schema +struct +-- !query output +-1 seconds + + +-- !query +select cast('- -1 second' as interval) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('- +1 second' as interval) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select interval 13.123456789 seconds, interval -13.123456789 second +-- !query schema +struct +-- !query output +13.123456 seconds -13.123456 seconds + + +-- !query +select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond 9 microsecond +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query +select interval '30' year '25' month '-100' day '40' hour '80' minute '299.889987299' second +-- !query schema +struct +-- !query output +32 years 1 months -100 days 41 hours 24 minutes 59.889987 seconds + + +-- !query +select interval '0 0:0:0.1' day to second +-- !query schema +struct +-- !query output +0.1 seconds + + +-- !query +select interval '10-9' year to month +-- !query schema +struct +-- !query output +10 years 9 months + + +-- !query +select interval '20 15' day to hour +-- !query schema +struct +-- !query output +20 days 15 hours + + +-- !query +select interval '20 15:40' day to minute +-- !query schema +struct +-- !query output +20 days 15 hours 40 minutes + + +-- !query +select interval '20 15:40:32.99899999' day to second +-- !query schema +struct +-- !query output +20 days 15 hours 40 minutes 32.998999 seconds + + +-- 
!query +select interval '15:40' hour to minute +-- !query schema +struct +-- !query output +15 hours 40 minutes + + +-- !query +select interval '15:40:32.99899999' hour to second +-- !query schema +struct +-- !query output +15 hours 40 minutes 32.998999 seconds + + +-- !query +select interval '40:32.99899999' minute to second +-- !query schema +struct +-- !query output +40 minutes 32.998999 seconds + + +-- !query +select interval '40:32' minute to second +-- !query schema +struct +-- !query output +40 minutes 32 seconds + + +-- !query +select interval 30 day day +-- !query schema struct<> --- !query 8 output +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +no viable alternative at input 'day'(line 1, pos 23) +== SQL == +select interval 30 day day +-----------------------^^^ --- !query 9 -select - dateval, - dateval - interval '2-2' year to month, - dateval - interval '-2-2' year to month, - dateval + interval '2-2' year to month, - dateval + interval '-2-2' year to month, - - interval '2-2' year to month + dateval, - interval '2-2' year to month + dateval -from interval_arithmetic --- !query 9 schema -struct --- !query 9 output -2012-01-01 2009-11-01 2014-03-01 2014-03-01 2009-11-01 2009-11-01 2014-03-01 +-- !query +select interval '20 15:40:32.99899999' day to hour +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 10 -select - dateval, - dateval - '2-2' year to month, - dateval - '-2-2' year to month, - dateval + '2-2' year to month, - dateval + '-2-2' year to month, - - '2-2' year to month + dateval, - '2-2' year to month + dateval -from interval_arithmetic --- !query 10 schema -struct --- !query 10 output -2012-01-01 2009-11-01 2014-03-01 2014-03-01 2009-11-01 2009-11-01 2014-03-01 +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2})$': 20 15:40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior 
before Spark 3.0.(line 1, pos 16) +== SQL == +select interval '20 15:40:32.99899999' day to hour +----------------^^^ --- !query 11 -select - tsval, - tsval - interval '2-2' year to month, - tsval - interval '-2-2' year to month, - tsval + interval '2-2' year to month, - tsval + interval '-2-2' year to month, - - interval '2-2' year to month + tsval, - interval '2-2' year to month + tsval -from interval_arithmetic --- !query 11 schema -struct --- !query 11 output -2012-01-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 2014-03-01 00:00:00 2009-11-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 +-- !query +select interval '20 15:40:32.99899999' day to minute +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 12 -select - tsval, - tsval - '2-2' year to month, - tsval - '-2-2' year to month, - tsval + '2-2' year to month, - tsval + '-2-2' year to month, - - '2-2' year to month + tsval, - '2-2' year to month + tsval -from interval_arithmetic --- !query 12 schema -struct --- !query 12 output -2012-01-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 2014-03-01 00:00:00 2009-11-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2}):(?\d{1,2})$': 20 15:40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) +== SQL == +select interval '20 15:40:32.99899999' day to minute +----------------^^^ --- !query 13 -select - interval '2-2' year to month + interval '3-3' year to month, - interval '2-2' year to month - interval '3-3' year to month -from interval_arithmetic --- !query 13 schema -struct<(interval 2 years 2 months + interval 3 years 3 months):interval,(interval 2 years 2 months - interval 3 years 3 months):interval> --- !query 13 output -interval 5 years 5 months interval -1 years -1 months +-- !query +select interval '15:40:32.99899999' 
hour to minute +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 14 -select - '2-2' year to month + '3-3' year to month, - '2-2' year to month - '3-3' year to month -from interval_arithmetic --- !query 14 schema -struct<(interval 2 years 2 months + interval 3 years 3 months):interval,(interval 2 years 2 months - interval 3 years 3 months):interval> --- !query 14 output -interval 5 years 5 months interval -1 years -1 months +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2})$': 15:40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) +== SQL == +select interval '15:40:32.99899999' hour to minute +----------------^^^ --- !query 15 -select - dateval, - dateval - interval '99 11:22:33.123456789' day to second, - dateval - interval '-99 11:22:33.123456789' day to second, - dateval + interval '99 11:22:33.123456789' day to second, - dateval + interval '-99 11:22:33.123456789' day to second, - -interval '99 11:22:33.123456789' day to second + dateval, - interval '99 11:22:33.123456789' day to second + dateval -from interval_arithmetic --- !query 15 schema -struct --- !query 15 output -2012-01-01 2011-09-23 2012-04-09 2012-04-09 2011-09-23 2011-09-23 2012-04-09 +-- !query +select interval '15:40.99899999' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 16 -select - dateval, - dateval - '99 11:22:33.123456789' day to second, - dateval - '-99 11:22:33.123456789' day to second, - dateval + '99 11:22:33.123456789' day to second, - dateval + '-99 11:22:33.123456789' day to second, - - '99 11:22:33.123456789' day to second + dateval, - '99 11:22:33.123456789' day to second + dateval -from interval_arithmetic --- !query 16 schema -struct --- !query 16 output -2012-01-01 2011-09-23 2012-04-09 2012-04-09 2011-09-23 2011-09-23 
2012-04-09 +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 15:40.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) +== SQL == +select interval '15:40.99899999' hour to second +----------------^^^ --- !query 17 -select - tsval, - tsval - interval '99 11:22:33.123456789' day to second, - tsval - interval '-99 11:22:33.123456789' day to second, - tsval + interval '99 11:22:33.123456789' day to second, - tsval + interval '-99 11:22:33.123456789' day to second, - -interval '99 11:22:33.123456789' day to second + tsval, - interval '99 11:22:33.123456789' day to second + tsval -from interval_arithmetic --- !query 17 schema -struct --- !query 17 output -2012-01-01 00:00:00 2011-09-23 13:37:26.876544 2012-04-09 12:22:33.123456 2012-04-09 12:22:33.123456 2011-09-23 13:37:26.876544 2011-09-23 13:37:26.876544 2012-04-09 12:22:33.123456 +-- !query +select interval '15:40' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 18 -select - tsval, - tsval - '99 11:22:33.123456789' day to second, - tsval - '-99 11:22:33.123456789' day to second, - tsval + '99 11:22:33.123456789' day to second, - tsval + '-99 11:22:33.123456789' day to second, - - '99 11:22:33.123456789' day to second + tsval, - '99 11:22:33.123456789' day to second + tsval -from interval_arithmetic --- !query 18 schema -struct --- !query 18 output -2012-01-01 00:00:00 2011-09-23 13:37:26.876544 2012-04-09 12:22:33.123456 2012-04-09 12:22:33.123456 2011-09-23 13:37:26.876544 2011-09-23 13:37:26.876544 2012-04-09 12:22:33.123456 +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 15:40, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) +== SQL == +select 
interval '15:40' hour to second +----------------^^^ --- !query 19 -select - interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, - interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second -from interval_arithmetic --- !query 19 schema -struct<(interval 14 weeks 1 days 11 hours 22 minutes 33 seconds 123 milliseconds 456 microseconds + interval 1 weeks 3 days 9 hours 8 minutes 7 seconds 123 milliseconds 456 microseconds):interval,(interval 14 weeks 1 days 11 hours 22 minutes 33 seconds 123 milliseconds 456 microseconds - interval 1 weeks 3 days 9 hours 8 minutes 7 seconds 123 milliseconds 456 microseconds):interval> --- !query 19 output -interval 15 weeks 4 days 20 hours 30 minutes 40 seconds 246 milliseconds 912 microseconds interval 12 weeks 5 days 2 hours 14 minutes 26 seconds +-- !query +select interval '20 40:32.99899999' minute to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 20 40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '20 40:32.99899999' minute to second +----------------^^^ + + +-- !query +select interval 10 nanoseconds +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Error parsing ' 10 nanoseconds' to interval, invalid unit 'nanoseconds'(line 1, pos 16) + +== SQL == +select interval 10 nanoseconds +----------------^^^ + + +-- !query +select map(1, interval 1 day, 2, interval 3 week) +-- !query schema +struct> +-- !query output +{1:1 days,2:21 days} + + +-- !query +select interval 'interval 3 year 1 hour' +-- !query schema +struct +-- !query output +3 years 1 hours + + +-- !query +select interval '3 year 1 hour' +-- !query schema +struct 
+-- !query output +3 years 1 hours --- !query 20 -select - '99 11:22:33.123456789' day to second + '10 9:8:7.123456789' day to second, - '99 11:22:33.123456789' day to second - '10 9:8:7.123456789' day to second -from interval_arithmetic --- !query 20 schema -struct<(interval 14 weeks 1 days 11 hours 22 minutes 33 seconds 123 milliseconds 456 microseconds + interval 1 weeks 3 days 9 hours 8 minutes 7 seconds 123 milliseconds 456 microseconds):interval,(interval 14 weeks 1 days 11 hours 22 minutes 33 seconds 123 milliseconds 456 microseconds - interval 1 weeks 3 days 9 hours 8 minutes 7 seconds 123 milliseconds 456 microseconds):interval> --- !query 20 output -interval 15 weeks 4 days 20 hours 30 minutes 40 seconds 246 milliseconds 912 microseconds interval 12 weeks 5 days 2 hours 14 minutes 26 seconds +-- !query +select interval +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 21 -select 30 day --- !query 21 schema -struct --- !query 21 output -interval 4 weeks 2 days +at least one time unit should be given for interval literal(line 1, pos 7) +== SQL == +select interval +-------^^^ --- !query 22 -select 30 day day --- !query 22 schema + +-- !query +select interval 1 fake_unit +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 14) +Error parsing ' 1 fake_unit' to interval, invalid unit 'fake_unit'(line 1, pos 16) == SQL == -select 30 day day ---------------^^^ +select interval 1 fake_unit +----------------^^^ --- !query 23 -select 30 day day day --- !query 23 schema +-- !query +select interval 1 year to month +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 14) +The value of from-to unit must be a string(line 1, pos 16) == SQL == -select 30 day day day ---------------^^^ 
+select interval 1 year to month +----------------^^^ + +-- !query +select interval '1' year to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 24 -select date '2012-01-01' - 30 day --- !query 24 schema -struct --- !query 24 output -2011-12-02 +Intervals FROM year TO second are not supported.(line 1, pos 16) +== SQL == +select interval '1' year to second +----------------^^^ --- !query 25 -select date '2012-01-01' - 30 day day --- !query 25 schema + +-- !query +select interval '10-9' year to month '2-1' year to month +-- !query schema struct<> --- !query 25 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 34) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) == SQL == -select date '2012-01-01' - 30 day day -----------------------------------^^^ +select interval '10-9' year to month '2-1' year to month +-------------------------------------^^^ --- !query 26 -select date '2012-01-01' - 30 day day day --- !query 26 schema +-- !query +select interval '10-9' year to month '12:11:10' hour to second +-- !query schema struct<> --- !query 26 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 34) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) == SQL == -select date '2012-01-01' - 30 day day day -----------------------------------^^^ +select interval '10-9' year to month '12:11:10' hour to second +-------------------------------------^^^ --- !query 27 -select date '2012-01-01' + '-30' day --- !query 27 schema -struct --- !query 27 output -2011-12-02 +-- !query +select interval '1 15:11' day to minute '12:11:10' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +Can only have a single from-to unit in the interval literal 
syntax(line 1, pos 40) --- !query 28 -select date '2012-01-01' + interval '-30' day --- !query 28 schema -struct --- !query 28 output -2011-12-02 +== SQL == +select interval '1 15:11' day to minute '12:11:10' hour to second +----------------------------------------^^^ --- !query 29 -select date '2012-01-01' + interval (-30) day --- !query 29 schema +-- !query +select interval 1 year '2-1' year to month +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 42) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) == SQL == -select date '2012-01-01' + interval (-30) day -------------------------------------------^^^ +select interval 1 year '2-1' year to month +-----------------------^^^ --- !query 30 -select date '2012-01-01' + (-30) day --- !query 30 schema +-- !query +select interval 1 year '12:11:10' hour to second +-- !query schema struct<> --- !query 30 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 33) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) == SQL == -select date '2012-01-01' + (-30) day ----------------------------------^^^ +select interval 1 year '12:11:10' hour to second +-----------------------^^^ --- !query 31 -create temporary view t as select * from values (1), (2) as t(a) --- !query 31 schema +-- !query +select interval '10-9' year to month '1' year +-- !query schema struct<> --- !query 31 output +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) +== SQL == +select interval '10-9' year to month '1' year +-------------------------------------^^^ --- !query 32 -select date '2012-01-01' + interval (a + 1) day from t --- !query 32 schema +-- !query +select interval '12:11:10' hour to 
second '1' year +-- !query schema struct<> --- !query 32 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 44) +Can only have a single from-to unit in the interval literal syntax(line 1, pos 42) == SQL == -select date '2012-01-01' + interval (a + 1) day from t ---------------------------------------------^^^ +select interval '12:11:10' hour to second '1' year +------------------------------------------^^^ --- !query 33 -select date '2012-01-01' + (a + 1) day from t --- !query 33 schema +-- !query +select interval (-30) day +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'day'(line 1, pos 35) +no viable alternative at input 'day'(line 1, pos 22) == SQL == -select date '2012-01-01' + (a + 1) day from t ------------------------------------^^^ +select interval (-30) day +----------------------^^^ + +-- !query +select interval (a + 1) day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException --- !query 34 -SET spark.sql.parser.ansi.enabled=false --- !query 34 schema -struct --- !query 34 output -spark.sql.parser.ansi.enabled false +no viable alternative at input 'day'(line 1, pos 24) + +== SQL == +select interval (a + 1) day +------------------------^^^ + + +-- !query +select interval 30 day day day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'day'(line 1, pos 23) + +== SQL == +select interval 30 day day day +-----------------------^^^ + + +-- !query +select sum(cast(null as interval)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select sum(cast(v as interval)) from VALUES ('1 seconds') t(v) where 1=0 +-- !query schema +struct +-- !query output +NULL + + +-- !query +select sum(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), 
(null) t(v) +-- !query schema +struct +-- !query output +3 seconds + + +-- !query +select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +-3 seconds + + +-- !query +select sum(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +-7 days 2 seconds + + +-- !query +select + i, + sum(cast(v as interval)) +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +group by i +-- !query schema +struct +-- !query output +1 -2 days +2 2 seconds +3 NULL + + +-- !query +select + sum(cast(v as interval)) as sv +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +having sv is not null +-- !query schema +struct +-- !query output +-2 days 2 seconds + + +-- !query +SELECT + i, + sum(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM VALUES(1, '1 seconds'), (1, '2 seconds'), (2, NULL), (2, NULL) t(i,v) +-- !query schema +struct +-- !query output +1 2 seconds +1 3 seconds +2 NULL +2 NULL + + +-- !query +select avg(cast(v as interval)) from VALUES (null) t(v) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) where 1=0 +-- !query schema +struct +-- !query output +NULL + + +-- !query +select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +1.5 seconds + + +-- !query +select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +0.5 seconds + + +-- !query +select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v) +-- !query 
schema +struct +-- !query output +-1.5 seconds + + +-- !query +select avg(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +-3 days -11 hours -59 minutes -59 seconds + + +-- !query +select + i, + avg(cast(v as interval)) +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +group by i +-- !query schema +struct +-- !query output +1 -1 days +2 2 seconds +3 NULL + + +-- !query +select + avg(cast(v as interval)) as sv +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +having sv is not null +-- !query schema +struct +-- !query output +-15 hours -59 minutes -59.333333 seconds + + +-- !query +SELECT + i, + avg(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM VALUES (1,'1 seconds'), (1,'2 seconds'), (2,NULL), (2,NULL) t(i,v) +-- !query schema +struct +-- !query output +1 1.5 seconds +1 2 seconds +2 NULL +2 NULL + + +-- !query +create temporary view interval_arithmetic as + select CAST(dateval AS date), CAST(tsval AS timestamp) from values + ('2012-01-01', '2012-01-01') + as interval_arithmetic(dateval, tsval) +-- !query schema +struct<> +-- !query output + + + +-- !query +select + dateval, + dateval - interval '2-2' year to month, + dateval - interval '-2-2' year to month, + dateval + interval '2-2' year to month, + dateval + interval '-2-2' year to month, + - interval '2-2' year to month + dateval, + interval '2-2' year to month + dateval +from interval_arithmetic +-- !query schema +struct +-- !query output +2012-01-01 2009-11-01 2014-03-01 2014-03-01 2009-11-01 2009-11-01 2014-03-01 + + +-- !query +select + tsval, + tsval - interval '2-2' year to month, + tsval - interval '-2-2' year to month, + tsval + interval '2-2' year to month, + tsval + interval '-2-2' year to month, + - interval '2-2' year to month + tsval, + interval '2-2' year to month + tsval +from interval_arithmetic +-- !query schema 
+struct +-- !query output +2012-01-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 2014-03-01 00:00:00 2009-11-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 + + +-- !query +select + interval '2-2' year to month + interval '3-3' year to month, + interval '2-2' year to month - interval '3-3' year to month +from interval_arithmetic +-- !query schema +struct<(INTERVAL '2 years 2 months' + INTERVAL '3 years 3 months'):interval,(INTERVAL '2 years 2 months' - INTERVAL '3 years 3 months'):interval> +-- !query output +5 years 5 months -1 years -1 months + + +-- !query +select + dateval, + dateval - interval '99 11:22:33.123456789' day to second, + dateval - interval '-99 11:22:33.123456789' day to second, + dateval + interval '99 11:22:33.123456789' day to second, + dateval + interval '-99 11:22:33.123456789' day to second, + -interval '99 11:22:33.123456789' day to second + dateval, + interval '99 11:22:33.123456789' day to second + dateval +from interval_arithmetic +-- !query schema +struct +-- !query output +2012-01-01 2011-09-23 2012-04-09 2012-04-09 2011-09-23 2011-09-23 2012-04-09 + + +-- !query +select + tsval, + tsval - interval '99 11:22:33.123456789' day to second, + tsval - interval '-99 11:22:33.123456789' day to second, + tsval + interval '99 11:22:33.123456789' day to second, + tsval + interval '-99 11:22:33.123456789' day to second, + -interval '99 11:22:33.123456789' day to second + tsval, + interval '99 11:22:33.123456789' day to second + tsval +from interval_arithmetic +-- !query schema +struct +-- !query output +2012-01-01 00:00:00 2011-09-23 12:37:26.876544 2012-04-09 11:22:33.123456 2012-04-09 11:22:33.123456 2011-09-23 12:37:26.876544 2011-09-23 12:37:26.876544 2012-04-09 11:22:33.123456 + + +-- !query +select + interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, + interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second +from interval_arithmetic +-- !query schema 
+struct<(INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' + INTERVAL '10 days 9 hours 8 minutes 7.123456 seconds'):interval,(INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' - INTERVAL '10 days 9 hours 8 minutes 7.123456 seconds'):interval> +-- !query output +109 days 20 hours 30 minutes 40.246912 seconds 89 days 2 hours 14 minutes 26 seconds + + +-- !query +select interval '\t interval 1 day' +-- !query schema +struct +-- !query output +1 days + + +-- !query +select interval 'interval \t 1\tday' +-- !query schema +struct +-- !query output +1 days + + +-- !query +select interval 'interval\t1\tday' +-- !query schema +struct +-- !query output +1 days + + +-- !query +select interval '1\t' day +-- !query schema +struct +-- !query output +1 days + + +-- !query +select interval '1 ' day +-- !query schema +struct +-- !query output +1 days + + +-- !query +select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow + + +-- !query +select a - b from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow + + +-- !query +select b + interval '1 month' from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow + + +-- !query +select a * 1.1 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow + + +-- !query +select a / 0.5 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow + + +-- !query +SELECT + from_csv('1, 1 day', 'a INT, b interval'), + to_csv(from_csv('1, 
1 day', 'a INT, b interval')), + to_csv(named_struct('a', interval 32 month, 'b', interval 70 minute)), + from_csv(to_csv(named_struct('a', interval 32 month, 'b', interval 70 minute)), 'a interval, b interval') +-- !query schema +struct,to_csv(from_csv(1, 1 day)):string,to_csv(named_struct(a, INTERVAL '2 years 8 months', b, INTERVAL '1 hours 10 minutes')):string,from_csv(to_csv(named_struct(a, INTERVAL '2 years 8 months', b, INTERVAL '1 hours 10 minutes'))):struct> +-- !query output +{"a":1,"b":1 days} 1,1 days 2 years 8 months,1 hours 10 minutes {"a":2 years 8 months,"b":1 hours 10 minutes} + + +-- !query +SELECT + from_json('{"a":"1 days"}', 'a interval'), + to_json(from_json('{"a":"1 days"}', 'a interval')), + to_json(map('a', interval 25 month 100 day 130 minute)), + from_json(to_json(map('a', interval 25 month 100 day 130 minute)), 'a interval') +-- !query schema +struct,to_json(from_json({"a":"1 days"})):string,to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes')):string,from_json(to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes'))):struct> +-- !query output +{"a":1 days} {"a":"1 days"} {"a":"2 years 1 months 100 days 2 hours 10 minutes"} {"a":2 years 1 months 100 days 2 hours 10 minutes} diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out new file mode 100644 index 0000000000000..f6720f6c5faa4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out @@ -0,0 +1,481 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 50 + + +-- !query +select null, Null, nUll +-- !query schema +struct +-- !query output +NULL NULL NULL + + +-- !query +select true, tRue, false, fALse +-- !query schema +struct +-- !query output +true true false false + + +-- !query +select 1Y +-- !query schema +struct<1:tinyint> +-- !query output +1 + + +-- !query +select 127Y, -128Y +-- !query schema 
+struct<127:tinyint,-128:tinyint> +-- !query output +127 -128 + + +-- !query +select 128Y +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Numeric literal 128 does not fit in range [-128, 127] for type tinyint(line 1, pos 7) + +== SQL == +select 128Y +-------^^^ + + +-- !query +select 1S +-- !query schema +struct<1:smallint> +-- !query output +1 + + +-- !query +select 32767S, -32768S +-- !query schema +struct<32767:smallint,-32768:smallint> +-- !query output +32767 -32768 + + +-- !query +select 32768S +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Numeric literal 32768 does not fit in range [-32768, 32767] for type smallint(line 1, pos 7) + +== SQL == +select 32768S +-------^^^ + + +-- !query +select 1L, 2147483648L +-- !query schema +struct<1:bigint,2147483648:bigint> +-- !query output +1 2147483648 + + +-- !query +select 9223372036854775807L, -9223372036854775808L +-- !query schema +struct<9223372036854775807:bigint,-9223372036854775808:bigint> +-- !query output +9223372036854775807 -9223372036854775808 + + +-- !query +select 9223372036854775808L +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Numeric literal 9223372036854775808 does not fit in range [-9223372036854775808, 9223372036854775807] for type bigint(line 1, pos 7) + +== SQL == +select 9223372036854775808L +-------^^^ + + +-- !query +select 1, -1 +-- !query schema +struct<1:int,-1:int> +-- !query output +1 -1 + + +-- !query +select 2147483647, -2147483648 +-- !query schema +struct<2147483647:int,-2147483648:int> +-- !query output +2147483647 -2147483648 + + +-- !query +select 9223372036854775807, -9223372036854775808 +-- !query schema +struct<9223372036854775807:bigint,-9223372036854775808:bigint> +-- !query output +9223372036854775807 -9223372036854775808 + + +-- !query +select 9223372036854775808, -9223372036854775809 +-- !query schema 
+struct<9223372036854775808:decimal(19,0),-9223372036854775809:decimal(19,0)> +-- !query output +9223372036854775808 -9223372036854775809 + + +-- !query +select 1234567890123456789012345678901234567890 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +decimal can only support precision up to 38 +== SQL == +select 1234567890123456789012345678901234567890 + + +-- !query +select 1234567890123456789012345678901234567890.0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +decimal can only support precision up to 38 +== SQL == +select 1234567890123456789012345678901234567890.0 + + +-- !query +select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1 +-- !query schema +struct<1.0:double,1.2:double,1.0E10:double,150000.0:double,0.1:double,0.1:double,10000.0:double,90.0:double,90.0:double,90.0:double,90.0:double> +-- !query output +1.0 1.2 1.0E10 150000.0 0.1 0.1 10000.0 90.0 90.0 90.0 90.0 + + +-- !query +select -1D, -1.2D, -1e10, -1.5e5, -.10D, -0.10D, -.1e5 +-- !query schema +struct<-1.0:double,-1.2:double,-1.0E10:double,-150000.0:double,-0.1:double,-0.1:double,-10000.0:double> +-- !query output +-1.0 -1.2 -1.0E10 -150000.0 -0.1 -0.1 -10000.0 + + +-- !query +select .e3 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'select .'(line 1, pos 7) + +== SQL == +select .e3 +-------^^^ + + +-- !query +select 1E309, -1E309 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Numeric literal 1E309 does not fit in range [-1.7976931348623157E+308, 1.7976931348623157E+308] for type double(line 1, pos 7) + +== SQL == +select 1E309, -1E309 +-------^^^ + + +-- !query +select 0.3, -0.8, .5, -.18, 0.1111, .1111 +-- !query schema 
+struct<0.3:decimal(1,1),-0.8:decimal(1,1),0.5:decimal(1,1),-0.18:decimal(2,2),0.1111:decimal(4,4),0.1111:decimal(4,4)> +-- !query output +0.3 -0.8 0.5 -0.18 0.1111 0.1111 + + +-- !query +select 123456789012345678901234567890123456789e10d, 123456789012345678901234567890123456789.1e10d +-- !query schema +struct<1.2345678901234568E48:double,1.2345678901234568E48:double> +-- !query output +1.2345678901234568E48 1.2345678901234568E48 + + +-- !query +select "Hello Peter!", 'hello lee!' +-- !query schema +struct +-- !query output +Hello Peter! hello lee! + + +-- !query +select 'hello' 'world', 'hello' " " 'lee' +-- !query schema +struct +-- !query output +helloworld hello lee + + +-- !query +select "hello 'peter'" +-- !query schema +struct +-- !query output +hello 'peter' + + +-- !query +select 'pattern%', 'no-pattern\%', 'pattern\\%', 'pattern\\\%' +-- !query schema +struct +-- !query output +pattern% no-pattern\% pattern\% pattern\\% + + +-- !query +select '\'', '"', '\n', '\r', '\t', 'Z' +-- !query schema +struct<':string,":string, +:string, :string, :string,Z:string> +-- !query output +' " + Z + + +-- !query +select '\110\145\154\154\157\041' +-- !query schema +struct +-- !query output +Hello! 
+ + +-- !query +select '\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029' +-- !query schema +struct +-- !query output +World :) + + +-- !query +select dAte '2016-03-12' +-- !query schema +struct +-- !query output +2016-03-12 + + +-- !query +select date 'mar 11 2016' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: mar 11 2016(line 1, pos 7) + +== SQL == +select date 'mar 11 2016' +-------^^^ + + +-- !query +select tImEstAmp '2016-03-11 20:54:00.000' +-- !query schema +struct +-- !query output +2016-03-11 20:54:00 + + +-- !query +select timestamp '2016-33-11 20:54:00.000' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the TIMESTAMP value: 2016-33-11 20:54:00.000(line 1, pos 7) + +== SQL == +select timestamp '2016-33-11 20:54:00.000' +-------^^^ + + +-- !query +select GEO '(10,-6)' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Literals of type 'GEO' are currently not supported.(line 1, pos 7) + +== SQL == +select GEO '(10,-6)' +-------^^^ + + +-- !query +select 90912830918230182310293801923652346786BD, 123.0E-28BD, 123.08BD +-- !query schema +struct<90912830918230182310293801923652346786:decimal(38,0),1.230E-26:decimal(29,29),123.08:decimal(5,2)> +-- !query output +90912830918230182310293801923652346786 0.00000000000000000000000001230 123.08 + + +-- !query +select 1.20E-38BD +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +decimal can only support precision up to 38(line 1, pos 7) + +== SQL == +select 1.20E-38BD +-------^^^ + + +-- !query +select x'2379ACFe' +-- !query schema +struct +-- !query output +#y�� + + +-- !query +select X'XuZ' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +contains illegal character for hexBinary: 0XuZ(line 1, pos 7) + +== SQL == 
+select X'XuZ' +-------^^^ + + +-- !query +SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8 +-- !query schema +struct<3.14:decimal(3,2),-3.14:decimal(3,2),3.14E8:double,3.14E-8:double,-3.14E8:double,-3.14E-8:double,3.14E8:double,3.14E8:double,3.14E-8:double> +-- !query output +3.14 -3.14 3.14E8 3.14E-8 -3.14E8 -3.14E-8 3.14E8 3.14E8 3.14E-8 + + +-- !query +select +date '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ DATE '1999-01-01')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'DATE '1999-01-01'' is of date type.; line 1 pos 7 + + +-- !query +select +timestamp '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ TIMESTAMP '1999-01-01 00:00:00')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'TIMESTAMP '1999-01-01 00:00:00'' is of timestamp type.; line 1 pos 7 + + +-- !query +select +interval '1 day' +-- !query schema +struct<(+ INTERVAL '1 days'):interval> +-- !query output +1 days + + +-- !query +select +map(1, 2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ map(1, 2))' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'map(1, 2)' is of map type.; line 1 pos 7 + + +-- !query +select +array(1,2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ array(1, 2))' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'array(1, 2)' is of array type.; line 1 pos 7 + + +-- !query +select +named_struct('a', 1, 'b', 'spark') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ named_struct('a', 1, 'b', 'spark'))' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 
'named_struct('a', 1, 'b', 'spark')' is of struct type.; line 1 pos 7 + + +-- !query +select +X'1' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ X'01')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'X'01'' is of binary type.; line 1 pos 7 + + +-- !query +select -date '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(- DATE '1999-01-01')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'DATE '1999-01-01'' is of date type.; line 1 pos 7 + + +-- !query +select -timestamp '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(- TIMESTAMP '1999-01-01 00:00:00')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'TIMESTAMP '1999-01-01 00:00:00'' is of timestamp type.; line 1 pos 7 + + +-- !query +select -x'2379ACFe' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(- X'2379ACFE')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'X'2379ACFE'' is of binary type.; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 5f5d988771847..2c2b1a7856304 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -2,45 +2,45 @@ -- Number of queries: 12 --- !query 0 +-- !query create temporary view data as select * from values ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) as data(a, b, c) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query select * from data 
--- !query 1 schema +-- !query schema struct,c:array>> --- !query 1 output +-- !query output one [11,12,13] [[111,112,113],[121,122,123]] two [21,22,23] [[211,212,213],[221,222,223]] --- !query 2 +-- !query select a, b[0], b[0] + b[1] from data --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output one 11 23 two 21 43 --- !query 3 +-- !query select a, c[0][0] + c[0][0 + 1] from data --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output one 223 two 423 --- !query 4 +-- !query create temporary view primitive_arrays as select * from values ( array(true), array(2Y, 1Y), @@ -64,21 +64,21 @@ create temporary view primitive_arrays as select * from values ( date_array, timestamp_array ) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query select * from primitive_arrays --- !query 5 schema +-- !query schema struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,date_array:array,timestamp_array:array> --- !query 5 output -[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0] +-- !query output +[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00,2016-11-12 20:54:00] --- !query 6 +-- !query select array_contains(boolean_array, true), array_contains(boolean_array, false), array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y), @@ -91,22 +91,22 @@ select array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'), array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000') from primitive_arrays --- !query 6 schema -struct --- !query 6 output +-- !query schema +struct 
+-- !query output true false true false true false true false true false true false true false true false true false true false --- !query 7 +-- !query select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output false false true true --- !query 8 +-- !query select sort_array(boolean_array), sort_array(tinyint_array), @@ -119,31 +119,31 @@ select sort_array(date_array), sort_array(timestamp_array) from primitive_arrays --- !query 8 schema +-- !query schema struct,sort_array(tinyint_array, true):array,sort_array(smallint_array, true):array,sort_array(int_array, true):array,sort_array(bigint_array, true):array,sort_array(decimal_array, true):array,sort_array(double_array, true):array,sort_array(float_array, true):array,sort_array(date_array, true):array,sort_array(timestamp_array, true):array> --- !query 8 output -[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00.0,2016-11-15 20:54:00.0] +-- !query output +[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00,2016-11-15 20:54:00] --- !query 9 +-- !query select sort_array(array('b', 'd'), '1') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 --- !query 10 +-- !query select sort_array(array('b', 'd'), cast(NULL as boolean)) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'sort_array(array('b', 'd'), CAST(NULL AS BOOLEAN))' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 --- !query 11 +-- 
!query select size(boolean_array), size(tinyint_array), @@ -156,7 +156,7 @@ select size(date_array), size(timestamp_array) from primitive_arrays --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 2 2 2 2 2 2 2 2 2 diff --git a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out new file mode 100644 index 0000000000000..552b027df1bc0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out @@ -0,0 +1,233 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 27 + + +-- !query +select bit_count(null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select bit_count(true) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(false) +-- !query schema +struct +-- !query output +0 + + +-- !query +select bit_count(cast(1 as tinyint)) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(cast(2 as tinyint)) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(cast(3 as tinyint)) +-- !query schema +struct +-- !query output +2 + + +-- !query +select bit_count(1S) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(2S) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(3S) +-- !query schema +struct +-- !query output +2 + + +-- !query +select bit_count(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(2) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(3) +-- !query schema +struct +-- !query output +2 + + +-- !query +select bit_count(1L) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(2L) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count(3L) +-- !query schema +struct +-- !query output +2 + + +-- !query +select bit_count(-1L) +-- !query schema +struct 
+-- !query output +64 + + +-- !query +select bit_count(9223372036854775807L) +-- !query schema +struct +-- !query output +63 + + +-- !query +select bit_count(-9223372036854775808L) +-- !query schema +struct +-- !query output +1 + + +-- !query +select bit_count("bit count") +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'bit_count('bit count')' due to data type mismatch: argument 1 requires (integral or boolean) type, however, ''bit count'' is of string type.; line 1 pos 7 + + +-- !query +select bit_count('a') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'bit_count('a')' due to data type mismatch: argument 1 requires (integral or boolean) type, however, ''a'' is of string type.; line 1 pos 7 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (2, 3, 4, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT BIT_XOR(b3) AS n1 FROM bitwise_test where 1 = 0 +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT BIT_XOR(b4) AS n1 FROM bitwise_test where b4 is null +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT + BIT_XOR(cast(b1 as tinyint)) AS a4, + BIT_XOR(cast(b2 as smallint)) AS b5, + BIT_XOR(b3) AS c2, + BIT_XOR(b4) AS d2, + BIT_XOR(distinct b4) AS e2 +FROM bitwise_test +-- !query schema +struct +-- !query output +4 5 2 2 2 + + +-- !query +SELECT bit_xor(b3) FROM bitwise_test GROUP BY b1 & 1 +-- !query schema +struct +-- !query output +4 +6 + + +-- !query +SELECT b1, bit_xor(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7 +-- !query schema +struct +-- !query output +1 1 +2 3 + + +-- !query +SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test +-- !query schema +struct +-- !query output +1 1 1 +2 3 3 +7 7 7 diff --git 
a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index adad21f049440..35b4c0e79720b 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1,270 +1,270 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 35 +-- Number of queries: 46 --- !query 0 +-- !query SELECT CAST('1.23' AS int) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1 --- !query 1 +-- !query SELECT CAST('1.23' AS long) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 --- !query 2 +-- !query SELECT CAST('-4.56' AS int) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output -4 --- !query 3 +-- !query SELECT CAST('-4.56' AS long) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output -4 --- !query 4 +-- !query SELECT CAST('abc' AS int) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output NULL --- !query 5 +-- !query SELECT CAST('abc' AS long) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output NULL --- !query 6 +-- !query SELECT CAST('1234567890123' AS int) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL --- !query 7 +-- !query SELECT CAST('12345678901234567890123' AS long) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL --- !query 8 +-- !query SELECT CAST('' AS int) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output NULL --- !query 9 +-- !query SELECT CAST('' AS long) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output NULL --- !query 10 +-- !query SELECT CAST(NULL AS int) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NULL --- !query 11 +-- !query SELECT 
CAST(NULL AS long) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output NULL --- !query 12 +-- !query SELECT CAST('123.a' AS int) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output NULL --- !query 13 +-- !query SELECT CAST('123.a' AS long) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output NULL --- !query 14 +-- !query SELECT CAST('-2147483648' AS int) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output -2147483648 --- !query 15 +-- !query SELECT CAST('-2147483649' AS int) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output NULL --- !query 16 +-- !query SELECT CAST('2147483647' AS int) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 2147483647 --- !query 17 +-- !query SELECT CAST('2147483648' AS int) --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output NULL --- !query 18 +-- !query SELECT CAST('-9223372036854775808' AS long) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output -9223372036854775808 --- !query 19 +-- !query SELECT CAST('-9223372036854775809' AS long) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output NULL --- !query 20 +-- !query SELECT CAST('9223372036854775807' AS long) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 9223372036854775807 --- !query 21 +-- !query SELECT CAST('9223372036854775808' AS long) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL --- !query 22 +-- !query SELECT HEX(CAST('abc' AS binary)) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output 616263 --- !query 23 +-- !query SELECT HEX(CAST(CAST(123 AS byte) AS binary)) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 7B --- !query 24 +-- 
!query SELECT HEX(CAST(CAST(-123 AS byte) AS binary)) --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 85 --- !query 25 +-- !query SELECT HEX(CAST(123S AS binary)) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 007B --- !query 26 +-- !query SELECT HEX(CAST(-123S AS binary)) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output FF85 --- !query 27 +-- !query SELECT HEX(CAST(123 AS binary)) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 0000007B --- !query 28 +-- !query SELECT HEX(CAST(-123 AS binary)) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output FFFFFF85 --- !query 29 +-- !query SELECT HEX(CAST(123L AS binary)) --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 000000000000007B --- !query 30 +-- !query SELECT HEX(CAST(-123L AS binary)) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output FFFFFFFFFFFFFF85 --- !query 31 +-- !query DESC FUNCTION boolean --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output Class: org.apache.spark.sql.catalyst.expressions.Cast Function: boolean Usage: boolean(expr) - Casts the value `expr` to the target data type `boolean`. --- !query 32 +-- !query DESC FUNCTION EXTENDED boolean --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output Class: org.apache.spark.sql.catalyst.expressions.Cast Extended Usage: No example/argument for boolean. @@ -273,17 +273,108 @@ Function: boolean Usage: boolean(expr) - Casts the value `expr` to the target data type `boolean`. 
--- !query 33 +-- !query SELECT CAST('interval 3 month 1 hour' AS interval) --- !query 33 schema +-- !query schema struct --- !query 33 output -interval 3 months 1 hours +-- !query output +3 months 1 hours --- !query 34 +-- !query SELECT CAST(interval 3 month 1 hour AS string) --- !query 34 schema -struct --- !query 34 output -interval 3 months 1 hours +-- !query schema +struct +-- !query output +3 months 1 hours + + +-- !query +select cast(' 1' as tinyint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1\t' as tinyint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as smallint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as INT) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as bigint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as float) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +select cast(' 1 ' as DOUBLE) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +select cast('1.0 ' as DEC) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast('\t\t true \n\r ' as boolean) +-- !query schema +struct +-- !query output +true + + +-- !query +select cast('\t\n false \t\r' as boolean) +-- !query schema +struct +-- !query output +false + + +-- !query +select cast('\t\n xyz \t\r' as boolean) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out index 114617873af47..b1a32ad1f63e9 100644 --- a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out @@ -1,323 +1,257 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 34 +-- Number of queries: 28 --- !query 0 +-- !query CREATE TABLE test_change(a INT, b 
STRING, c INT) using parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query DESC test_change --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output a int b string c int --- !query 2 -ALTER TABLE test_change CHANGE a a1 INT --- !query 2 schema +-- !query +ALTER TABLE test_change CHANGE a +-- !query schema struct<> --- !query 2 output -org.apache.spark.sql.AnalysisException -ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'a1' with type 'IntegerType'; +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Operation not allowed: ALTER TABLE table CHANGE COLUMN requires a TYPE, a SET/DROP, a COMMENT, or a FIRST/AFTER(line 1, pos 0) +== SQL == +ALTER TABLE test_change CHANGE a +^^^ --- !query 3 + +-- !query DESC test_change --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output a int b string c int --- !query 4 -ALTER TABLE test_change CHANGE a a STRING --- !query 4 schema +-- !query +ALTER TABLE test_change RENAME COLUMN a TO a1 +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException -ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'a' with type 'StringType'; +RENAME COLUMN is only supported with v2 tables.; --- !query 5 +-- !query DESC test_change --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output a int b string c int --- !query 6 -ALTER TABLE test_change CHANGE a a INT AFTER b --- !query 6 schema +-- !query +ALTER TABLE test_change CHANGE a TYPE STRING +-- !query schema struct<> --- !query 6 output -org.apache.spark.sql.catalyst.parser.ParseException +-- !query output +org.apache.spark.sql.AnalysisException +ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'a' with type 'StringType'; -Operation not allowed: ALTER TABLE 
table [PARTITION partition_spec] CHANGE COLUMN ... FIRST | AFTER otherCol(line 1, pos 0) -== SQL == -ALTER TABLE test_change CHANGE a a INT AFTER b -^^^ +-- !query +DESC test_change +-- !query schema +struct +-- !query output +a int +b string +c int --- !query 7 -ALTER TABLE test_change CHANGE b b STRING FIRST --- !query 7 schema +-- !query +ALTER TABLE test_change CHANGE a AFTER b +-- !query schema struct<> --- !query 7 output -org.apache.spark.sql.catalyst.parser.ParseException +-- !query output +org.apache.spark.sql.AnalysisException +ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.; -Operation not allowed: ALTER TABLE table [PARTITION partition_spec] CHANGE COLUMN ... FIRST | AFTER otherCol(line 1, pos 0) -== SQL == -ALTER TABLE test_change CHANGE b b STRING FIRST -^^^ +-- !query +ALTER TABLE test_change CHANGE b FIRST +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.; --- !query 8 +-- !query DESC test_change --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output a int b string c int --- !query 9 -ALTER TABLE test_change CHANGE a a INT COMMENT 'this is column a' --- !query 9 schema +-- !query +ALTER TABLE test_change CHANGE a COMMENT 'this is column a' +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 -ALTER TABLE test_change CHANGE b b STRING COMMENT '#*02?`' --- !query 10 schema +-- !query +ALTER TABLE test_change CHANGE b COMMENT '#*02?`' +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 -ALTER TABLE test_change CHANGE c c INT COMMENT '' --- !query 11 schema +-- !query +ALTER TABLE test_change CHANGE c COMMENT '' +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query DESC test_change --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output a int this is column a b string #*02?` c int --- !query 
13 -ALTER TABLE test_change CHANGE a a INT COMMENT 'this is column a' --- !query 13 schema +-- !query +ALTER TABLE test_change CHANGE a TYPE INT +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 -DESC test_change --- !query 14 schema -struct --- !query 14 output -a int this is column a -b string #*02?` -c int - - --- !query 15 -ALTER TABLE test_change CHANGE invalid_col invalid_col INT --- !query 15 schema +-- !query +ALTER TABLE test_change CHANGE a COMMENT 'this is column a' +-- !query schema struct<> --- !query 15 output -org.apache.spark.sql.AnalysisException -Can't find column `invalid_col` given table data columns [`a`, `b`, `c`]; +-- !query output --- !query 16 + +-- !query DESC test_change --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output a int this is column a b string #*02?` c int --- !query 17 -ALTER TABLE test_change CHANGE a a1 STRING COMMENT 'this is column a1' AFTER b --- !query 17 schema +-- !query +ALTER TABLE test_change CHANGE invalid_col TYPE INT +-- !query schema struct<> --- !query 17 output -org.apache.spark.sql.catalyst.parser.ParseException - -Operation not allowed: ALTER TABLE table [PARTITION partition_spec] CHANGE COLUMN ... 
FIRST | AFTER otherCol(line 1, pos 0) - -== SQL == -ALTER TABLE test_change CHANGE a a1 STRING COMMENT 'this is column a1' AFTER b -^^^ +-- !query output +org.apache.spark.sql.AnalysisException +Can't find column `invalid_col` given table data columns [`a`, `b`, `c`]; --- !query 18 +-- !query DESC test_change --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output a int this is column a b string #*02?` c int --- !query 19 -SET spark.sql.caseSensitive=false --- !query 19 schema -struct --- !query 19 output -spark.sql.caseSensitive false - - --- !query 20 -ALTER TABLE test_change CHANGE a A INT COMMENT 'this is column A' --- !query 20 schema +-- !query +ALTER TABLE test_change CHANGE A COMMENT 'case insensitivity' +-- !query schema struct<> --- !query 20 output - +-- !query output --- !query 21 -SET spark.sql.caseSensitive=true --- !query 21 schema -struct --- !query 21 output -spark.sql.caseSensitive true - --- !query 22 -ALTER TABLE test_change CHANGE a A INT COMMENT 'this is column A1' --- !query 22 schema -struct<> --- !query 22 output -org.apache.spark.sql.AnalysisException -ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'A' with type 'IntegerType'; - - --- !query 23 +-- !query DESC test_change --- !query 23 schema +-- !query schema struct --- !query 23 output -a int this is column A +-- !query output +a int case insensitivity b string #*02?` c int --- !query 24 +-- !query CREATE TEMPORARY VIEW temp_view(a, b) AS SELECT 1, "one" --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output --- !query 25 -ALTER TABLE temp_view CHANGE a a INT COMMENT 'this is column a' --- !query 25 schema +-- !query +ALTER TABLE temp_view CHANGE a TYPE INT +-- !query schema struct<> --- !query 25 output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'temp_view' not found in database 'default'; +-- !query output +org.apache.spark.sql.AnalysisException 
+Invalid command: 'temp_view' is a view not a table.; line 1 pos 0 --- !query 26 +-- !query CREATE GLOBAL TEMPORARY VIEW global_temp_view(a, b) AS SELECT 1, "one" --- !query 26 schema -struct<> --- !query 26 output - - - --- !query 27 -ALTER TABLE global_temp.global_temp_view CHANGE a a INT COMMENT 'this is column a' --- !query 27 schema -struct<> --- !query 27 output -org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException -Database 'global_temp' not found; - - --- !query 28 -CREATE TABLE partition_table(a INT, b STRING, c INT, d STRING) USING parquet PARTITIONED BY (c, d) --- !query 28 schema +-- !query schema struct<> --- !query 28 output +-- !query output --- !query 29 -ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT --- !query 29 schema +-- !query +ALTER TABLE global_temp.global_temp_view CHANGE a TYPE INT +-- !query schema struct<> --- !query 29 output -org.apache.spark.sql.catalyst.parser.ParseException - -Operation not allowed: ALTER TABLE table PARTITION partition_spec CHANGE COLUMN(line 1, pos 0) - -== SQL == -ALTER TABLE partition_table PARTITION (c = 1) CHANGE COLUMN a new_a INT -^^^ - - --- !query 30 -ALTER TABLE partition_table CHANGE COLUMN c c INT COMMENT 'this is column C' --- !query 30 schema -struct<> --- !query 30 output +-- !query output org.apache.spark.sql.AnalysisException -Can't find column `c` given table data columns [`a`, `b`]; +Invalid command: 'global_temp.global_temp_view' is a view not a table.; line 1 pos 0 --- !query 31 +-- !query DROP TABLE test_change --- !query 31 schema -struct<> --- !query 31 output - - - --- !query 32 -DROP TABLE partition_table --- !query 32 schema +-- !query schema struct<> --- !query 32 output +-- !query output --- !query 33 +-- !query DROP VIEW global_temp.global_temp_view --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out 
b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out index 9fc97f0c39149..f34b75a379aae 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out @@ -2,239 +2,239 @@ -- Number of queries: 28 --- !query 0 +-- !query CREATE DATABASE mydb1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query USE mydb1 --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TABLE t1 USING parquet AS SELECT 1 AS i1 --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE DATABASE mydb2 --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query USE mydb2 --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TABLE t1 USING parquet AS SELECT 20 AS i1 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SET spark.sql.crossJoin.enabled = true --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output spark.sql.crossJoin.enabled true --- !query 7 +-- !query USE mydb1 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query SELECT i1 FROM t1, mydb1.t1 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'i1' is ambiguous, could be: mydb1.t1.i1, mydb1.t1.i1.; line 1 pos 7 --- !query 9 +-- !query SELECT t1.i1 FROM t1, mydb1.t1 --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException Reference 't1.i1' is ambiguous, could be: mydb1.t1.i1, mydb1.t1.i1.; line 1 pos 7 --- !query 10 +-- !query 
SELECT mydb1.t1.i1 FROM t1, mydb1.t1 --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'mydb1.t1.i1' is ambiguous, could be: mydb1.t1.i1, mydb1.t1.i1.; line 1 pos 7 --- !query 11 +-- !query SELECT i1 FROM t1, mydb2.t1 --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'i1' is ambiguous, could be: mydb1.t1.i1, mydb2.t1.i1.; line 1 pos 7 --- !query 12 +-- !query SELECT t1.i1 FROM t1, mydb2.t1 --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException Reference 't1.i1' is ambiguous, could be: mydb1.t1.i1, mydb2.t1.i1.; line 1 pos 7 --- !query 13 +-- !query USE mydb2 --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query SELECT i1 FROM t1, mydb1.t1 --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'i1' is ambiguous, could be: mydb2.t1.i1, mydb1.t1.i1.; line 1 pos 7 --- !query 15 +-- !query SELECT t1.i1 FROM t1, mydb1.t1 --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException Reference 't1.i1' is ambiguous, could be: mydb2.t1.i1, mydb1.t1.i1.; line 1 pos 7 --- !query 16 +-- !query SELECT i1 FROM t1, mydb2.t1 --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'i1' is ambiguous, could be: mydb2.t1.i1, mydb2.t1.i1.; line 1 pos 7 --- !query 17 +-- !query SELECT t1.i1 FROM t1, mydb2.t1 --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException Reference 't1.i1' is ambiguous, could be: mydb2.t1.i1, mydb2.t1.i1.; line 1 pos 7 --- !query 18 +-- !query SELECT db1.t1.i1 FROM t1, mydb2.t1 
--- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`db1.t1.i1`' given input columns: [mydb2.t1.i1, mydb2.t1.i1]; line 1 pos 7 --- !query 19 +-- !query SET spark.sql.crossJoin.enabled = false --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output spark.sql.crossJoin.enabled false --- !query 20 +-- !query USE mydb1 --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query SELECT mydb1.t1 FROM t1 --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`mydb1.t1`' given input columns: [mydb1.t1.i1]; line 1 pos 7 --- !query 22 +-- !query SELECT t1.x.y.* FROM t1 --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 't1.x.y.*' given input columns 'i1'; --- !query 23 +-- !query SELECT t1 FROM mydb1.t1 --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`t1`' given input columns: [mydb1.t1.i1]; line 1 pos 7 --- !query 24 +-- !query USE mydb2 --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output --- !query 25 +-- !query SELECT mydb1.t1.i1 FROM t1 --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`mydb1.t1.i1`' given input columns: [mydb2.t1.i1]; line 1 pos 7 --- !query 26 +-- !query DROP DATABASE mydb1 CASCADE --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output --- !query 27 +-- !query DROP DATABASE mydb2 CASCADE --- !query 27 schema +-- !query schema struct<> --- !query 27 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out 
b/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out index 3d8fb661afe55..16ff4f51bd5f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-views.sql.out @@ -2,137 +2,137 @@ -- Number of queries: 17 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW view1 AS SELECT 2 AS i1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT view1.* FROM view1 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 2 --- !query 2 +-- !query SELECT * FROM view1 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 2 --- !query 3 +-- !query SELECT view1.i1 FROM view1 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 2 --- !query 4 +-- !query SELECT i1 FROM view1 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 2 --- !query 5 +-- !query SELECT a.i1 FROM view1 AS a --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 2 --- !query 6 +-- !query SELECT i1 FROM view1 AS a --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 2 --- !query 7 +-- !query DROP VIEW view1 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query CREATE OR REPLACE GLOBAL TEMPORARY VIEW view1 as SELECT 1 as i1 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query SELECT * FROM global_temp.view1 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 --- !query 10 +-- !query SELECT global_temp.view1.* FROM global_temp.view1 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 --- !query 11 +-- !query SELECT i1 FROM global_temp.view1 --- !query 11 schema +-- !query schema 
struct --- !query 11 output +-- !query output 1 --- !query 12 +-- !query SELECT global_temp.view1.i1 FROM global_temp.view1 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 1 --- !query 13 +-- !query SELECT view1.i1 FROM global_temp.view1 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 --- !query 14 +-- !query SELECT a.i1 FROM global_temp.view1 AS a --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 --- !query 15 +-- !query SELECT i1 FROM global_temp.view1 AS a --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 --- !query 16 +-- !query DROP VIEW global_temp.view1 --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out index 73e3fdc08232c..dcfd48b687b17 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution.sql.out @@ -2,442 +2,442 @@ -- Number of queries: 55 --- !query 0 +-- !query CREATE DATABASE mydb1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query USE mydb1 --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TABLE t1 USING parquet AS SELECT 1 AS i1 --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE DATABASE mydb2 --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query USE mydb2 --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TABLE t1 USING parquet AS SELECT 20 AS i1 --- !query 5 schema +-- !query schema struct<> --- !query 5 output 
+-- !query output --- !query 6 +-- !query USE mydb1 --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query SELECT i1 FROM t1 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 --- !query 8 +-- !query SELECT i1 FROM mydb1.t1 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 --- !query 9 +-- !query SELECT t1.i1 FROM t1 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 --- !query 10 +-- !query SELECT t1.i1 FROM mydb1.t1 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 --- !query 11 +-- !query SELECT mydb1.t1.i1 FROM t1 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 --- !query 12 +-- !query SELECT mydb1.t1.i1 FROM mydb1.t1 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 1 --- !query 13 +-- !query USE mydb2 --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query SELECT i1 FROM t1 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 20 --- !query 15 +-- !query SELECT i1 FROM mydb1.t1 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 --- !query 16 +-- !query SELECT t1.i1 FROM t1 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 20 --- !query 17 +-- !query SELECT t1.i1 FROM mydb1.t1 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 --- !query 18 +-- !query SELECT mydb1.t1.i1 FROM mydb1.t1 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 --- !query 19 +-- !query USE mydb1 --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT t1.* FROM t1 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- 
!query output 1 --- !query 21 +-- !query SELECT mydb1.t1.* FROM mydb1.t1 --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 1 --- !query 22 +-- !query SELECT t1.* FROM mydb1.t1 --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output 1 --- !query 23 +-- !query USE mydb2 --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query SELECT t1.* FROM t1 --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 20 --- !query 25 +-- !query SELECT mydb1.t1.* FROM mydb1.t1 --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1 --- !query 26 +-- !query SELECT t1.* FROM mydb1.t1 --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 1 --- !query 27 +-- !query SELECT a.* FROM mydb1.t1 AS a --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 1 --- !query 28 +-- !query USE mydb1 --- !query 28 schema +-- !query schema struct<> --- !query 28 output +-- !query output --- !query 29 +-- !query CREATE TABLE t3 USING parquet AS SELECT * FROM VALUES (4,1), (3,1) AS t3(c1, c2) --- !query 29 schema +-- !query schema struct<> --- !query 29 output +-- !query output --- !query 30 +-- !query CREATE TABLE t4 USING parquet AS SELECT * FROM VALUES (4,1), (2,1) AS t4(c2, c3) --- !query 30 schema +-- !query schema struct<> --- !query 30 output +-- !query output --- !query 31 +-- !query SELECT * FROM t3 WHERE c1 IN (SELECT c2 FROM t4 WHERE t4.c3 = t3.c2) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 4 1 --- !query 32 +-- !query SELECT * FROM mydb1.t3 WHERE c1 IN (SELECT mydb1.t4.c2 FROM mydb1.t4 WHERE mydb1.t4.c3 = mydb1.t3.c2) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 4 1 --- !query 33 +-- !query SET spark.sql.crossJoin.enabled = true --- !query 33 schema +-- !query schema struct --- 
!query 33 output +-- !query output spark.sql.crossJoin.enabled true --- !query 34 +-- !query SELECT mydb1.t1.i1 FROM t1, mydb2.t1 --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 1 --- !query 35 +-- !query SELECT mydb1.t1.i1 FROM mydb1.t1, mydb2.t1 --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 1 --- !query 36 +-- !query USE mydb2 --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output --- !query 37 +-- !query SELECT mydb1.t1.i1 FROM t1, mydb1.t1 --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 1 --- !query 38 +-- !query SET spark.sql.crossJoin.enabled = false --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output spark.sql.crossJoin.enabled false --- !query 39 +-- !query USE mydb1 --- !query 39 schema +-- !query schema struct<> --- !query 39 output +-- !query output --- !query 40 +-- !query CREATE TABLE t5(i1 INT, t5 STRUCT) USING parquet --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output --- !query 41 +-- !query INSERT INTO t5 VALUES(1, (2, 3)) --- !query 41 schema +-- !query schema struct<> --- !query 41 output +-- !query output --- !query 42 +-- !query SELECT t5.i1 FROM t5 --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output 1 --- !query 43 +-- !query SELECT t5.t5.i1 FROM t5 --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 2 --- !query 44 +-- !query SELECT t5.t5.i1 FROM mydb1.t5 --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output 2 --- !query 45 +-- !query SELECT t5.i1 FROM mydb1.t5 --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output 1 --- !query 46 +-- !query SELECT t5.* FROM mydb1.t5 --- !query 46 schema +-- !query schema struct> --- !query 46 output +-- !query output 1 {"i1":2,"i2":3} --- !query 47 +-- !query SELECT t5.t5.* 
FROM mydb1.t5 --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output 2 3 --- !query 48 +-- !query SELECT mydb1.t5.t5.i1 FROM mydb1.t5 --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output 2 --- !query 49 +-- !query SELECT mydb1.t5.t5.i2 FROM mydb1.t5 --- !query 49 schema +-- !query schema struct --- !query 49 output +-- !query output 3 --- !query 50 +-- !query SELECT mydb1.t5.* FROM mydb1.t5 --- !query 50 schema +-- !query schema struct> --- !query 50 output +-- !query output 1 {"i1":2,"i2":3} --- !query 51 +-- !query SELECT mydb1.t5.* FROM t5 --- !query 51 schema +-- !query schema struct> --- !query 51 output +-- !query output 1 {"i1":2,"i2":3} --- !query 52 +-- !query USE default --- !query 52 schema +-- !query schema struct<> --- !query 52 output +-- !query output --- !query 53 +-- !query DROP DATABASE mydb1 CASCADE --- !query 53 schema +-- !query schema struct<> --- !query 53 output +-- !query output --- !query 54 +-- !query DROP DATABASE mydb2 CASCADE --- !query 54 schema +-- !query schema struct<> --- !query 54 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/comparator.sql.out b/sql/core/src/test/resources/sql-tests/results/comparator.sql.out index afc7b5448b7b6..721b56cc1da2f 100644 --- a/sql/core/src/test/resources/sql-tests/results/comparator.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/comparator.sql.out @@ -1,18 +1,82 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 2 +-- Number of queries: 10 --- !query 0 +-- !query select x'00' < x'0f' --- !query 0 schema +-- !query schema struct<(X'00' < X'0F'):boolean> --- !query 0 output +-- !query output true --- !query 1 +-- !query select x'00' < x'ff' --- !query 1 schema +-- !query schema struct<(X'00' < X'FF'):boolean> --- !query 1 output +-- !query output +true + + +-- !query +select '1 ' = 1Y +-- !query schema +struct<(CAST(1 AS TINYINT) = 1):boolean> +-- !query output +true 
+ + +-- !query +select '\t1 ' = 1Y +-- !query schema +struct<(CAST( 1 AS TINYINT) = 1):boolean> +-- !query output +true + + +-- !query +select '1 ' = 1S +-- !query schema +struct<(CAST(1 AS SMALLINT) = 1):boolean> +-- !query output +true + + +-- !query +select '1 ' = 1 +-- !query schema +struct<(CAST(1 AS INT) = 1):boolean> +-- !query output +true + + +-- !query +select ' 1' = 1L +-- !query schema +struct<(CAST( 1 AS BIGINT) = 1):boolean> +-- !query output +true + + +-- !query +select ' 1' = cast(1.0 as float) +-- !query schema +struct<(CAST( 1 AS FLOAT) = CAST(1.0 AS FLOAT)):boolean> +-- !query output +true + + +-- !query +select ' 1.0 ' = 1.0D +-- !query schema +struct<(CAST( 1.0 AS DOUBLE) = 1.0):boolean> +-- !query output +true + + +-- !query +select ' 1.0 ' = 1.0BD +-- !query schema +struct<(CAST( 1.0 AS DOUBLE) = CAST(1.0 AS DOUBLE)):boolean> +-- !query output true diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/count.sql.out index b8a86d4c44594..68a5114bb5859 100644 --- a/sql/core/src/test/resources/sql-tests/results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out @@ -2,27 +2,27 @@ -- Number of queries: 5 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) AS testData(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT count(*), count(1), count(null), count(a), count(b), count(a + b), count((a, b)) FROM testData --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 7 7 0 5 5 4 7 --- !query 2 +-- !query SELECT count(DISTINCT 1), count(DISTINCT null), @@ -31,25 +31,25 @@ SELECT count(DISTINCT (a + b)), count(DISTINCT (a, b)) FROM testData --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 0 2 2 2 6 --- !query 3 +-- !query 
SELECT count(a, b), count(b, a), count(testData.*) FROM testData --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 4 4 4 --- !query 4 +-- !query SELECT count(DISTINCT a, b), count(DISTINCT b, a), count(DISTINCT *), count(DISTINCT testData.*) FROM testData --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 3 3 3 3 diff --git a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out index 3833c42bdfecf..ce2305cb7ec90 100644 --- a/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cross-join.sql.out @@ -2,35 +2,35 @@ -- Number of queries: 13 --- !query 0 +-- !query create temporary view nt1 as select * from values ("one", 1), ("two", 2), ("three", 3) as nt1(k, v1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view nt2 as select * from values ("one", 1), ("two", 22), ("one", 5) as nt2(k, v2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM nt1 cross join nt2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output one 1 one 1 one 1 one 5 one 1 two 22 @@ -42,82 +42,82 @@ two 2 one 5 two 2 two 22 --- !query 3 +-- !query SELECT * FROM nt1 cross join nt2 where nt1.k = nt2.k --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output one 1 one 1 one 1 one 5 two 2 two 22 --- !query 4 +-- !query SELECT * FROM nt1 cross join nt2 on (nt1.k = nt2.k) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output one 1 one 1 one 1 one 5 two 2 two 22 --- !query 5 +-- !query SELECT * FROM nt1 cross join nt2 where nt1.v1 = 1 and nt2.v2 = 22 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output one 1 two 22 --- !query 6 +-- !query 
SELECT a.key, b.key FROM (SELECT k key FROM nt1 WHERE v1 < 2) a CROSS JOIN (SELECT k key FROM nt2 WHERE v2 = 22) b --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output one two --- !query 7 +-- !query create temporary view A(a, va) as select * from nt1 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query create temporary view B(b, vb) as select * from nt1 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query create temporary view C(c, vc) as select * from nt1 --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query create temporary view D(d, vd) as select * from nt1 --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query select * from ((A join B on (a = b)) cross join C) join D on (a = d) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output one 1 one 1 one 1 one 1 one 1 one 1 three 3 one 1 one 1 one 1 two 2 one 1 @@ -129,11 +129,11 @@ two 2 two 2 three 3 two 2 two 2 two 2 two 2 two 2 --- !query 12 +-- !query SELECT * FROM nt1 CROSS JOIN nt2 ON (nt1.k > nt2.k) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output three 3 one 1 three 3 one 5 two 2 one 1 diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 03d4bfffa8923..8495bef9122ef 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -2,36 +2,36 @@ -- Number of queries: 17 --- !query 0 +-- !query select from_csv('1, 3.14', 'a INT, f FLOAT') --- !query 0 schema +-- !query schema struct> --- !query 0 output +-- !query output {"a":1,"f":3.14} --- !query 1 +-- !query select from_csv('26/08/2015', 'time 
Timestamp', map('timestampFormat', 'dd/MM/yyyy')) --- !query 1 schema +-- !query schema struct> --- !query 1 output -{"time":2015-08-26 00:00:00.0} +-- !query output +{"time":2015-08-26 00:00:00} --- !query 2 +-- !query select from_csv('1', 1) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output org.apache.spark.sql.AnalysisException Schema should be specified in DDL format as a string literal or output of the schema_of_csv function instead of 1;; line 1 pos 7 --- !query 3 +-- !query select from_csv('1', 'a InvalidType') --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException DataType invalidtype is not supported.(line 1, pos 2) @@ -42,112 +42,112 @@ a InvalidType ; line 1 pos 7 --- !query 4 +-- !query select from_csv('1', 'a INT', named_struct('mode', 'PERMISSIVE')) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException Must use a map() function for options;; line 1 pos 7 --- !query 5 +-- !query select from_csv('1', 'a INT', map('mode', 1)) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException A type of keys and values in map() must be string, but got map;; line 1 pos 7 --- !query 6 +-- !query select from_csv() --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException Invalid number of arguments for function from_csv. 
Expected: one of 2 and 3; Found: 0; line 1 pos 7 --- !query 7 +-- !query select from_csv('1,abc', schema_of_csv('1,abc')) --- !query 7 schema +-- !query schema struct> --- !query 7 output +-- !query output {"_c0":1,"_c1":"abc"} --- !query 8 +-- !query select schema_of_csv('1|abc', map('delimiter', '|')) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output struct<_c0:int,_c1:string> --- !query 9 +-- !query select schema_of_csv(null) --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'schema_of_csv(NULL)' due to data type mismatch: The input csv should be a string literal and not null; however, got NULL.; line 1 pos 7 --- !query 10 +-- !query CREATE TEMPORARY VIEW csvTable(csvField, a) AS SELECT * FROM VALUES ('1,abc', 'a') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT schema_of_csv(csvField) FROM csvTable --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'schema_of_csv(csvtable.`csvField`)' due to data type mismatch: The input csv should be a string literal and not null; however, got csvtable.`csvField`.; line 1 pos 7 --- !query 12 +-- !query DROP VIEW IF EXISTS csvTable --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query select to_csv(named_struct('a', 1, 'b', 2)) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1,2 --- !query 14 +-- !query select to_csv(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 26/08/2015 --- !query 15 +-- !query select to_csv(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE')) --- !query 15 schema +-- !query schema 
struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException Must use a map() function for options;; line 1 pos 7 --- !query 16 +-- !query select to_csv(named_struct('a', 1, 'b', 2), map('mode', 1)) --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException A type of keys and values in map() must be string, but got map;; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out index 5193e2536c0cc..a9709c4a79793 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out @@ -2,65 +2,65 @@ -- Number of queries: 17 --- !query 0 +-- !query create temporary view t as select * from values 0, 1, 2 as t(id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values 0, 1 as t(id) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SET spark.sql.legacy.ctePrecedence.enabled=true --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output spark.sql.legacy.ctePrecedence.enabled true --- !query 3 +-- !query WITH t as ( WITH t2 AS (SELECT 1) SELECT * FROM t2 ) SELECT * FROM t --- !query 3 schema +-- !query schema struct<1:int> --- !query 3 output +-- !query output 1 --- !query 4 +-- !query SELECT max(c) FROM ( WITH t(c) AS (SELECT 1) SELECT * FROM t ) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 --- !query 5 +-- !query SELECT ( WITH t AS (SELECT 1) SELECT * FROM t ) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 --- !query 6 +-- !query WITH t AS (SELECT 1), t2 AS ( @@ -68,13 +68,13 @@ WITH SELECT * FROM t ) SELECT * FROM t2 --- !query 6 schema +-- 
!query schema struct<1:int> --- !query 6 output +-- !query output 1 --- !query 7 +-- !query WITH t(c) AS (SELECT 1), t2 AS ( @@ -86,13 +86,13 @@ WITH ) ) SELECT * FROM t2 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 --- !query 8 +-- !query WITH t AS (SELECT 1), t2 AS ( @@ -104,25 +104,25 @@ WITH SELECT * FROM t2 ) SELECT * FROM t2 --- !query 8 schema +-- !query schema struct<2:int> --- !query 8 output +-- !query output 2 --- !query 9 +-- !query WITH t(c) AS (SELECT 1) SELECT max(c) FROM ( WITH t(c) AS (SELECT 2) SELECT * FROM t ) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 2 --- !query 10 +-- !query WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( SELECT max(c) AS c FROM ( @@ -130,13 +130,13 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 2 --- !query 11 +-- !query WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( WITH t(c) AS (SELECT 2) @@ -145,25 +145,25 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 3 --- !query 12 +-- !query WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) SELECT * FROM t ) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 1 --- !query 13 +-- !query WITH t AS (SELECT 1) SELECT ( SELECT ( @@ -171,13 +171,13 @@ SELECT ( SELECT * FROM t ) ) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 --- !query 14 +-- !query WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) @@ -186,23 +186,23 @@ SELECT ( SELECT * FROM t ) ) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 --- !query 15 +-- !query DROP VIEW IF EXISTS t --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output --- !query 16 +-- !query DROP VIEW IF EXISTS t2 --- !query 16 schema +-- !query schema struct<> --- !query 16 output 
+-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out new file mode 100644 index 0000000000000..2d87781193c25 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out @@ -0,0 +1,343 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 27 + + +-- !query +create temporary view t as select * from values 0, 1, 2 as t(id) +-- !query schema +struct<> +-- !query output + + + +-- !query +create temporary view t2 as select * from values 0, 1 as t(id) +-- !query schema +struct<> +-- !query output + + + +-- !query +WITH s AS (SELECT 1 FROM s) SELECT * FROM s +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: s; line 1 pos 25 + + +-- !query +WITH r AS (SELECT (SELECT * FROM r)) +SELECT * FROM r +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: r; line 1 pos 33 + + +-- !query +WITH t AS (SELECT 1 FROM t) SELECT * FROM t +-- !query schema +struct<1:int> +-- !query output +1 +1 +1 + + +-- !query +WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: s2; line 1 pos 26 + + +-- !query +WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2 +-- !query schema +struct +-- !query output +0 2 +0 2 +1 2 +1 2 + + +-- !query +WITH CTE1 AS ( + SELECT b.id AS id + FROM T2 a + CROSS JOIN (SELECT id AS id FROM T2) b +) +SELECT t1.id AS c1, + t2.id AS c2 +FROM CTE1 t1 + CROSS JOIN CTE1 t2 +-- !query schema +struct +-- !query output +0 0 +0 0 +0 0 +0 0 +0 1 +0 1 +0 1 +0 1 +1 0 +1 0 +1 0 +1 0 +1 1 +1 1 +1 1 +1 1 + + +-- !query +WITH t(x) AS (SELECT 1) +SELECT * FROM t WHERE x = 1 +-- !query schema +struct +-- !query output +1 + + +-- !query +WITH 
t(x, y) AS (SELECT 1, 2) +SELECT * FROM t WHERE x = 1 AND y = 2 +-- !query schema +struct +-- !query output +1 2 + + +-- !query +WITH t(x, x) AS (SELECT 1, 2) +SELECT * FROM t +-- !query schema +struct +-- !query output +1 2 + + +-- !query +WITH t() AS (SELECT 1) +SELECT * FROM t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'WITH t()'(line 1, pos 7) + +== SQL == +WITH t() AS (SELECT 1) +-------^^^ +SELECT * FROM t + + +-- !query +WITH + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +CTE definition can't have duplicate names: 't'.(line 1, pos 0) + +== SQL == +WITH +^^^ + t(x) AS (SELECT 1), + t(x) AS (SELECT 2) +SELECT * FROM t + + +-- !query +WITH t as ( + WITH t2 AS (SELECT 1) + SELECT * FROM t2 +) +SELECT * FROM t +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 1) + SELECT * FROM t +) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT ( + WITH t AS (SELECT 1) + SELECT * FROM t +) +-- !query schema +struct +-- !query output +1 + + +-- !query +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +SELECT * FROM t2 +-- !query schema +struct<2:int> +-- !query output +2 + + +-- !query +WITH + t(c) AS (SELECT 1), + t2 AS ( + SELECT ( + SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) + ) + ) +SELECT * FROM t2 +-- !query schema +struct +-- !query output +2 + + +-- !query +WITH + t AS (SELECT 1), + t2 AS ( + WITH t AS (SELECT 2), + t2 AS ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) + SELECT * FROM t2 + ) +SELECT * FROM t2 +-- !query schema +struct<3:int> +-- !query output +3 + + +-- !query +WITH t(c) AS (SELECT 1) +SELECT max(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t +) +-- !query schema +struct +-- !query output +2 + + +-- !query 
+WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 2) + SELECT * FROM t + ) +) +-- !query schema +struct +-- !query output +2 + + +-- !query +WITH t(c) AS (SELECT 1) +SELECT sum(c) FROM ( + WITH t(c) AS (SELECT 2) + SELECT max(c) AS c FROM ( + WITH t(c) AS (SELECT 3) + SELECT * FROM t + ) +) +-- !query schema +struct +-- !query output +3 + + +-- !query +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t +) +-- !query schema +struct +-- !query output +2 + + +-- !query +WITH t AS (SELECT 1) +SELECT ( + SELECT ( + WITH t AS (SELECT 2) + SELECT * FROM t + ) +) +-- !query schema +struct +-- !query output +2 + + +-- !query +WITH t AS (SELECT 1) +SELECT ( + WITH t AS (SELECT 2) + SELECT ( + WITH t AS (SELECT 3) + SELECT * FROM t + ) +) +-- !query schema +struct +-- !query output +3 + + +-- !query +DROP VIEW IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS t2 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index b7dd76c725209..1d50aa8f57505 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -2,72 +2,72 @@ -- Number of queries: 27 --- !query 0 +-- !query create temporary view t as select * from values 0, 1, 2 as t(id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values 0, 1 as t(id) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query WITH s AS (SELECT 1 FROM s) SELECT * FROM s --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output org.apache.spark.sql.AnalysisException Table or view not found: s; line 1 pos 25 --- !query 3 +-- !query WITH r AS (SELECT 
(SELECT * FROM r)) SELECT * FROM r --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException Table or view not found: r; line 1 pos 33 --- !query 4 +-- !query WITH t AS (SELECT 1 FROM t) SELECT * FROM t --- !query 4 schema +-- !query schema struct<1:int> --- !query 4 output +-- !query output 1 1 1 --- !query 5 +-- !query WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException Table or view not found: s2; line 1 pos 26 --- !query 6 +-- !query WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1 cross join t2 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 0 2 0 2 1 2 1 2 --- !query 7 +-- !query WITH CTE1 AS ( SELECT b.id AS id FROM T2 a @@ -77,9 +77,9 @@ SELECT t1.id AS c1, t2.id AS c2 FROM CTE1 t1 CROSS JOIN CTE1 t2 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 0 0 0 0 0 0 @@ -98,39 +98,39 @@ struct 1 1 --- !query 8 +-- !query WITH t(x) AS (SELECT 1) SELECT * FROM t WHERE x = 1 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 --- !query 9 +-- !query WITH t(x, y) AS (SELECT 1, 2) SELECT * FROM t WHERE x = 1 AND y = 2 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 2 --- !query 10 +-- !query WITH t(x, x) AS (SELECT 1, 2) SELECT * FROM t --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 2 --- !query 11 +-- !query WITH t() AS (SELECT 1) SELECT * FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException no viable alternative at input 'WITH t()'(line 1, pos 7) @@ -141,14 +141,14 @@ WITH t() AS (SELECT 1) SELECT * FROM t --- !query 12 +-- !query WITH t(x) AS (SELECT 1), t(x) AS 
(SELECT 2) SELECT * FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException CTE definition can't have duplicate names: 't'.(line 1, pos 0) @@ -161,41 +161,41 @@ WITH SELECT * FROM t --- !query 13 +-- !query WITH t as ( WITH t2 AS (SELECT 1) SELECT * FROM t2 ) SELECT * FROM t --- !query 13 schema +-- !query schema struct<1:int> --- !query 13 output +-- !query output 1 --- !query 14 +-- !query SELECT max(c) FROM ( WITH t(c) AS (SELECT 1) SELECT * FROM t ) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 --- !query 15 +-- !query SELECT ( WITH t AS (SELECT 1) SELECT * FROM t ) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 --- !query 16 +-- !query WITH t AS (SELECT 1), t2 AS ( @@ -203,13 +203,14 @@ WITH SELECT * FROM t ) SELECT * FROM t2 --- !query 16 schema -struct<2:int> --- !query 16 output -2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; --- !query 17 +-- !query WITH t(c) AS (SELECT 1), t2 AS ( @@ -221,13 +222,14 @@ WITH ) ) SELECT * FROM t2 --- !query 17 schema -struct --- !query 17 output -2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; --- !query 18 +-- !query WITH t AS (SELECT 1), t2 AS ( @@ -239,25 +241,26 @@ WITH SELECT * FROM t2 ) SELECT * FROM t2 --- !query 18 schema -struct<3:int> --- !query 18 output -3 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Name t is ambiguous in nested CTE. 
Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; --- !query 19 +-- !query WITH t(c) AS (SELECT 1) SELECT max(c) FROM ( WITH t(c) AS (SELECT 2) SELECT * FROM t ) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 2 --- !query 20 +-- !query WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( SELECT max(c) AS c FROM ( @@ -265,13 +268,13 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 2 --- !query 21 +-- !query WITH t(c) AS (SELECT 1) SELECT sum(c) FROM ( WITH t(c) AS (SELECT 2) @@ -280,25 +283,26 @@ SELECT sum(c) FROM ( SELECT * FROM t ) ) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 3 --- !query 22 +-- !query WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) SELECT * FROM t ) --- !query 22 schema -struct --- !query 22 output -2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; --- !query 23 +-- !query WITH t AS (SELECT 1) SELECT ( SELECT ( @@ -306,13 +310,14 @@ SELECT ( SELECT * FROM t ) ) --- !query 23 schema -struct --- !query 23 output -2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; --- !query 24 +-- !query WITH t AS (SELECT 1) SELECT ( WITH t AS (SELECT 2) @@ -321,23 +326,24 @@ SELECT ( SELECT * FROM t ) ) --- !query 24 schema -struct --- !query 24 output -3 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Name t is ambiguous in nested CTE. 
Please set spark.sql.legacy.ctePrecedence.enabled to false so that name defined in inner CTE takes precedence. See more details in SPARK-28228.; --- !query 25 +-- !query DROP VIEW IF EXISTS t --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output --- !query 26 +-- !query DROP VIEW IF EXISTS t2 --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/date_part.sql.out b/sql/core/src/test/resources/sql-tests/results/date_part.sql.out new file mode 100644 index 0000000000000..b4cceedffd98b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/date_part.sql.out @@ -0,0 +1,886 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 110 + + +-- !query +CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c +-- !query schema +struct<> +-- !query output + + + +-- !query +select date_part('millennium', c) from t +-- !query schema +struct +-- !query output +3 + + +-- !query +select date_part('millennia', c) from t +-- !query schema +struct +-- !query output +3 + + +-- !query +select date_part('mil', c) from t +-- !query schema +struct +-- !query output +3 + + +-- !query +select date_part('mils', c) from t +-- !query schema +struct +-- !query output +3 + + +-- !query +select date_part('century', c) from t +-- !query schema +struct +-- !query output +21 + + +-- !query +select date_part('centuries', c) from t +-- !query schema +struct +-- !query output +21 + + +-- !query +select date_part('c', c) from t +-- !query schema +struct +-- !query output +21 + + +-- !query +select date_part('cent', c) from t +-- !query schema +struct +-- !query output +21 + + +-- !query +select date_part('decade', c) from t +-- !query schema +struct +-- !query output +201 + + +-- !query +select date_part('decades', c) from t +-- !query schema +struct +-- !query output +201 + + +-- !query +select date_part('dec', c) from t +-- !query schema 
+struct +-- !query output +201 + + +-- !query +select date_part('decs', c) from t +-- !query schema +struct +-- !query output +201 + + +-- !query +select date_part('year', c) from t +-- !query schema +struct +-- !query output +2011 + + +-- !query +select date_part('y', c) from t +-- !query schema +struct +-- !query output +2011 + + +-- !query +select date_part('years', c) from t +-- !query schema +struct +-- !query output +2011 + + +-- !query +select date_part('yr', c) from t +-- !query schema +struct +-- !query output +2011 + + +-- !query +select date_part('yrs', c) from t +-- !query schema +struct +-- !query output +2011 + + +-- !query +select date_part('quarter', c) from t +-- !query schema +struct +-- !query output +2 + + +-- !query +select date_part('qtr', c) from t +-- !query schema +struct +-- !query output +2 + + +-- !query +select date_part('month', c) from t +-- !query schema +struct +-- !query output +5 + + +-- !query +select date_part('mon', c) from t +-- !query schema +struct +-- !query output +5 + + +-- !query +select date_part('mons', c) from t +-- !query schema +struct +-- !query output +5 + + +-- !query +select date_part('months', c) from t +-- !query schema +struct +-- !query output +5 + + +-- !query +select date_part('week', c) from t +-- !query schema +struct +-- !query output +18 + + +-- !query +select date_part('w', c) from t +-- !query schema +struct +-- !query output +18 + + +-- !query +select date_part('weeks', c) from t +-- !query schema +struct +-- !query output +18 + + +-- !query +select date_part('day', c) from t +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('d', c) from t +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('days', c) from t +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('dayofweek', c) from t +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('dow', c) from t +-- !query schema +struct +-- !query 
output +5 + + +-- !query +select date_part('isodow', c) from t +-- !query schema +struct +-- !query output +5 + + +-- !query +select date_part('doy', c) from t +-- !query schema +struct +-- !query output +126 + + +-- !query +select date_part('hour', c) from t +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('h', c) from t +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('hours', c) from t +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('hr', c) from t +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('hrs', c) from t +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('minute', c) from t +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('m', c) from t +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('min', c) from t +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('mins', c) from t +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('minutes', c) from t +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('second', c) from t +-- !query schema +struct +-- !query output +9.123456 + + +-- !query +select date_part('s', c) from t +-- !query schema +struct +-- !query output +9.123456 + + +-- !query +select date_part('sec', c) from t +-- !query schema +struct +-- !query output +9.123456 + + +-- !query +select date_part('seconds', c) from t +-- !query schema +struct +-- !query output +9.123456 + + +-- !query +select date_part('secs', c) from t +-- !query schema +struct +-- !query output +9.123456 + + +-- !query +select date_part('not_supported', c) from t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Literals of type 'not_supported' are currently not supported for the string type.;; line 1 pos 7 + + +-- !query +select date_part(c, c) from 
t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The field parameter needs to be a foldable string value.;; line 1 pos 7 + + +-- !query +select date_part(null, c) from t +-- !query schema +struct +-- !query output +NULL + + +-- !query +CREATE TEMPORARY VIEW t2 AS select interval 1010 year 9 month 8 day 7 hour 6 minute 5 second 4 millisecond 3 microsecond as c +-- !query schema +struct<> +-- !query output + + + +-- !query +select date_part('millennium', c) from t2 +-- !query schema +struct +-- !query output +1 + + +-- !query +select date_part('millennia', c) from t2 +-- !query schema +struct +-- !query output +1 + + +-- !query +select date_part('mil', c) from t2 +-- !query schema +struct +-- !query output +1 + + +-- !query +select date_part('mils', c) from t2 +-- !query schema +struct +-- !query output +1 + + +-- !query +select date_part('century', c) from t2 +-- !query schema +struct +-- !query output +10 + + +-- !query +select date_part('centuries', c) from t2 +-- !query schema +struct +-- !query output +10 + + +-- !query +select date_part('c', c) from t2 +-- !query schema +struct +-- !query output +10 + + +-- !query +select date_part('cent', c) from t2 +-- !query schema +struct +-- !query output +10 + + +-- !query +select date_part('decade', c) from t2 +-- !query schema +struct +-- !query output +101 + + +-- !query +select date_part('decades', c) from t2 +-- !query schema +struct +-- !query output +101 + + +-- !query +select date_part('dec', c) from t2 +-- !query schema +struct +-- !query output +101 + + +-- !query +select date_part('decs', c) from t2 +-- !query schema +struct +-- !query output +101 + + +-- !query +select date_part('year', c) from t2 +-- !query schema +struct +-- !query output +1010 + + +-- !query +select date_part('y', c) from t2 +-- !query schema +struct +-- !query output +1010 + + +-- !query +select date_part('years', c) from t2 +-- !query schema +struct +-- !query output +1010 + + +-- !query +select 
date_part('yr', c) from t2 +-- !query schema +struct +-- !query output +1010 + + +-- !query +select date_part('yrs', c) from t2 +-- !query schema +struct +-- !query output +1010 + + +-- !query +select date_part('quarter', c) from t2 +-- !query schema +struct +-- !query output +4 + + +-- !query +select date_part('qtr', c) from t2 +-- !query schema +struct +-- !query output +4 + + +-- !query +select date_part('month', c) from t2 +-- !query schema +struct +-- !query output +9 + + +-- !query +select date_part('mon', c) from t2 +-- !query schema +struct +-- !query output +9 + + +-- !query +select date_part('mons', c) from t2 +-- !query schema +struct +-- !query output +9 + + +-- !query +select date_part('months', c) from t2 +-- !query schema +struct +-- !query output +9 + + +-- !query +select date_part('day', c) from t2 +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('d', c) from t2 +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('days', c) from t2 +-- !query schema +struct +-- !query output +8 + + +-- !query +select date_part('hour', c) from t2 +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('h', c) from t2 +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('hours', c) from t2 +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('hr', c) from t2 +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('hrs', c) from t2 +-- !query schema +struct +-- !query output +7 + + +-- !query +select date_part('minute', c) from t2 +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('m', c) from t2 +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('min', c) from t2 +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('mins', c) from t2 +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('minutes', c) 
from t2 +-- !query schema +struct +-- !query output +6 + + +-- !query +select date_part('second', c) from t2 +-- !query schema +struct +-- !query output +5.004003 + + +-- !query +select date_part('s', c) from t2 +-- !query schema +struct +-- !query output +5.004003 + + +-- !query +select date_part('sec', c) from t2 +-- !query schema +struct +-- !query output +5.004003 + + +-- !query +select date_part('seconds', c) from t2 +-- !query schema +struct +-- !query output +5.004003 + + +-- !query +select date_part('secs', c) from t2 +-- !query schema +struct +-- !query output +5.004003 + + +-- !query +select date_part('milliseconds', c) from t2 +-- !query schema +struct +-- !query output +5004.003 + + +-- !query +select date_part('msec', c) from t2 +-- !query schema +struct +-- !query output +5004.003 + + +-- !query +select date_part('msecs', c) from t2 +-- !query schema +struct +-- !query output +5004.003 + + +-- !query +select date_part('millisecon', c) from t2 +-- !query schema +struct +-- !query output +5004.003 + + +-- !query +select date_part('mseconds', c) from t2 +-- !query schema +struct +-- !query output +5004.003 + + +-- !query +select date_part('ms', c) from t2 +-- !query schema +struct +-- !query output +5004.003 + + +-- !query +select date_part('microseconds', c) from t2 +-- !query schema +struct +-- !query output +5004003 + + +-- !query +select date_part('usec', c) from t2 +-- !query schema +struct +-- !query output +5004003 + + +-- !query +select date_part('usecs', c) from t2 +-- !query schema +struct +-- !query output +5004003 + + +-- !query +select date_part('useconds', c) from t2 +-- !query schema +struct +-- !query output +5004003 + + +-- !query +select date_part('microsecon', c) from t2 +-- !query schema +struct +-- !query output +5004003 + + +-- !query +select date_part('us', c) from t2 +-- !query schema +struct +-- !query output +5004003 + + +-- !query +select date_part('epoch', c) from t2 +-- !query schema +struct +-- !query output 
+31897220765.004003 + + +-- !query +select date_part('not_supported', c) from t2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Literals of type 'not_supported' are currently not supported for the interval type.;; line 1 pos 7 + + +-- !query +select date_part(c, c) from t2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The field parameter needs to be a foldable string value.;; line 1 pos 7 + + +-- !query +select date_part(null, c) from t2 +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out old mode 100644 new mode 100755 index 178400e5706b8..a7b098d79a706 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,131 +1,393 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 15 +-- Number of queries: 47 --- !query 0 +-- !query select current_date = current_date(), current_timestamp = current_timestamp() --- !query 0 schema +-- !query schema struct<(current_date() = current_date()):boolean,(current_timestamp() = current_timestamp()):boolean> --- !query 0 output +-- !query output true true --- !query 1 +-- !query select to_date(null), to_date('2016-12-31'), to_date('2016-12-31', 'yyyy-MM-dd') --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output NULL 2016-12-31 2016-12-31 --- !query 2 +-- !query select to_timestamp(null), to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd') --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output NULL 2016-12-31 00:12:00 2016-12-31 00:00:00 --- !query 3 +-- !query select dayofweek('2007-02-03'), dayofweek('2009-07-30'), dayofweek('2017-05-27'), dayofweek(null), dayofweek('1582-10-15 13:10:15') --- !query 3 schema +-- !query schema 
struct --- !query 3 output +-- !query output 7 5 7 NULL 6 --- !query 4 +-- !query create temporary view ttf1 as select * from values (1, 2), (2, 3) as ttf1(current_date, current_timestamp) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query select current_date, current_timestamp from ttf1 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 2 2 3 --- !query 6 +-- !query create temporary view ttf2 as select * from values (1, 2), (2, 3) as ttf2(a, b) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query select current_date = current_date(), current_timestamp = current_timestamp(), a, b from ttf2 --- !query 7 schema +-- !query schema struct<(current_date() = current_date()):boolean,(current_timestamp() = current_timestamp()):boolean,a:int,b:int> --- !query 7 output +-- !query output true true 1 2 true true 2 3 --- !query 8 +-- !query select a, b from ttf2 order by a, current_date --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 2 2 3 --- !query 9 +-- !query select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15') --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 5 3 5 NULL 4 --- !query 10 +-- !query select year('1500-01-01'), month('1500-01-01'), dayOfYear('1500-01-01') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1500 1 1 --- !query 11 -select date '2001-09-28' + 7 --- !query 11 schema +-- !query +select date '2019-01-01\t' +-- !query schema +struct +-- !query output +2019-01-01 + + +-- !query +select timestamp '2019-01-01\t' +-- !query schema +struct +-- !query output +2019-01-01 00:00:00 + + +-- !query +select timestamp'2011-11-11 11:11:11' + interval '2' day +-- !query schema +struct +-- !query output +2011-11-13 11:11:11 + + +-- !query +select 
timestamp'2011-11-11 11:11:11' - interval '2' day +-- !query schema +struct +-- !query output +2011-11-09 11:11:11 + + +-- !query +select date'2011-11-11 11:11:11' + interval '2' second +-- !query schema +struct +-- !query output +2011-11-11 + + +-- !query +select date'2011-11-11 11:11:11' - interval '2' second +-- !query schema +struct +-- !query output +2011-11-10 + + +-- !query +select '2011-11-11' - interval '2' day +-- !query schema +struct +-- !query output +2011-11-09 00:00:00 + + +-- !query +select '2011-11-11 11:11:11' - interval '2' second +-- !query schema +struct +-- !query output +2011-11-11 11:11:09 + + +-- !query +select '1' - interval '2' second +-- !query schema +struct +-- !query output +NULL + + +-- !query +select 1 - interval '2' second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '1 - INTERVAL '2 seconds'' due to data type mismatch: argument 1 requires timestamp type, however, '1' is of int type.; line 1 pos 7 + + +-- !query +select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678' +-- !query schema +struct +-- !query output +2078 hours 48 minutes 47.654322 seconds + + +-- !query +select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01' +-- !query schema +struct +-- !query output +-2078 hours -48 minutes -47.654322 seconds + + +-- !query +select timestamp'2019-10-06 10:11:12.345678' - null +-- !query schema +struct +-- !query output +NULL + + +-- !query +select null - timestamp'2019-10-06 10:11:12.345678' +-- !query schema +struct +-- !query output +NULL + + +-- !query +select date_add('2011-11-11', 1Y) +-- !query schema +struct +-- !query output +2011-11-12 + + +-- !query +select date_add('2011-11-11', 1S) +-- !query schema +struct +-- !query output +2011-11-12 + + +-- !query +select date_add('2011-11-11', 1) +-- !query schema +struct +-- !query output +2011-11-12 + + +-- !query +select date_add('2011-11-11', 1L) +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.AnalysisException +cannot resolve 'date_add(CAST('2011-11-11' AS DATE), 1L)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, '1L' is of bigint type.; line 1 pos 7 + + +-- !query +select date_add('2011-11-11', 1.0) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'date_add(CAST('2011-11-11' AS DATE), 1.0BD)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, '1.0BD' is of decimal(2,1) type.; line 1 pos 7 + + +-- !query +select date_add('2011-11-11', 1E1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'date_add(CAST('2011-11-11' AS DATE), 10.0D)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, '10.0D' is of double type.; line 1 pos 7 + + +-- !query +select date_add('2011-11-11', '1') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'date_add(CAST('2011-11-11' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 + + +-- !query +select date_add(date'2011-11-11', 1) +-- !query schema +struct +-- !query output +2011-11-12 + + +-- !query +select date_add(timestamp'2011-11-11', 1) +-- !query schema +struct +-- !query output +2011-11-12 + + +-- !query +select date_sub(date'2011-11-11', 1) +-- !query schema +struct +-- !query output +2011-11-10 + + +-- !query +select date_sub(timestamp'2011-11-11', 1) +-- !query schema +struct +-- !query output +2011-11-10 + + +-- !query +select date_sub(null, 1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select date_sub(date'2011-11-11', null) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select date'2011-11-11' + 1E1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException 
+cannot resolve 'date_add(DATE '2011-11-11', 10.0D)' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, '10.0D' is of double type.; line 1 pos 7 + + +-- !query +select null + date '2001-09-28' +-- !query schema +struct +-- !query output +NULL + + +-- !query +select date '2001-09-28' + 7Y +-- !query schema struct --- !query 11 output +-- !query output 2001-10-05 --- !query 12 -select 7 + date '2001-09-28' --- !query 12 schema +-- !query +select 7S + date '2001-09-28' +-- !query schema struct --- !query 12 output +-- !query output 2001-10-05 --- !query 13 +-- !query select date '2001-10-01' - 7 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 2001-09-24 --- !query 14 +-- !query +select date '2001-09-28' + null +-- !query schema +struct +-- !query output +NULL + + +-- !query +select date '2001-09-28' - null +-- !query schema +struct +-- !query output +NULL + + +-- !query +select null - date '2019-10-06' +-- !query schema +struct +-- !query output +NULL + + +-- !query select date '2001-10-01' - date '2001-09-28' --- !query 14 schema -struct --- !query 14 output -3 +-- !query schema +struct +-- !query output +3 days diff --git a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out index 217233bfad378..72e46ef493a5d 100644 --- a/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/decimalArithmeticOperations.sql.out @@ -1,458 +1,335 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 54 +-- Number of queries: 40 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1.0 as a, 0.0 as b --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query select a / b from t --- !query 1 schema +-- !query schema struct<(CAST(a AS DECIMAL(2,1)) / 
CAST(b AS DECIMAL(2,1))):decimal(8,6)> --- !query 1 output +-- !query output NULL --- !query 2 +-- !query select a % b from t --- !query 2 schema +-- !query schema struct<(CAST(a AS DECIMAL(2,1)) % CAST(b AS DECIMAL(2,1))):decimal(1,1)> --- !query 2 output +-- !query output NULL --- !query 3 +-- !query select pmod(a, b) from t --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output NULL --- !query 4 +-- !query create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123), (3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query select id, a+b, a-b, a*b, a/b from decimals_test order by id --- !query 6 schema +-- !query schema struct --- !query 6 output -1 1099 -899 99900 0.1001 -2 24690.246 0 152402061.885129 1 -3 1234.2234567891011 -1233.9765432108989 152.358023 0.0001 +-- !query output +1 1099.00000000000000000 -899.00000000000000000 99900.000000 0.100100 +2 24690.24600000000000000 0.00000000000000000 152402061.885129 1.000000 +3 1234.22345678910110000 -1233.97654321089890000 152.358023 0.000100 4 123456789123456790.12345678912345679 123456789123456787.87654321087654321 138698367904130467.515623 109890109097814272.043109 --- !query 7 +-- !query select id, a*10, b/10 from decimals_test order by id --- !query 7 schema +-- !query schema struct --- !query 7 output -1 1000 99.9 -2 123451.23 1234.5123 -3 1.234567891011 123.41 -4 1234567891234567890 0.112345678912345679 +-- !query output +1 1000.000000000000000 99.900000000000000000 +2 123451.230000000000000 1234.512300000000000000 +3 1.234567891011000 123.410000000000000000 +4 1234567891234567890.000000000000000 0.112345678912345679 --- !query 8 +-- !query 
select 10.3 * 3.0 --- !query 8 schema +-- !query schema struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)> --- !query 8 output -30.9 +-- !query output +30.90 --- !query 9 +-- !query select 10.3000 * 3.0 --- !query 9 schema +-- !query schema struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)> --- !query 9 output -30.9 +-- !query output +30.90000 --- !query 10 +-- !query select 10.30000 * 30.0 --- !query 10 schema +-- !query schema struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)> --- !query 10 output -309 +-- !query output +309.000000 --- !query 11 +-- !query select 10.300000000000000000 * 3.000000000000000000 --- !query 11 schema +-- !query schema struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,34)> --- !query 11 output -30.9 +-- !query output +30.9000000000000000000000000000000000 --- !query 12 +-- !query select 10.300000000000000000 * 3.0000000000000000000 --- !query 12 schema +-- !query schema struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,34)> --- !query 12 output -30.9 +-- !query output +30.9000000000000000000000000000000000 --- !query 13 +-- !query select 2.35E10 * 1.0 --- !query 13 schema -struct<(CAST(2.35E+10 AS DECIMAL(12,1)) * CAST(1.0 AS DECIMAL(12,1))):decimal(6,-7)> --- !query 13 output -23500000000 +-- !query schema +struct<(2.35E10 * CAST(1.0 AS DOUBLE)):double> +-- !query output +2.35E10 --- !query 14 -select (5e36 + 0.1) + 5e36 --- !query 14 schema -struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)> --- !query 14 output +-- !query +select (5e36BD + 0.1) + 5e36BD +-- !query schema +struct<(CAST((CAST(5000000000000000000000000000000000000 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + 
CAST(5000000000000000000000000000000000000 AS DECIMAL(38,1))):decimal(38,1)> +-- !query output NULL --- !query 15 -select (-4e36 - 0.1) - 7e36 --- !query 15 schema -struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)> --- !query 15 output +-- !query +select (-4e36BD - 0.1) - 7e36BD +-- !query schema +struct<(CAST((CAST(-4000000000000000000000000000000000000 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7000000000000000000000000000000000000 AS DECIMAL(38,1))):decimal(38,1)> +-- !query output NULL --- !query 16 +-- !query select 12345678901234567890.0 * 12345678901234567890.0 --- !query 16 schema +-- !query schema struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)> --- !query 16 output +-- !query output NULL --- !query 17 -select 1e35 / 0.1 --- !query 17 schema -struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,6)> --- !query 17 output +-- !query +select 1e35BD / 0.1 +-- !query schema +struct<(CAST(100000000000000000000000000000000000 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,6)> +-- !query output NULL --- !query 18 -select 1.2345678901234567890E30 * 1.2345678901234567890E25 --- !query 18 schema -struct<(CAST(1.2345678901234567890E+30 AS DECIMAL(25,-6)) * CAST(1.2345678901234567890E+25 AS DECIMAL(25,-6))):decimal(38,-17)> --- !query 18 output +-- !query +select 1.2345678901234567890E30BD * 1.2345678901234567890E25BD +-- !query schema +struct<(CAST(1234567890123456789000000000000 AS DECIMAL(31,0)) * CAST(12345678901234567890000000 AS DECIMAL(31,0))):decimal(38,0)> +-- !query output NULL --- !query 19 +-- !query select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345 --- !query 19 schema +-- !query schema struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,6)) + CAST(9999999999999999999999999999999.12345 AS DECIMAL(38,6))):decimal(38,6)> --- !query 19 
output +-- !query output 10012345678912345678912345678911.246907 --- !query 20 +-- !query select 123456789123456789.1234567890 * 1.123456789123456789 --- !query 20 schema +-- !query schema struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)> --- !query 20 output +-- !query output 138698367904130467.654320988515622621 --- !query 21 +-- !query select 12345678912345.123456789123 / 0.000000012345678 --- !query 21 schema +-- !query schema struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,9)> --- !query 21 output +-- !query output 1000000073899961059796.725866332 --- !query 22 +-- !query set spark.sql.decimalOperations.allowPrecisionLoss=false --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output spark.sql.decimalOperations.allowPrecisionLoss false --- !query 23 +-- !query select id, a+b, a-b, a*b, a/b from decimals_test order by id --- !query 23 schema +-- !query schema struct --- !query 23 output -1 1099 -899 NULL 0.1001001001001001 -2 24690.246 0 NULL 1 -3 1234.2234567891011 -1233.9765432108989 NULL 0.000100037913541123 +-- !query output +1 1099.000000000000000000 -899.000000000000000000 NULL 0.100100100100100100 +2 24690.246000000000000000 0.000000000000000000 NULL 1.000000000000000000 +3 1234.223456789101100000 -1233.976543210898900000 NULL 0.000100037913541123 4 123456789123456790.123456789123456789 123456789123456787.876543210876543211 NULL 109890109097814272.043109406191131436 --- !query 24 +-- !query select id, a*10, b/10 from decimals_test order by id --- !query 24 schema +-- !query schema struct --- !query 24 output -1 1000 99.9 -2 123451.23 1234.5123 -3 1.234567891011 123.41 -4 1234567891234567890 0.1123456789123456789 +-- !query output +1 1000.000000000000000000 99.9000000000000000000 +2 123451.230000000000000000 1234.5123000000000000000 +3 1.234567891011000000 123.4100000000000000000 +4 
1234567891234567890.000000000000000000 0.1123456789123456789 --- !query 25 +-- !query select 10.3 * 3.0 --- !query 25 schema +-- !query schema struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)> --- !query 25 output -30.9 +-- !query output +30.90 --- !query 26 +-- !query select 10.3000 * 3.0 --- !query 26 schema +-- !query schema struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)> --- !query 26 output -30.9 +-- !query output +30.90000 --- !query 27 +-- !query select 10.30000 * 30.0 --- !query 27 schema +-- !query schema struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)> --- !query 27 output -309 +-- !query output +309.000000 --- !query 28 +-- !query select 10.300000000000000000 * 3.000000000000000000 --- !query 28 schema +-- !query schema struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)> --- !query 28 output -30.9 +-- !query output +30.900000000000000000000000000000000000 --- !query 29 +-- !query select 10.300000000000000000 * 3.0000000000000000000 --- !query 29 schema +-- !query schema struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,37)> --- !query 29 output +-- !query output NULL --- !query 30 +-- !query select 2.35E10 * 1.0 --- !query 30 schema -struct<(CAST(2.35E+10 AS DECIMAL(12,1)) * CAST(1.0 AS DECIMAL(12,1))):decimal(6,-7)> --- !query 30 output -23500000000 +-- !query schema +struct<(2.35E10 * CAST(1.0 AS DOUBLE)):double> +-- !query output +2.35E10 --- !query 31 -select (5e36 + 0.1) + 5e36 --- !query 31 schema -struct<(CAST((CAST(5E+36 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) + CAST(5E+36 AS DECIMAL(38,1))):decimal(38,1)> --- !query 31 output +-- !query +select (5e36BD + 0.1) + 5e36BD +-- !query schema +struct<(CAST((CAST(5000000000000000000000000000000000000 AS DECIMAL(38,1)) + CAST(0.1 AS DECIMAL(38,1))) AS 
DECIMAL(38,1)) + CAST(5000000000000000000000000000000000000 AS DECIMAL(38,1))):decimal(38,1)> +-- !query output NULL --- !query 32 -select (-4e36 - 0.1) - 7e36 --- !query 32 schema -struct<(CAST((CAST(-4E+36 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7E+36 AS DECIMAL(38,1))):decimal(38,1)> --- !query 32 output +-- !query +select (-4e36BD - 0.1) - 7e36BD +-- !query schema +struct<(CAST((CAST(-4000000000000000000000000000000000000 AS DECIMAL(38,1)) - CAST(0.1 AS DECIMAL(38,1))) AS DECIMAL(38,1)) - CAST(7000000000000000000000000000000000000 AS DECIMAL(38,1))):decimal(38,1)> +-- !query output NULL --- !query 33 +-- !query select 12345678901234567890.0 * 12345678901234567890.0 --- !query 33 schema +-- !query schema struct<(12345678901234567890.0 * 12345678901234567890.0):decimal(38,2)> --- !query 33 output +-- !query output NULL --- !query 34 -select 1e35 / 0.1 --- !query 34 schema -struct<(CAST(1E+35 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)> --- !query 34 output +-- !query +select 1e35BD / 0.1 +-- !query schema +struct<(CAST(100000000000000000000000000000000000 AS DECIMAL(37,1)) / CAST(0.1 AS DECIMAL(37,1))):decimal(38,3)> +-- !query output NULL --- !query 35 -select 1.2345678901234567890E30 * 1.2345678901234567890E25 --- !query 35 schema -struct<(CAST(1.2345678901234567890E+30 AS DECIMAL(25,-6)) * CAST(1.2345678901234567890E+25 AS DECIMAL(25,-6))):decimal(38,-17)> --- !query 35 output +-- !query +select 1.2345678901234567890E30BD * 1.2345678901234567890E25BD +-- !query schema +struct<(CAST(1234567890123456789000000000000 AS DECIMAL(31,0)) * CAST(12345678901234567890000000 AS DECIMAL(31,0))):decimal(38,0)> +-- !query output NULL --- !query 36 +-- !query select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.12345 --- !query 36 schema +-- !query schema struct<(CAST(12345678912345678912345678912.1234567 AS DECIMAL(38,7)) + CAST(9999999999999999999999999999999.12345 AS 
DECIMAL(38,7))):decimal(38,7)> --- !query 36 output +-- !query output NULL --- !query 37 +-- !query select 123456789123456789.1234567890 * 1.123456789123456789 --- !query 37 schema +-- !query schema struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,28)> --- !query 37 output +-- !query output NULL --- !query 38 +-- !query select 12345678912345.123456789123 / 0.000000012345678 --- !query 38 schema +-- !query schema struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,18)> --- !query 38 output +-- !query output NULL --- !query 39 -set spark.sql.decimalOperations.nullOnOverflow=false --- !query 39 schema -struct --- !query 39 output -spark.sql.decimalOperations.nullOnOverflow false - - --- !query 40 -select id, a*10, b/10 from decimals_test order by id --- !query 40 schema -struct --- !query 40 output -1 1000 99.9 -2 123451.23 1234.5123 -3 1.234567891011 123.41 -4 1234567891234567890 0.1123456789123456789 - - --- !query 41 -select 10.3 * 3.0 --- !query 41 schema -struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)> --- !query 41 output -30.9 - - --- !query 42 -select 10.3000 * 3.0 --- !query 42 schema -struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)> --- !query 42 output -30.9 - - --- !query 43 -select 10.30000 * 30.0 --- !query 43 schema -struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)> --- !query 43 output -309 - - --- !query 44 -select 10.300000000000000000 * 3.000000000000000000 --- !query 44 schema -struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,36)> --- !query 44 output -30.9 - - --- !query 45 -select 10.300000000000000000 * 3.0000000000000000000 --- !query 45 schema -struct<> --- !query 45 output -java.lang.ArithmeticException 
-Decimal(expanded,30.900000000000000000000000000000000000,38,36}) cannot be represented as Decimal(38, 37). - - --- !query 46 -select (5e36 + 0.1) + 5e36 --- !query 46 schema -struct<> --- !query 46 output -java.lang.ArithmeticException -Decimal(expanded,10000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1). - - --- !query 47 -select (-4e36 - 0.1) - 7e36 --- !query 47 schema -struct<> --- !query 47 output -java.lang.ArithmeticException -Decimal(expanded,-11000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1). - - --- !query 48 -select 12345678901234567890.0 * 12345678901234567890.0 --- !query 48 schema -struct<> --- !query 48 output -java.lang.ArithmeticException -Decimal(expanded,1.5241578753238836750190519987501905210E+38,38,-1}) cannot be represented as Decimal(38, 2). - - --- !query 49 -select 1e35 / 0.1 --- !query 49 schema -struct<> --- !query 49 output -java.lang.ArithmeticException -Decimal(expanded,1000000000000000000000000000000000000,37,0}) cannot be represented as Decimal(38, 3). - - --- !query 50 -select 123456789123456789.1234567890 * 1.123456789123456789 --- !query 50 schema -struct<> --- !query 50 output -java.lang.ArithmeticException -Decimal(expanded,138698367904130467.65432098851562262075,38,20}) cannot be represented as Decimal(38, 28). - - --- !query 51 -select 123456789123456789.1234567890 * 1.123456789123456789 --- !query 51 schema -struct<> --- !query 51 output -java.lang.ArithmeticException -Decimal(expanded,138698367904130467.65432098851562262075,38,20}) cannot be represented as Decimal(38, 28). - - --- !query 52 -select 12345678912345.123456789123 / 0.000000012345678 --- !query 52 schema -struct<> --- !query 52 output -java.lang.ArithmeticException -Decimal(expanded,1000000073899961059796.7258663315210392,38,16}) cannot be represented as Decimal(38, 18). 
- - --- !query 53 +-- !query drop table decimals_test --- !query 53 schema +-- !query schema struct<> --- !query 53 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/describe-part-after-analyze.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-part-after-analyze.sql.out index 17dd317f63b70..24927c34c57b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-part-after-analyze.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-part-after-analyze.sql.out @@ -2,47 +2,47 @@ -- Number of queries: 15 --- !query 0 +-- !query CREATE TABLE t (key STRING, value STRING, ds STRING, hr INT) USING parquet PARTITIONED BY (ds, hr) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query INSERT INTO TABLE t PARTITION (ds='2017-08-01', hr=10) VALUES ('k1', 100), ('k2', 200), ('k3', 300) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query INSERT INTO TABLE t PARTITION (ds='2017-08-01', hr=11) VALUES ('k1', 101), ('k2', 201), ('k3', 301), ('k4', 401) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO TABLE t PARTITION (ds='2017-09-01', hr=5) VALUES ('k1', 102), ('k2', 202) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output key string value string ds string @@ -56,27 +56,27 @@ hr int Database default Table t Partition Values [ds=2017-08-01, hr=10] -Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10 +Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10 Created Time [not included in comparison] Last Access [not included in comparison] # Storage Information -Location [not included in 
comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t --- !query 5 +-- !query ANALYZE TABLE t PARTITION (ds='2017-08-01', hr=10) COMPUTE STATISTICS --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output key string value string ds string @@ -90,28 +90,28 @@ hr int Database default Table t Partition Values [ds=2017-08-01, hr=10] -Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10 +Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10 Created Time [not included in comparison] Last Access [not included in comparison] Partition Statistics [not included in comparison] bytes, 3 rows # Storage Information -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t --- !query 7 +-- !query ANALYZE TABLE t PARTITION (ds='2017-08-01') COMPUTE STATISTICS --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output key string value string ds string @@ -125,20 +125,20 @@ hr int Database default Table t Partition Values [ds=2017-08-01, hr=10] -Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10 +Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10 Created Time [not included in comparison] Last Access [not included in comparison] Partition Statistics [not included in comparison] bytes, 3 rows # Storage Information -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t --- !query 9 +-- !query DESC EXTENDED t PARTITION (ds='2017-08-01', hr=11) 
--- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output key string value string ds string @@ -152,28 +152,28 @@ hr int Database default Table t Partition Values [ds=2017-08-01, hr=11] -Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=11 +Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=11 Created Time [not included in comparison] Last Access [not included in comparison] Partition Statistics [not included in comparison] bytes, 4 rows # Storage Information -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t --- !query 10 +-- !query ANALYZE TABLE t PARTITION (ds, hr) COMPUTE STATISTICS --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query DESC EXTENDED t PARTITION (ds='2017-08-01', hr=10) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output key string value string ds string @@ -187,20 +187,20 @@ hr int Database default Table t Partition Values [ds=2017-08-01, hr=10] -Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=10 +Location [not included in comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=10 Created Time [not included in comparison] Last Access [not included in comparison] Partition Statistics [not included in comparison] bytes, 3 rows # Storage Information -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t --- !query 12 +-- !query DESC EXTENDED t PARTITION (ds='2017-08-01', hr=11) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output key string value string ds string @@ -214,20 +214,20 @@ hr int Database default Table t Partition Values [ds=2017-08-01, hr=11] -Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-08-01/hr=11 +Location [not included in 
comparison]/{warehouse_dir}/t/ds=2017-08-01/hr=11 Created Time [not included in comparison] Last Access [not included in comparison] Partition Statistics [not included in comparison] bytes, 4 rows # Storage Information -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t --- !query 13 +-- !query DESC EXTENDED t PARTITION (ds='2017-09-01', hr=5) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output key string value string ds string @@ -241,18 +241,18 @@ hr int Database default Table t Partition Values [ds=2017-09-01, hr=5] -Location [not included in comparison]sql/core/spark-warehouse/t/ds=2017-09-01/hr=5 +Location [not included in comparison]/{warehouse_dir}/t/ds=2017-09-01/hr=5 Created Time [not included in comparison] Last Access [not included in comparison] Partition Statistics [not included in comparison] bytes, 2 rows # Storage Information -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t --- !query 14 +-- !query DROP TABLE t --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out index e41534681dc91..6b16aba268f50 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out @@ -2,114 +2,114 @@ -- Number of queries: 19 --- !query 0 +-- !query CREATE table desc_temp1 (key int COMMENT 'column_comment', val string) USING PARQUET --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE table desc_temp2 (key int, val string) USING PARQUET --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query DESC SELECT key, 
key + 1 as plusone FROM desc_temp1 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output key int column_comment plusone int --- !query 3 +-- !query DESC QUERY SELECT * FROM desc_temp2 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output key int val string --- !query 4 +-- !query DESC SELECT key, COUNT(*) as count FROM desc_temp1 group by key --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output key int column_comment count bigint --- !query 5 +-- !query DESC SELECT 10.00D as col1 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output col1 double --- !query 6 +-- !query DESC QUERY SELECT key FROM desc_temp1 UNION ALL select CAST(1 AS DOUBLE) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output key double --- !query 7 +-- !query DESC QUERY VALUES(1.00D, 'hello') as tab1(col1, col2) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output col1 double col2 string --- !query 8 +-- !query DESC QUERY FROM desc_temp1 a SELECT * --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output key int column_comment val string --- !query 9 +-- !query DESC WITH s AS (SELECT 'hello' as col1) SELECT * FROM s --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output col1 string --- !query 10 +-- !query DESCRIBE QUERY WITH s AS (SELECT * from desc_temp1) SELECT * FROM s --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output key int column_comment val string --- !query 11 +-- !query DESCRIBE SELECT * FROM (FROM desc_temp2 select * select *) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output key int val string --- !query 12 +-- !query DESCRIBE INSERT INTO desc_temp1 values (1, 'val1') --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output 
org.apache.spark.sql.catalyst.parser.ParseException mismatched input 'desc_temp1' expecting {, '.'}(line 1, pos 21) @@ -119,11 +119,11 @@ DESCRIBE INSERT INTO desc_temp1 values (1, 'val1') ---------------------^^^ --- !query 13 +-- !query DESCRIBE INSERT INTO desc_temp1 SELECT * FROM desc_temp2 --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException mismatched input 'desc_temp1' expecting {, '.'}(line 1, pos 21) @@ -133,14 +133,14 @@ DESCRIBE INSERT INTO desc_temp1 SELECT * FROM desc_temp2 ---------------------^^^ --- !query 14 +-- !query DESCRIBE FROM desc_temp1 a insert into desc_temp1 select * insert into desc_temp2 select * --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException mismatched input 'insert' expecting {'MAP', 'REDUCE', 'SELECT'}(line 3, pos 5) @@ -153,21 +153,21 @@ DESCRIBE insert into desc_temp2 select * --- !query 15 +-- !query EXPLAIN DESC QUERY SELECT * FROM desc_temp2 WHERE key > 0 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output == Physical Plan == Execute DescribeQueryCommand +- DescribeQueryCommand SELECT * FROM desc_temp2 WHERE key > 0 --- !query 16 +-- !query EXPLAIN EXTENDED DESC WITH s AS (SELECT 'hello' as col1) SELECT * FROM s --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output == Parsed Logical Plan == DescribeQueryCommand WITH s AS (SELECT 'hello' as col1) SELECT * FROM s @@ -183,17 +183,17 @@ Execute DescribeQueryCommand +- DescribeQueryCommand WITH s AS (SELECT 'hello' as col1) SELECT * FROM s --- !query 17 +-- !query DROP TABLE desc_temp1 --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output --- !query 18 +-- !query DROP TABLE desc_temp2 --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output diff --git 
a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out index 7873085da5069..3029fa8e83077 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-after-alter-table.sql.out @@ -2,19 +2,19 @@ -- Number of queries: 12 --- !query 0 +-- !query CREATE TABLE table_with_comment (a STRING, b INT, c STRING, d STRING) USING parquet COMMENT 'added' --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query DESC FORMATTED table_with_comment --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output a string b int c string @@ -29,22 +29,22 @@ Created By [not included in comparison] Type MANAGED Provider parquet Comment added -Location [not included in comparison]sql/core/spark-warehouse/table_with_comment +Location [not included in comparison]/{warehouse_dir}/table_with_comment --- !query 2 +-- !query ALTER TABLE table_with_comment SET TBLPROPERTIES("comment"= "modified comment", "type"= "parquet") --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query DESC FORMATTED table_with_comment --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output a string b int c string @@ -60,30 +60,30 @@ Type MANAGED Provider parquet Comment modified comment Table Properties [type=parquet] -Location [not included in comparison]sql/core/spark-warehouse/table_with_comment +Location [not included in comparison]/{warehouse_dir}/table_with_comment --- !query 4 +-- !query DROP TABLE table_with_comment --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TABLE table_comment (a STRING, b INT) USING parquet --- !query 5 schema +-- !query schema struct<> --- !query 
5 output +-- !query output --- !query 6 +-- !query DESC FORMATTED table_comment --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output a string b int @@ -95,22 +95,22 @@ Last Access [not included in comparison] Created By [not included in comparison] Type MANAGED Provider parquet -Location [not included in comparison]sql/core/spark-warehouse/table_comment +Location [not included in comparison]/{warehouse_dir}/table_comment --- !query 7 +-- !query ALTER TABLE table_comment SET TBLPROPERTIES(comment = "added comment") --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query DESC formatted table_comment --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output a string b int @@ -123,22 +123,22 @@ Created By [not included in comparison] Type MANAGED Provider parquet Comment added comment -Location [not included in comparison]sql/core/spark-warehouse/table_comment +Location [not included in comparison]/{warehouse_dir}/table_comment --- !query 9 +-- !query ALTER TABLE table_comment UNSET TBLPROPERTIES IF EXISTS ('comment') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query DESC FORMATTED table_comment --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output a string b int @@ -150,12 +150,12 @@ Last Access [not included in comparison] Created By [not included in comparison] Type MANAGED Provider parquet -Location [not included in comparison]sql/core/spark-warehouse/table_comment +Location [not included in comparison]/{warehouse_dir}/table_comment --- !query 11 +-- !query DROP TABLE table_comment --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out index 6ef8af6574e98..ae9240ec588da 
100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out @@ -1,30 +1,30 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 28 --- !query 0 +-- !query CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment') USING PARQUET --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query DESC desc_col_temp_view key --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output col_name key data_type int comment column_comment --- !query 2 +-- !query DESC EXTENDED desc_col_temp_view key --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output col_name key data_type int comment column_comment @@ -37,11 +37,11 @@ max_col_len NULL histogram NULL --- !query 3 +-- !query DESC FORMATTED desc_col_temp_view key --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output col_name key data_type int comment column_comment @@ -54,11 +54,11 @@ max_col_len NULL histogram NULL --- !query 4 +-- !query DESC FORMATTED desc_col_temp_view desc_col_temp_view.key --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output col_name key data_type int comment column_comment @@ -71,46 +71,46 @@ max_col_len NULL histogram NULL --- !query 5 +-- !query DESC desc_col_temp_view key1 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException Column key1 does not exist; --- !query 6 +-- !query CREATE TABLE desc_col_table (key int COMMENT 'column_comment') USING PARQUET --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query ANALYZE TABLE desc_col_table COMPUTE STATISTICS FOR COLUMNS key --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- 
!query 8 +-- !query DESC desc_col_table key --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output col_name key data_type int comment column_comment --- !query 9 +-- !query DESC EXTENDED desc_col_table key --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output col_name key data_type int comment column_comment @@ -123,11 +123,11 @@ max_col_len 4 histogram NULL --- !query 10 +-- !query DESC FORMATTED desc_col_table key --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output col_name key data_type int comment column_comment @@ -140,19 +140,19 @@ max_col_len 4 histogram NULL --- !query 11 +-- !query CREATE TABLE desc_complex_col_table (`a.b` int, col struct) USING PARQUET --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query DESC FORMATTED desc_complex_col_table `a.b` --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output col_name a.b data_type int comment NULL @@ -165,11 +165,11 @@ max_col_len NULL histogram NULL --- !query 13 +-- !query DESC FORMATTED desc_complex_col_table col --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output col_name col data_type struct comment NULL @@ -182,52 +182,52 @@ max_col_len NULL histogram NULL --- !query 14 +-- !query DESC FORMATTED desc_complex_col_table col.x --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException DESC TABLE COLUMN command does not support nested data types: col.x; --- !query 15 +-- !query SET spark.sql.statistics.histogram.enabled=true --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output spark.sql.statistics.histogram.enabled true --- !query 16 +-- !query SET spark.sql.statistics.histogram.numBins=2 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 
spark.sql.statistics.histogram.numBins 2 --- !query 17 +-- !query INSERT INTO desc_col_table values 1, 2, 3, 4 --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output --- !query 18 +-- !query ANALYZE TABLE desc_col_table COMPUTE STATISTICS FOR COLUMNS key --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query DESC EXTENDED desc_col_table key --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output col_name key data_type int comment column_comment @@ -242,25 +242,74 @@ bin_0 lower_bound: 1.0, upper_bound: 2.0, distinct_count: 2 bin_1 lower_bound: 2.0, upper_bound: 4.0, distinct_count: 2 --- !query 20 +-- !query DROP VIEW desc_col_temp_view --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query DROP TABLE desc_col_table --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output --- !query 22 +-- !query DROP TABLE desc_complex_col_table --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output + + + +-- !query +CREATE TABLE customer(CName STRING) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO customer VALUES('Maria') +-- !query schema +struct<> +-- !query output + + + +-- !query +ANALYZE TABLE customer COMPUTE STATISTICS FOR COLUMNS cname +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC EXTENDED customer cname +-- !query schema +struct +-- !query output +col_name cname +data_type string +comment NULL +min NULL +max NULL +num_nulls 0 +distinct_count 1 +avg_col_len 5 +max_col_len 5 +histogram NULL + + +-- !query +DROP TABLE customer +-- !query schema +struct<> +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index f58bdb5446b64..697e006544acf 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -2,68 +2,68 @@ -- Number of queries: 41 --- !query 0 +-- !query CREATE TABLE t (a STRING, b INT, c STRING, d STRING) USING parquet OPTIONS (a '1', b '2') PARTITIONED BY (c, d) CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS COMMENT 'table_comment' TBLPROPERTIES (t 'test') --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW temp_v AS SELECT * FROM t --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW temp_Data_Source_View USING org.apache.spark.sql.sources.DDLScanSource OPTIONS ( From '1', To '10', Table 'test1') --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE VIEW v AS SELECT * FROM t --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query ALTER TABLE t SET TBLPROPERTIES (e = '3') --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query ALTER TABLE t ADD PARTITION (c='Us', d=1) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query DESCRIBE t --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output a string b int c string @@ -74,11 +74,11 @@ c string d string --- !query 7 +-- !query DESC default.t --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output a string b int c string @@ -89,11 +89,11 @@ c string d string --- !query 8 +-- !query DESC TABLE t --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output a string b int c string @@ -104,11 +104,11 @@ c string d string --- !query 9 +-- !query DESC FORMATTED t --- !query 9 schema +-- !query schema struct --- !query 9 output 
+-- !query output a string b int c string @@ -131,16 +131,16 @@ Bucket Columns [`a`] Sort Columns [`b`] Comment table_comment Table Properties [t=test, e=3] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog --- !query 10 +-- !query DESC EXTENDED t --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output a string b int c string @@ -163,24 +163,24 @@ Bucket Columns [`a`] Sort Columns [`b`] Comment table_comment Table Properties [t=test, e=3] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog --- !query 11 +-- !query ALTER TABLE t UNSET TBLPROPERTIES (e) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query DESC EXTENDED t --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output a string b int c string @@ -203,24 +203,24 @@ Bucket Columns [`a`] Sort Columns [`b`] Comment table_comment Table Properties [t=test] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog --- !query 13 +-- !query ALTER TABLE t UNSET TBLPROPERTIES (comment) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query DESC EXTENDED t --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output a string b int c string @@ -242,16 +242,16 @@ Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] Table Properties [t=test] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog --- !query 15 +-- !query DESC t PARTITION 
(c='Us', d=1) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output a string b int c string @@ -262,11 +262,11 @@ c string d string --- !query 16 +-- !query DESC EXTENDED t PARTITION (c='Us', d=1) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output a string b int c string @@ -280,7 +280,7 @@ d string Database default Table t Partition Values [c=Us, d=1] -Location [not included in comparison]sql/core/spark-warehouse/t/c=Us/d=1 +Location [not included in comparison]/{warehouse_dir}/t/c=Us/d=1 Storage Properties [a=1, b=2] Created Time [not included in comparison] Last Access [not included in comparison] @@ -289,15 +289,15 @@ Last Access [not included in comparison] Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] --- !query 17 +-- !query DESC FORMATTED t PARTITION (c='Us', d=1) --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output a string b int c string @@ -311,7 +311,7 @@ d string Database default Table t Partition Values [c=Us, d=1] -Location [not included in comparison]sql/core/spark-warehouse/t/c=Us/d=1 +Location [not included in comparison]/{warehouse_dir}/t/c=Us/d=1 Storage Properties [a=1, b=2] Created Time [not included in comparison] Last Access [not included in comparison] @@ -320,35 +320,35 @@ Last Access [not included in comparison] Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] -Location [not included in comparison]sql/core/spark-warehouse/t +Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] --- !query 18 +-- !query DESC t PARTITION (c='Us', d=2) --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException Partition not found in table 't' database 'default': c -> Us d -> 2; 
--- !query 19 +-- !query DESC t PARTITION (c='Us') --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException Partition spec is invalid. The spec (c) must match the partition spec (c, d) defined in table '`default`.`t`'; --- !query 20 +-- !query DESC t PARTITION (c='Us', d) --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException PARTITION specification is incomplete: `d`(line 1, pos 0) @@ -358,55 +358,55 @@ DESC t PARTITION (c='Us', d) ^^^ --- !query 21 +-- !query DESC temp_v --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output a string b int c string d string --- !query 22 +-- !query DESC TABLE temp_v --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output a string b int c string d string --- !query 23 +-- !query DESC FORMATTED temp_v --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output a string b int c string d string --- !query 24 +-- !query DESC EXTENDED temp_v --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output a string b int c string d string --- !query 25 +-- !query DESC temp_Data_Source_View --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output intType int test comment test1 stringType string dateType date @@ -425,42 +425,42 @@ arrayType array structType struct --- !query 26 +-- !query DESC temp_v PARTITION (c='Us', d=1) --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output org.apache.spark.sql.AnalysisException DESC PARTITION is not allowed on a temporary view: temp_v; --- !query 27 +-- !query DESC v --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output a string b int c string d string --- !query 28 +-- !query DESC TABLE v --- !query 28 schema +-- !query schema struct --- !query 28 output +-- 
!query output a string b int c string d string --- !query 29 +-- !query DESC FORMATTED v --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output a string b int c string @@ -475,16 +475,16 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t -View Default Database default +View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.query.out.col.0=a, view.query.out.numCols=4, view.default.database=default, view.query.out.col.1=b, view.query.out.col.2=c] +Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.catalogAndNamespace.part.1=default] --- !query 30 +-- !query DESC EXTENDED v --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output a string b int c string @@ -499,47 +499,48 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t -View Default Database default +View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.query.out.col.0=a, view.query.out.numCols=4, view.default.database=default, view.query.out.col.1=b, view.query.out.col.2=c] +Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.catalogAndNamespace.part.1=default] --- !query 31 +-- !query DESC v PARTITION (c='Us', d=1) --- !query 31 schema +-- !query schema struct<> --- !query 31 output +-- !query output org.apache.spark.sql.AnalysisException DESC PARTITION is not allowed on a view: v; --- !query 32 +-- !query EXPLAIN DESC t --- 
!query 32 schema +-- !query schema struct --- !query 32 output +-- !query output == Physical Plan == Execute DescribeTableCommand +- DescribeTableCommand `t`, false --- !query 33 +-- !query EXPLAIN DESC EXTENDED t --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output == Physical Plan == Execute DescribeTableCommand +- DescribeTableCommand `t`, true --- !query 34 +-- !query EXPLAIN EXTENDED DESC t --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output == Parsed Logical Plan == -'DescribeTableStatement [t], false +'DescribeRelation false ++- 'UnresolvedTableOrView [t] == Analyzed Logical Plan == col_name: string, data_type: string, comment: string @@ -553,53 +554,53 @@ Execute DescribeTableCommand +- DescribeTableCommand `t`, false --- !query 35 +-- !query EXPLAIN DESCRIBE t b --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output == Physical Plan == Execute DescribeColumnCommand +- DescribeColumnCommand `t`, [b], false --- !query 36 +-- !query EXPLAIN DESCRIBE t PARTITION (c='Us', d=2) --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output == Physical Plan == Execute DescribeTableCommand +- DescribeTableCommand `t`, Map(c -> Us, d -> 2), false --- !query 37 +-- !query DROP TABLE t --- !query 37 schema +-- !query schema struct<> --- !query 37 output +-- !query output --- !query 38 +-- !query DROP VIEW temp_v --- !query 38 schema +-- !query schema struct<> --- !query 38 output +-- !query output --- !query 39 +-- !query DROP VIEW temp_Data_Source_View --- !query 39 schema +-- !query schema struct<> --- !query 39 output +-- !query output --- !query 40 +-- !query DROP VIEW v --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out index 01091a2f751ce..601ff8f024214 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out @@ -2,25 +2,25 @@ -- Number of queries: 27 --- !query 0 +-- !query CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (1), (2), (2), (3), (5), (5), (null) AS tab2(c1) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW tab3 AS SELECT * FROM VALUES (1, 2), (1, 2), @@ -28,13 +28,13 @@ CREATE TEMPORARY VIEW tab3 AS SELECT * FROM VALUES (2, 3), (2, 2) AS tab3(k, v) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES (1, 2), (2, 3), @@ -42,45 +42,45 @@ CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES (2, 2), (2, 20) AS tab4(k, v) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query SELECT * FROM tab1 EXCEPT ALL SELECT * FROM tab2 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 0 2 2 NULL --- !query 5 +-- !query SELECT * FROM tab1 MINUS ALL SELECT * FROM tab2 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 0 2 2 NULL --- !query 6 +-- !query SELECT * FROM tab1 EXCEPT ALL SELECT * FROM tab2 WHERE c1 IS NOT NULL --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 0 2 2 @@ -88,23 +88,23 @@ NULL NULL --- !query 7 +-- !query SELECT * FROM tab1 WHERE c1 > 5 EXCEPT ALL SELECT * FROM tab2 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output --- !query 8 +-- !query SELECT * FROM tab1 EXCEPT ALL SELECT * FROM tab2 WHERE c1 > 6 --- !query 8 schema +-- !query 
schema struct --- !query 8 output +-- !query output 0 1 2 @@ -116,13 +116,13 @@ NULL NULL --- !query 9 +-- !query SELECT * FROM tab1 EXCEPT ALL SELECT CAST(1 AS BIGINT) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 0 2 2 @@ -133,65 +133,65 @@ NULL NULL --- !query 10 +-- !query SELECT * FROM tab1 EXCEPT ALL SELECT array(1) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException ExceptAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; --- !query 11 +-- !query SELECT * FROM tab3 EXCEPT ALL SELECT * FROM tab4 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 2 1 3 --- !query 12 +-- !query SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 2 2 2 20 --- !query 13 +-- !query SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3 INTERSECT DISTINCT SELECT * FROM tab4 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 2 2 2 20 --- !query 14 +-- !query SELECT * FROM tab4 EXCEPT ALL SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output --- !query 15 +-- !query SELECT * FROM tab3 EXCEPT ALL SELECT * FROM tab4 @@ -199,24 +199,24 @@ UNION ALL SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 3 --- !query 16 +-- !query SELECT k FROM tab3 EXCEPT ALL SELECT k, v FROM tab4 --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; --- !query 17 +-- !query SELECT * FROM tab3 EXCEPT 
ALL SELECT * FROM tab4 @@ -224,13 +224,13 @@ UNION SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 3 --- !query 18 +-- !query SELECT * FROM tab3 MINUS ALL SELECT * FROM tab4 @@ -238,13 +238,13 @@ UNION SELECT * FROM tab3 MINUS DISTINCT SELECT * FROM tab4 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 3 --- !query 19 +-- !query SELECT * FROM tab3 EXCEPT ALL SELECT * FROM tab4 @@ -252,13 +252,13 @@ EXCEPT DISTINCT SELECT * FROM tab3 EXCEPT DISTINCT SELECT * FROM tab4 --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT * FROM (SELECT tab3.k, tab4.v @@ -272,13 +272,13 @@ FROM (SELECT tab3.k, FROM tab3 JOIN tab4 ON tab3.k = tab4.k) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output --- !query 21 +-- !query SELECT * FROM (SELECT tab3.k, tab4.v @@ -292,9 +292,9 @@ FROM (SELECT tab4.v AS k, FROM tab3 JOIN tab4 ON tab3.k = tab4.k) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 1 2 1 2 1 2 @@ -304,43 +304,43 @@ struct 2 3 --- !query 22 +-- !query SELECT v FROM tab3 GROUP BY v EXCEPT ALL SELECT k FROM tab4 GROUP BY k --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output 3 --- !query 23 +-- !query DROP VIEW IF EXISTS tab1 --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query DROP VIEW IF EXISTS tab2 --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output --- !query 25 +-- !query DROP VIEW IF EXISTS tab3 --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output --- !query 26 +-- !query DROP VIEW IF EXISTS tab4 --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output diff --git 
a/sql/core/src/test/resources/sql-tests/results/except.sql.out b/sql/core/src/test/resources/sql-tests/results/except.sql.out index c9b712d4d2949..62d695219d01d 100644 --- a/sql/core/src/test/resources/sql-tests/results/except.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/except.sql.out @@ -2,20 +2,20 @@ -- Number of queries: 9 --- !query 0 +-- !query create temporary view t1 as select * from values ("one", 1), ("two", 2), ("three", 3), ("one", NULL) as t1(k, v) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("one", 1), ("two", 22), @@ -23,71 +23,71 @@ create temporary view t2 as select * from values ("one", NULL), (NULL, 5) as t2(k, v) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM t1 EXCEPT SELECT * FROM t2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output three 3 two 2 --- !query 3 +-- !query SELECT * FROM t1 EXCEPT SELECT * FROM t1 where v <> 1 and v <> 2 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output one 1 one NULL two 2 --- !query 4 +-- !query SELECT * FROM t1 where v <> 1 and v <> 22 EXCEPT SELECT * FROM t1 where v <> 2 and v >= 3 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output two 2 --- !query 5 +-- !query SELECT t1.* FROM t1, t2 where t1.k = t2.k EXCEPT SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != 'one' --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output one 1 one NULL --- !query 6 +-- !query SELECT * FROM t2 where v >= 1 and v <> 22 EXCEPT SELECT * FROM t1 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL 5 one 5 --- !query 7 +-- !query SELECT (SELECT min(k) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1 MINUS SELECT (SELECT min(k) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = 'one' --- 
!query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL two --- !query 8 +-- !query SELECT t1.k FROM t1 WHERE t1.v <= (SELECT max(t2.v) @@ -99,7 +99,7 @@ FROM t1 WHERE t1.v >= (SELECT min(t2.v) FROM t2 WHERE t2.k = t1.k) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output two diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 4a08cfada292d..bc28d7f87bf00 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1,49 +1,57 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 18 +-- Number of queries: 22 --- !query 0 +-- !query CREATE table explain_temp1 (key int, val int) USING PARQUET --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE table explain_temp2 (key int, val int) USING PARQUET --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE table explain_temp3 (key int, val int) USING PARQUET --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query +CREATE table explain_temp4 (key int, val string) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query SET spark.sql.codegen.wholeStage = true --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output spark.sql.codegen.wholeStage true --- !query 4 +-- !query EXPLAIN FORMATTED SELECT key, max(val) FROM explain_temp1 WHERE key > 0 GROUP BY key ORDER BY key --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output == Physical Plan == * Sort (9) +- Exchange (8) @@ -58,6 +66,10 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in 
comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), GreaterThan(key,0)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -72,12 +84,20 @@ Input : [key#x, val#x] (5) HashAggregate [codegen id : 1] Input: [key#x, val#x] +Keys: [key#x] +Functions: [partial_max(val#x)] +Aggregate Attributes: [max#x] +Results: [key#x, max#x] (6) Exchange Input: [key#x, max#x] (7) HashAggregate [codegen id : 2] Input: [key#x, max#x] +Keys: [key#x] +Functions: [max(val#x)] +Aggregate Attributes: [max(val#x)#x] +Results: [key#x, max(val#x)#x AS max(val)#x] (8) Exchange Input: [key#x, max(val)#x] @@ -86,16 +106,16 @@ Input: [key#x, max(val)#x] Input: [key#x, max(val)#x] --- !query 5 +-- !query EXPLAIN FORMATTED SELECT key, max(val) FROM explain_temp1 WHERE key > 0 GROUP BY key HAVING max(val) > 0 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output == Physical Plan == * Project (9) +- * Filter (8) @@ -110,6 +130,10 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), GreaterThan(key,0)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -124,12 +148,20 @@ Input : [key#x, val#x] (5) HashAggregate [codegen id : 1] Input: [key#x, val#x] +Keys: [key#x] +Functions: [partial_max(val#x)] +Aggregate Attributes: [max#x] +Results: [key#x, max#x] (6) Exchange Input: [key#x, max#x] (7) HashAggregate [codegen id : 2] Input: [key#x, max#x] +Keys: [key#x] +Functions: [max(val#x)] +Aggregate Attributes: [max(val#x)#x] +Results: [key#x, max(val#x)#x AS max(val)#x, max(val#x)#x AS max(val#x)#x] (8) Filter [codegen id : 2] Input : [key#x, max(val)#x, max(val#x)#x] @@ -140,14 +172,14 @@ Output : [key#x, max(val)#x] Input : [key#x, max(val)#x, max(val#x)#x] --- !query 6 +-- !query EXPLAIN FORMATTED SELECT key, val FROM explain_temp1 WHERE key > 0 UNION SELECT key, 
val FROM explain_temp1 WHERE key > 0 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output == Physical Plan == * HashAggregate (12) +- Exchange (11) @@ -165,6 +197,10 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), GreaterThan(key,0)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -179,6 +215,10 @@ Input : [key#x, val#x] (5) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), GreaterThan(key,0)] +ReadSchema: struct (6) ColumnarToRow [codegen id : 2] Input: [key#x, val#x] @@ -195,23 +235,31 @@ Input : [key#x, val#x] (10) HashAggregate [codegen id : 3] Input: [key#x, val#x] +Keys: [key#x, val#x] +Functions: [] +Aggregate Attributes: [] +Results: [key#x, val#x] (11) Exchange Input: [key#x, val#x] (12) HashAggregate [codegen id : 4] Input: [key#x, val#x] +Keys: [key#x, val#x] +Functions: [] +Aggregate Attributes: [] +Results: [key#x, val#x] --- !query 7 +-- !query EXPLAIN FORMATTED SELECT * FROM explain_temp1 a, explain_temp2 b WHERE a.key = b.key --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output == Physical Plan == * BroadcastHashJoin Inner BuildRight (10) :- * Project (4) @@ -227,6 +275,10 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 2] Input: [key#x, val#x] @@ -241,6 +293,10 @@ Input : [key#x, val#x] (5) Scan parquet default.explain_temp2 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(key)] +ReadSchema: struct (6) ColumnarToRow 
[codegen id : 1] Input: [key#x, val#x] @@ -262,15 +318,15 @@ Right keys: List(key#x) Join condition: None --- !query 8 +-- !query EXPLAIN FORMATTED SELECT * FROM explain_temp1 a LEFT OUTER JOIN explain_temp2 b ON a.key = b.key --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output == Physical Plan == * BroadcastHashJoin LeftOuter BuildRight (8) :- * ColumnarToRow (2) @@ -284,12 +340,19 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct (2) ColumnarToRow [codegen id : 2] Input: [key#x, val#x] (3) Scan parquet default.explain_temp2 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(key)] +ReadSchema: struct (4) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -311,7 +374,7 @@ Right keys: List(key#x) Join condition: None --- !query 9 +-- !query EXPLAIN FORMATTED SELECT * FROM explain_temp1 @@ -322,9 +385,9 @@ EXPLAIN FORMATTED WHERE val > 0) AND val = 2) AND val > 3 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output == Physical Plan == * Project (4) +- * Filter (3) @@ -334,6 +397,10 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), IsNotNull(val), GreaterThan(val,3)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -360,6 +427,10 @@ Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery (5) Scan parquet default.explain_temp2 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(key), IsNotNull(val), EqualTo(val,2)] +ReadSchema: struct (6) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -374,12 +445,20 @@ Input : 
[key#x, val#x] (9) HashAggregate [codegen id : 1] Input: [key#x] +Keys: [] +Functions: [partial_max(key#x)] +Aggregate Attributes: [max#x] +Results: [max#x] (10) Exchange Input: [max#x] (11) HashAggregate [codegen id : 2] Input: [max#x] +Keys: [] +Functions: [max(key#x)] +Aggregate Attributes: [max(key#x)#x] +Results: [max(key#x)#x AS max(key)#x] Subquery:2 Hosting operator id = 7 Hosting Expression = Subquery scalar-subquery#x, [id=#x] * HashAggregate (18) @@ -393,6 +472,10 @@ Subquery:2 Hosting operator id = 7 Hosting Expression = Subquery scalar-subquery (12) Scan parquet default.explain_temp3 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp3] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct (13) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -407,15 +490,23 @@ Input : [key#x, val#x] (16) HashAggregate [codegen id : 1] Input: [key#x] +Keys: [] +Functions: [partial_max(key#x)] +Aggregate Attributes: [max#x] +Results: [max#x] (17) Exchange Input: [max#x] (18) HashAggregate [codegen id : 2] Input: [max#x] +Keys: [] +Functions: [max(key#x)] +Aggregate Attributes: [max(key#x)#x] +Results: [max(key#x)#x AS max(key)#x] --- !query 10 +-- !query EXPLAIN FORMATTED SELECT * FROM explain_temp1 @@ -423,12 +514,12 @@ EXPLAIN FORMATTED FROM explain_temp2 WHERE val > 0) OR - key = (SELECT max(key) + key = (SELECT avg(key) FROM explain_temp3 WHERE val > 0) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output == Physical Plan == * Filter (3) +- * ColumnarToRow (2) @@ -437,13 +528,16 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] (3) Filter [codegen id : 1] Input : [key#x, val#x] -Condition : ((key#x = Subquery scalar-subquery#x, [id=#x]) OR (key#x = Subquery scalar-subquery#x, 
[id=#x])) +Condition : ((key#x = Subquery scalar-subquery#x, [id=#x]) OR (cast(key#x as double) = Subquery scalar-subquery#x, [id=#x])) ===== Subqueries ===== @@ -459,6 +553,10 @@ Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery (4) Scan parquet default.explain_temp2 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct (5) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -473,12 +571,20 @@ Input : [key#x, val#x] (8) HashAggregate [codegen id : 1] Input: [key#x] +Keys: [] +Functions: [partial_max(key#x)] +Aggregate Attributes: [max#x] +Results: [max#x] (9) Exchange Input: [max#x] (10) HashAggregate [codegen id : 2] Input: [max#x] +Keys: [] +Functions: [max(key#x)] +Aggregate Attributes: [max(key#x)#x] +Results: [max(key#x)#x AS max(key)#x] Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery#x, [id=#x] * HashAggregate (17) @@ -492,6 +598,10 @@ Subquery:2 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery (11) Scan parquet default.explain_temp3 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp3] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct (12) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -506,21 +616,29 @@ Input : [key#x, val#x] (15) HashAggregate [codegen id : 1] Input: [key#x] +Keys: [] +Functions: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes: [sum#x, count#xL] +Results: [sum#x, count#xL] (16) Exchange -Input: [max#x] +Input: [sum#x, count#xL] (17) HashAggregate [codegen id : 2] -Input: [max#x] +Input: [sum#x, count#xL] +Keys: [] +Functions: [avg(cast(key#x as bigint))] +Aggregate Attributes: [avg(cast(key#x as bigint))#x] +Results: [avg(cast(key#x as bigint))#x AS avg(key)#x] --- !query 11 +-- !query EXPLAIN FORMATTED SELECT (SELECT Avg(key) FROM 
explain_temp1) + (SELECT Avg(key) FROM explain_temp1) FROM explain_temp1 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output == Physical Plan == * Project (3) +- * ColumnarToRow (2) @@ -529,6 +647,9 @@ struct (1) Scan parquet default.explain_temp1 Output: [] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct<> (2) ColumnarToRow [codegen id : 1] Input: [] @@ -549,23 +670,34 @@ Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery (4) Scan parquet default.explain_temp1 Output: [key#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct (5) ColumnarToRow [codegen id : 1] Input: [key#x] (6) HashAggregate [codegen id : 1] Input: [key#x] +Keys: [] +Functions: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes: [sum#x, count#xL] +Results: [sum#x, count#xL] (7) Exchange Input: [sum#x, count#xL] (8) HashAggregate [codegen id : 2] Input: [sum#x, count#xL] +Keys: [] +Functions: [avg(cast(key#x as bigint))] +Aggregate Attributes: [avg(cast(key#x as bigint))#x] +Results: [avg(cast(key#x as bigint))#x AS avg(key)#x] Subquery:2 Hosting operator id = 3 Hosting Expression = ReusedSubquery Subquery scalar-subquery#x, [id=#x] --- !query 12 +-- !query EXPLAIN FORMATTED WITH cte1 AS ( SELECT * @@ -573,9 +705,9 @@ EXPLAIN FORMATTED WHERE key > 10 ) SELECT * FROM cte1 a, cte1 b WHERE a.key = b.key --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output == Physical Plan == * BroadcastHashJoin Inner BuildRight (10) :- * Project (4) @@ -591,6 +723,10 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), GreaterThan(key,10)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 2] Input: [key#x, val#x] @@ -605,6 +741,10 @@ Input : [key#x, val#x] (5) Scan 
parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), GreaterThan(key,10)] +ReadSchema: struct (6) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -626,7 +766,7 @@ Right keys: List(key#x) Join condition: None --- !query 13 +-- !query EXPLAIN FORMATTED WITH cte1 AS ( SELECT key, max(val) @@ -635,9 +775,9 @@ EXPLAIN FORMATTED GROUP BY key ) SELECT * FROM cte1 a, cte1 b WHERE a.key = b.key --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output == Physical Plan == * BroadcastHashJoin Inner BuildRight (11) :- * HashAggregate (7) @@ -654,6 +794,10 @@ struct (1) Scan parquet default.explain_temp1 Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +PushedFilters: [IsNotNull(key), GreaterThan(key,10)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input: [key#x, val#x] @@ -668,18 +812,30 @@ Input : [key#x, val#x] (5) HashAggregate [codegen id : 1] Input: [key#x, val#x] +Keys: [key#x] +Functions: [partial_max(val#x)] +Aggregate Attributes: [max#x] +Results: [key#x, max#x] (6) Exchange Input: [key#x, max#x] (7) HashAggregate [codegen id : 4] Input: [key#x, max#x] +Keys: [key#x] +Functions: [max(val#x)] +Aggregate Attributes: [max(val#x)#x] +Results: [key#x, max(val#x)#x AS max(val)#x] (8) ReusedExchange [Reuses operator id: 6] Output : ArrayBuffer(key#x, max#x) (9) HashAggregate [codegen id : 3] Input: [key#x, max#x] +Keys: [key#x] +Functions: [max(val#x)] +Aggregate Attributes: [max(val#x)#x] +Results: [key#x, max(val#x)#x AS max(val)#x] (10) BroadcastExchange Input: [key#x, max(val)#x] @@ -690,13 +846,13 @@ Right keys: List(key#x) Join condition: None --- !query 14 +-- !query EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM explain_temp1 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output == Physical Plan == 
Execute CreateViewCommand (1) +- CreateViewCommand (2) @@ -714,25 +870,163 @@ Output: [] (4) Project --- !query 15 +-- !query +EXPLAIN FORMATTED + SELECT + COUNT(val) + SUM(key) as TOTAL, + COUNT(key) FILTER (WHERE val > 1) + FROM explain_temp1 +-- !query schema +struct +-- !query output +== Physical Plan == +* HashAggregate (5) ++- Exchange (4) + +- HashAggregate (3) + +- * ColumnarToRow (2) + +- Scan parquet default.explain_temp1 (1) + + +(1) Scan parquet default.explain_temp1 +Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) HashAggregate +Input: [key#x, val#x] +Keys: [] +Functions: [partial_count(val#x), partial_sum(cast(key#x as bigint)), partial_count(key#x) FILTER (WHERE (val#x > 1))] +Aggregate Attributes: [count#xL, sum#xL, count#xL] +Results: [count#xL, sum#xL, count#xL] + +(4) Exchange +Input: [count#xL, sum#xL, count#xL] + +(5) HashAggregate [codegen id : 2] +Input: [count#xL, sum#xL, count#xL] +Keys: [] +Functions: [count(val#x), sum(cast(key#x as bigint)), count(key#x)] +Aggregate Attributes: [count(val#x)#xL, sum(cast(key#x as bigint))#xL, count(key#x)#xL] +Results: [(count(val#x)#xL + sum(cast(key#x as bigint))#xL) AS TOTAL#xL, count(key#x)#xL AS count(key) FILTER (WHERE (val > 1))#xL] + + +-- !query +EXPLAIN FORMATTED + SELECT key, sort_array(collect_set(val))[0] + FROM explain_temp4 + GROUP BY key +-- !query schema +struct +-- !query output +== Physical Plan == +ObjectHashAggregate (5) ++- Exchange (4) + +- ObjectHashAggregate (3) + +- * ColumnarToRow (2) + +- Scan parquet default.explain_temp4 (1) + + +(1) Scan parquet default.explain_temp4 +Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp4] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) ObjectHashAggregate +Input: [key#x, val#x] +Keys: [key#x] +Functions: 
[partial_collect_set(val#x, 0, 0)] +Aggregate Attributes: [buf#x] +Results: [key#x, buf#x] + +(4) Exchange +Input: [key#x, buf#x] + +(5) ObjectHashAggregate +Input: [key#x, buf#x] +Keys: [key#x] +Functions: [collect_set(val#x, 0, 0)] +Aggregate Attributes: [collect_set(val#x, 0, 0)#x] +Results: [key#x, sort_array(collect_set(val#x, 0, 0)#x, true)[0] AS sort_array(collect_set(val), true)[0]#x] + + +-- !query +EXPLAIN FORMATTED + SELECT key, MIN(val) + FROM explain_temp4 + GROUP BY key +-- !query schema +struct +-- !query output +== Physical Plan == +SortAggregate (7) ++- * Sort (6) + +- Exchange (5) + +- SortAggregate (4) + +- * Sort (3) + +- * ColumnarToRow (2) + +- Scan parquet default.explain_temp4 (1) + + +(1) Scan parquet default.explain_temp4 +Output: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp4] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 1] +Input: [key#x, val#x] + +(3) Sort [codegen id : 1] +Input: [key#x, val#x] + +(4) SortAggregate +Input: [key#x, val#x] +Keys: [key#x] +Functions: [partial_min(val#x)] +Aggregate Attributes: [min#x] +Results: [key#x, min#x] + +(5) Exchange +Input: [key#x, min#x] + +(6) Sort [codegen id : 2] +Input: [key#x, min#x] + +(7) SortAggregate +Input: [key#x, min#x] +Keys: [key#x] +Functions: [min(val#x)] +Aggregate Attributes: [min(val#x)#x] +Results: [key#x, min(val#x)#x AS min(val)#x] + + +-- !query DROP TABLE explain_temp1 --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output --- !query 16 +-- !query DROP TABLE explain_temp2 --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output --- !query 17 +-- !query DROP TABLE explain_temp3 --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index b02dfe054344b..583459f9037b8 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -2,518 +2,518 @@ -- Number of queries: 64 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query select extract(millennium from c) from t --- !query 1 schema -struct --- !query 1 output +-- !query schema +struct +-- !query output 3 --- !query 2 +-- !query select extract(millennia from c) from t --- !query 2 schema -struct --- !query 2 output +-- !query schema +struct +-- !query output 3 --- !query 3 +-- !query select extract(mil from c) from t --- !query 3 schema -struct --- !query 3 output +-- !query schema +struct +-- !query output 3 --- !query 4 +-- !query select extract(mils from c) from t --- !query 4 schema -struct --- !query 4 output +-- !query schema +struct +-- !query output 3 --- !query 5 +-- !query select extract(century from c) from t --- !query 5 schema -struct --- !query 5 output +-- !query schema +struct +-- !query output 21 --- !query 6 +-- !query select extract(centuries from c) from t --- !query 6 schema -struct --- !query 6 output +-- !query schema +struct +-- !query output 21 --- !query 7 +-- !query select extract(c from c) from t --- !query 7 schema -struct --- !query 7 output +-- !query schema +struct +-- !query output 21 --- !query 8 +-- !query select extract(cent from c) from t --- !query 8 schema -struct --- !query 8 output +-- !query schema +struct +-- !query output 21 --- !query 9 +-- !query select extract(decade from c) from t --- !query 9 schema -struct --- !query 9 output +-- !query schema +struct +-- !query output 201 --- !query 10 +-- !query select extract(decades from c) from t --- !query 10 schema -struct --- !query 10 output +-- !query schema +struct +-- !query output 201 --- !query 11 +-- !query select extract(dec from c) from t --- !query 11 schema 
-struct --- !query 11 output +-- !query schema +struct +-- !query output 201 --- !query 12 +-- !query select extract(decs from c) from t --- !query 12 schema -struct --- !query 12 output +-- !query schema +struct +-- !query output 201 --- !query 13 +-- !query select extract(year from c) from t --- !query 13 schema -struct --- !query 13 output +-- !query schema +struct +-- !query output 2011 --- !query 14 +-- !query select extract(y from c) from t --- !query 14 schema -struct --- !query 14 output +-- !query schema +struct +-- !query output 2011 --- !query 15 +-- !query select extract(years from c) from t --- !query 15 schema -struct --- !query 15 output +-- !query schema +struct +-- !query output 2011 --- !query 16 +-- !query select extract(yr from c) from t --- !query 16 schema -struct --- !query 16 output +-- !query schema +struct +-- !query output 2011 --- !query 17 +-- !query select extract(yrs from c) from t --- !query 17 schema -struct --- !query 17 output +-- !query schema +struct +-- !query output 2011 --- !query 18 +-- !query select extract(isoyear from c) from t --- !query 18 schema -struct --- !query 18 output +-- !query schema +struct +-- !query output 2011 --- !query 19 +-- !query select extract(quarter from c) from t --- !query 19 schema -struct --- !query 19 output +-- !query schema +struct +-- !query output 2 --- !query 20 +-- !query select extract(qtr from c) from t --- !query 20 schema -struct --- !query 20 output +-- !query schema +struct +-- !query output 2 --- !query 21 +-- !query select extract(month from c) from t --- !query 21 schema -struct --- !query 21 output +-- !query schema +struct +-- !query output 5 --- !query 22 +-- !query select extract(mon from c) from t --- !query 22 schema -struct --- !query 22 output +-- !query schema +struct +-- !query output 5 --- !query 23 +-- !query select extract(mons from c) from t --- !query 23 schema -struct --- !query 23 output +-- !query schema +struct +-- !query output 5 --- !query 24 +-- !query 
select extract(months from c) from t --- !query 24 schema -struct --- !query 24 output +-- !query schema +struct +-- !query output 5 --- !query 25 +-- !query select extract(week from c) from t --- !query 25 schema -struct --- !query 25 output +-- !query schema +struct +-- !query output 18 --- !query 26 +-- !query select extract(w from c) from t --- !query 26 schema -struct --- !query 26 output +-- !query schema +struct +-- !query output 18 --- !query 27 +-- !query select extract(weeks from c) from t --- !query 27 schema -struct --- !query 27 output +-- !query schema +struct +-- !query output 18 --- !query 28 +-- !query select extract(day from c) from t --- !query 28 schema -struct --- !query 28 output +-- !query schema +struct +-- !query output 6 --- !query 29 +-- !query select extract(d from c) from t --- !query 29 schema -struct --- !query 29 output +-- !query schema +struct +-- !query output 6 --- !query 30 +-- !query select extract(days from c) from t --- !query 30 schema -struct --- !query 30 output +-- !query schema +struct +-- !query output 6 --- !query 31 +-- !query select extract(dayofweek from c) from t --- !query 31 schema -struct --- !query 31 output +-- !query schema +struct +-- !query output 6 --- !query 32 +-- !query select extract(dow from c) from t --- !query 32 schema -struct<(dayofweek(CAST(c AS DATE)) - 1):int> --- !query 32 output +-- !query schema +struct +-- !query output 5 --- !query 33 +-- !query select extract(isodow from c) from t --- !query 33 schema -struct<(weekday(CAST(c AS DATE)) + 1):int> --- !query 33 output +-- !query schema +struct +-- !query output 5 --- !query 34 +-- !query select extract(doy from c) from t --- !query 34 schema -struct --- !query 34 output +-- !query schema +struct +-- !query output 126 --- !query 35 +-- !query select extract(hour from c) from t --- !query 35 schema -struct --- !query 35 output +-- !query schema +struct +-- !query output 7 --- !query 36 +-- !query select extract(h from c) from t --- !query 36 
schema -struct --- !query 36 output +-- !query schema +struct +-- !query output 7 --- !query 37 +-- !query select extract(hours from c) from t --- !query 37 schema -struct --- !query 37 output +-- !query schema +struct +-- !query output 7 --- !query 38 +-- !query select extract(hr from c) from t --- !query 38 schema -struct --- !query 38 output +-- !query schema +struct +-- !query output 7 --- !query 39 +-- !query select extract(hrs from c) from t --- !query 39 schema -struct --- !query 39 output +-- !query schema +struct +-- !query output 7 --- !query 40 +-- !query select extract(minute from c) from t --- !query 40 schema -struct --- !query 40 output +-- !query schema +struct +-- !query output 8 --- !query 41 +-- !query select extract(m from c) from t --- !query 41 schema -struct --- !query 41 output +-- !query schema +struct +-- !query output 8 --- !query 42 +-- !query select extract(min from c) from t --- !query 42 schema -struct --- !query 42 output +-- !query schema +struct +-- !query output 8 --- !query 43 +-- !query select extract(mins from c) from t --- !query 43 schema -struct --- !query 43 output +-- !query schema +struct +-- !query output 8 --- !query 44 +-- !query select extract(minutes from c) from t --- !query 44 schema -struct --- !query 44 output +-- !query schema +struct +-- !query output 8 --- !query 45 +-- !query select extract(second from c) from t --- !query 45 schema -struct --- !query 45 output -9 +-- !query schema +struct +-- !query output +9.123456 --- !query 46 +-- !query select extract(s from c) from t --- !query 46 schema -struct --- !query 46 output -9 +-- !query schema +struct +-- !query output +9.123456 --- !query 47 +-- !query select extract(sec from c) from t --- !query 47 schema -struct --- !query 47 output -9 +-- !query schema +struct +-- !query output +9.123456 --- !query 48 +-- !query select extract(seconds from c) from t --- !query 48 schema -struct --- !query 48 output -9 +-- !query schema +struct +-- !query output +9.123456 
--- !query 49 +-- !query select extract(secs from c) from t --- !query 49 schema -struct --- !query 49 output -9 +-- !query schema +struct +-- !query output +9.123456 --- !query 50 +-- !query select extract(milliseconds from c) from t --- !query 50 schema -struct --- !query 50 output +-- !query schema +struct +-- !query output 9123.456 --- !query 51 +-- !query select extract(msec from c) from t --- !query 51 schema -struct --- !query 51 output +-- !query schema +struct +-- !query output 9123.456 --- !query 52 +-- !query select extract(msecs from c) from t --- !query 52 schema -struct --- !query 52 output +-- !query schema +struct +-- !query output 9123.456 --- !query 53 +-- !query select extract(millisecon from c) from t --- !query 53 schema -struct --- !query 53 output +-- !query schema +struct +-- !query output 9123.456 --- !query 54 +-- !query select extract(mseconds from c) from t --- !query 54 schema -struct --- !query 54 output +-- !query schema +struct +-- !query output 9123.456 --- !query 55 +-- !query select extract(ms from c) from t --- !query 55 schema -struct --- !query 55 output +-- !query schema +struct +-- !query output 9123.456 --- !query 56 +-- !query select extract(microseconds from c) from t --- !query 56 schema -struct --- !query 56 output +-- !query schema +struct +-- !query output 9123456 --- !query 57 +-- !query select extract(usec from c) from t --- !query 57 schema -struct --- !query 57 output +-- !query schema +struct +-- !query output 9123456 --- !query 58 +-- !query select extract(usecs from c) from t --- !query 58 schema -struct --- !query 58 output +-- !query schema +struct +-- !query output 9123456 --- !query 59 +-- !query select extract(useconds from c) from t --- !query 59 schema -struct --- !query 59 output +-- !query schema +struct +-- !query output 9123456 --- !query 60 +-- !query select extract(microsecon from c) from t --- !query 60 schema -struct --- !query 60 output +-- !query schema +struct +-- !query output 9123456 --- 
!query 61 +-- !query select extract(us from c) from t --- !query 61 schema -struct --- !query 61 output +-- !query schema +struct +-- !query output 9123456 --- !query 62 +-- !query select extract(epoch from c) from t --- !query 62 schema -struct --- !query 62 output +-- !query schema +struct +-- !query output 1304665689.123456 --- !query 63 +-- !query select extract(not_supported from c) from t --- !query 63 schema +-- !query schema struct<> --- !query 63 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException -Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7) +Literals of type 'not_supported' are currently not supported.(line 1, pos 7) == SQL == select extract(not_supported from c) from t diff --git a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out index 3439a05727f95..4584b823a6e70 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out @@ -2,21 +2,21 @@ -- Number of queries: 29 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) AS testData(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH CUBE --- !query 1 schema +-- !query schema struct<(a + b):int,b:int,sum((a - b)):bigint> --- !query 1 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -32,11 +32,11 @@ NULL 2 0 NULL NULL 3 --- !query 2 +-- !query SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH CUBE --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 1 1 1 2 2 1 NULL 3 @@ -51,11 +51,11 @@ NULL 2 6 NULL NULL 9 --- !query 3 +-- !query SELECT a + b, b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP --- !query 3 schema 
+-- !query schema struct<(a + b):int,b:int,sum((a - b)):bigint> --- !query 3 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -69,11 +69,11 @@ struct<(a + b):int,b:int,sum((a - b)):bigint> NULL NULL 3 --- !query 4 +-- !query SELECT a, b, SUM(b) FROM testData GROUP BY a, b WITH ROLLUP --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 1 1 1 2 2 1 NULL 3 @@ -86,21 +86,21 @@ struct NULL NULL 9 --- !query 5 +-- !query CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES ("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) AS courseSales(course, year, earnings) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL NULL 113000 Java NULL 50000 Java 2012 20000 @@ -110,11 +110,11 @@ dotNET 2012 15000 dotNET 2013 48000 --- !query 7 +-- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL NULL 113000 NULL 2012 35000 NULL 2013 78000 @@ -126,41 +126,41 @@ dotNET 2012 15000 dotNET 2013 48000 --- !query 8 +-- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output Java NULL 50000 NULL 2012 35000 NULL 2013 78000 dotNET NULL 63000 --- !query 9 +-- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output Java NULL 50000 dotNET NULL 63000 --- !query 10 +-- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY 
course, year GROUPING SETS(year) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NULL 2012 35000 NULL 2013 78000 --- !query 11 +-- !query SELECT course, SUM(earnings) AS sum FROM courseSales GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output NULL 113000 Java 20000 Java 30000 @@ -171,12 +171,12 @@ dotNET 48000 dotNET 63000 --- !query 12 +-- !query SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output NULL 113000 3 Java 20000 0 Java 30000 0 @@ -187,12 +187,12 @@ dotNET 48000 0 dotNET 63000 1 --- !query 13 +-- !query SELECT course, year, GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output Java 2012 0 0 0 Java 2013 0 0 0 Java NULL 0 1 1 @@ -204,29 +204,29 @@ dotNET 2013 0 0 0 dotNET NULL 0 1 1 --- !query 14 +-- !query SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException grouping() can only be used with GroupingSets/Cube/Rollup; --- !query 15 +-- !query SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, year --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 16 +-- !query SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year --- !query 16 schema +-- !query schema struct --- !query 16 
output +-- !query output Java 2012 0 Java 2013 0 dotNET 2012 0 @@ -238,40 +238,40 @@ NULL 2013 2 NULL NULL 3 --- !query 17 +-- !query SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, year --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output NULL NULL Java NULL dotNET NULL --- !query 18 +-- !query SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(course) > 0 --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 19 +-- !query SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0 --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 20 +-- !query SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output Java NULL NULL 2012 NULL 2013 @@ -279,12 +279,12 @@ NULL NULL dotNET NULL --- !query 21 +-- !query SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year) ORDER BY GROUPING(course), GROUPING(year), course, year --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output Java 2012 0 0 Java 2013 0 0 dotNET 2012 0 0 @@ -296,12 +296,12 @@ NULL 2013 1 0 NULL NULL 1 1 --- !query 22 +-- !query SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) ORDER BY GROUPING(course), GROUPING(year), course, year --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output Java 2012 0 Java 2013 0 dotNET 2012 0 @@ -313,29 +313,29 @@ NULL 2013 2 NULL NULL 3 
--- !query 23 +-- !query SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(course) --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 24 +-- !query SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID(course) --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 25 +-- !query SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, year --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output Java 2012 Java 2013 dotNET 2012 @@ -347,11 +347,11 @@ NULL 2013 NULL NULL --- !query 26 +-- !query SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -367,11 +367,11 @@ NULL 2 0 NULL NULL 3 --- !query 27 +-- !query SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -385,10 +385,10 @@ struct NULL NULL 3 --- !query 28 +-- !query SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) --- !query 28 schema +-- !query schema struct<(a + b):int,k:int,sum((a - b)):bigint> --- !query 28 output +-- !query output NULL 1 3 NULL 2 0 diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out new file mode 100644 index 0000000000000..a4c7c2cf90cd7 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out @@ -0,0 +1,464 @@ +-- Automatically generated by 
SQLQueryTestSuite +-- Number of queries: 37 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. 
Wrap '(count(testdata.`b`) FILTER (WHERE (testdata.`a` >= 2)) AS `count(b) FILTER (WHERE (a >= 2))`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; + + +-- !query +SELECT COUNT(a) FILTER (WHERE a = 1), COUNT(b) FILTER (WHERE a > 1) FROM testData +-- !query schema +struct 1)):bigint> +-- !query output +2 4 + + +-- !query +SELECT COUNT(id) FILTER (WHERE hiredate = date "2001-01-01") FROM emp +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT COUNT(id) FILTER (WHERE hiredate = to_date('2001-01-01 00:00:00')) FROM emp +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT COUNT(id) FILTER (WHERE hiredate = to_timestamp("2001-01-01 00:00:00")) FROM emp +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT COUNT(id) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd") = "2001-01-01") FROM emp +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData GROUP BY a +-- !query schema +struct= 2)):bigint> +-- !query output +1 0 +2 2 +3 2 +NULL 0 + + +-- !query +SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. 
Add to group by or wrap in first() (or first_value) if you don't care which value you get.; + + +-- !query +SELECT COUNT(a) FILTER (WHERE a >= 0), COUNT(b) FILTER (WHERE a >= 3) FROM testData GROUP BY a +-- !query schema +struct= 0)):bigint,count(b) FILTER (WHERE (a >= 3)):bigint> +-- !query output +0 0 +2 0 +2 0 +3 2 + + +-- !query +SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > date "2003-01-01") FROM emp GROUP BY dept_id +-- !query schema +struct DATE '2003-01-01')):double> +-- !query output +10 200.0 +100 400.0 +20 NULL +30 400.0 +70 150.0 +NULL NULL + + +-- !query +SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > to_date("2003-01-01")) FROM emp GROUP BY dept_id +-- !query schema +struct to_date('2003-01-01'))):double> +-- !query output +10 200.0 +100 400.0 +20 NULL +30 400.0 +70 150.0 +NULL NULL + + +-- !query +SELECT dept_id, SUM(salary) FILTER (WHERE hiredate > to_timestamp("2003-01-01 00:00:00")) FROM emp GROUP BY dept_id +-- !query schema +struct to_timestamp('2003-01-01 00:00:00'))):double> +-- !query output +10 200.0 +100 400.0 +20 NULL +30 400.0 +70 150.0 +NULL NULL + + +-- !query +SELECT dept_id, SUM(salary) FILTER (WHERE date_format(hiredate, "yyyy-MM-dd") > "2003-01-01") FROM emp GROUP BY dept_id +-- !query schema +struct 2003-01-01)):double> +-- !query output +10 200.0 +100 400.0 +20 NULL +30 400.0 +70 150.0 +NULL NULL + + +-- !query +SELECT 'foo', COUNT(a) FILTER (WHERE b <= 2) FROM testData GROUP BY 1 +-- !query schema +struct +-- !query output +foo 6 + + +-- !query +SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= date "2003-01-01") FROM emp GROUP BY 1 +-- !query schema +struct= DATE '2003-01-01')):double> +-- !query output +foo 1350.0 + + +-- !query +SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= to_date("2003-01-01")) FROM emp GROUP BY 1 +-- !query schema +struct= to_date('2003-01-01'))):double> +-- !query output +foo 1350.0 + + +-- !query +SELECT 'foo', SUM(salary) FILTER (WHERE hiredate >= to_timestamp("2003-01-01")) FROM 
emp GROUP BY 1 +-- !query schema +struct= to_timestamp('2003-01-01'))):double> +-- !query output +foo 1350.0 + + +-- !query +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary), sum(salary) filter (where id > 200) from emp group by dept_id +-- !query schema +struct 200)):double> +-- !query output +10 2 2 400.0 NULL +100 2 2 800.0 800.0 +20 1 1 300.0 300.0 +30 1 1 400.0 400.0 +70 1 1 150.0 150.0 +NULL 1 1 400.0 400.0 + + +-- !query +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary), sum(salary) filter (where id + dept_id > 500) from emp group by dept_id +-- !query schema +struct 500)):double> +-- !query output +10 2 2 400.0 NULL +100 2 2 800.0 800.0 +20 1 1 300.0 NULL +30 1 1 400.0 NULL +70 1 1 150.0 150.0 +NULL 1 1 400.0 NULL + + +-- !query +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id > 200) from emp group by dept_id +-- !query schema +struct 200)):double> +-- !query output +10 2 2 400.0 NULL +100 2 2 NULL 800.0 +20 1 1 300.0 300.0 +30 1 1 NULL 400.0 +70 1 1 150.0 150.0 +NULL 1 1 NULL 400.0 + + +-- !query +select dept_id, count(distinct emp_name), count(distinct hiredate), sum(salary) filter (where salary < 400.00D), sum(salary) filter (where id + dept_id > 500) from emp group by dept_id +-- !query schema +struct 500)):double> +-- !query output +10 2 2 400.0 NULL +100 2 2 NULL 800.0 +20 1 1 300.0 NULL +30 1 1 NULL NULL +70 1 1 150.0 150.0 +NULL 1 1 NULL NULL + + +-- !query +SELECT 'foo', APPROX_COUNT_DISTINCT(a) FILTER (WHERE b >= 0) FROM testData WHERE a = 0 GROUP BY 1 +-- !query schema +struct= 0)):bigint> +-- !query output + + + +-- !query +SELECT 'foo', MAX(STRUCT(a)) FILTER (WHERE b >= 1) FROM testData WHERE a = 0 GROUP BY 1 +-- !query schema +struct= 1)):struct> +-- !query output + + + +-- !query +SELECT a + b, COUNT(b) FILTER (WHERE b >= 2) FROM testData GROUP BY a + b +-- !query schema +struct<(a + 
b):int,count(b) FILTER (WHERE (b >= 2)):bigint> +-- !query output +2 0 +3 1 +4 1 +5 1 +NULL 0 + + +-- !query +SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; + + +-- !query +SELECT a + 1 + 1, COUNT(b) FILTER (WHERE b > 0) FROM testData GROUP BY a + 1 +-- !query schema +struct<((a + 1) + 1):int,count(b) FILTER (WHERE (b > 0)):bigint> +-- !query output +3 2 +4 2 +5 2 +NULL 1 + + +-- !query +SELECT a AS k, COUNT(b) FILTER (WHERE b > 0) FROM testData GROUP BY k +-- !query schema +struct 0)):bigint> +-- !query output +1 2 +2 2 +3 2 +NULL 1 + + +-- !query +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE id > (SELECT 200)) +FROM emp +GROUP BY dept_id +-- !query schema +struct scalarsubquery())):double> +-- !query output +10 133.33333333333334 NULL +100 400.0 400.0 +20 300.0 300.0 +30 400.0 400.0 +70 150.0 150.0 +NULL 400.0 400.0 + + +-- !query +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE emp.dept_id = (SELECT dept_id FROM dept LIMIT 1)) +FROM emp +GROUP BY dept_id +-- !query schema +struct +-- !query output +10 133.33333333333334 133.33333333333334 +100 400.0 NULL +20 300.0 NULL +30 400.0 NULL +70 150.0 NULL +NULL 400.0 NULL + + +-- !query +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE EXISTS (SELECT state + FROM dept + WHERE dept.dept_id = emp.dept_id)) +FROM emp +GROUP BY dept_id +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few commands: Aggregate [dept_id#x], [dept_id#x, avg(salary#x) AS avg(salary)#x, avg(salary#x) FILTER (WHERE exists#x [dept_id#x]) AS avg(salary) FILTER (WHERE exists(dept_id))#x] +: +- 
Project [state#x] +: +- Filter (dept_id#x = outer(dept_id#x)) +: +- SubqueryAlias dept +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] ++- SubqueryAlias emp + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +; + + +-- !query +SELECT emp.dept_id, + Sum(salary), + Sum(salary) FILTER (WHERE NOT EXISTS (SELECT state + FROM dept + WHERE dept.dept_id = emp.dept_id)) +FROM emp +GROUP BY dept_id +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few commands: Aggregate [dept_id#x], [dept_id#x, sum(salary#x) AS sum(salary)#x, sum(salary#x) FILTER (WHERE NOT exists#x [dept_id#x]) AS sum(salary) FILTER (WHERE (NOT exists(dept_id)))#x] +: +- Project [state#x] +: +- Filter (dept_id#x = outer(dept_id#x)) +: +- SubqueryAlias dept +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] ++- SubqueryAlias emp + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +; + + +-- !query +SELECT emp.dept_id, + avg(salary), + avg(salary) FILTER (WHERE emp.dept_id IN (SELECT DISTINCT dept_id + FROM dept)) +FROM emp +GROUP BY dept_id +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few commands: Aggregate [dept_id#x], [dept_id#x, avg(salary#x) AS avg(salary)#x, avg(salary#x) FILTER (WHERE dept_id#x IN (list#x [])) AS avg(salary) FILTER (WHERE (dept_id IN (listquery())))#x] +: +- Distinct +: +- Project [dept_id#x] +: +- SubqueryAlias dept +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- 
LocalRelation [dept_id#x, dept_name#x, state#x] ++- SubqueryAlias emp + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +; + + +-- !query +SELECT emp.dept_id, + Sum(salary), + Sum(salary) FILTER (WHERE emp.dept_id NOT IN (SELECT DISTINCT dept_id + FROM dept)) +FROM emp +GROUP BY dept_id +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few commands: Aggregate [dept_id#x], [dept_id#x, sum(salary#x) AS sum(salary)#x, sum(salary#x) FILTER (WHERE NOT dept_id#x IN (list#x [])) AS sum(salary) FILTER (WHERE (NOT (dept_id IN (listquery()))))#x] +: +- Distinct +: +- Project [dept_id#x] +: +- SubqueryAlias dept +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] ++- SubqueryAlias emp + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +; + + +-- !query +SELECT t1.b FROM (SELECT COUNT(b) FILTER (WHERE a >= 2) AS b FROM testData) t1 +-- !query schema +struct +-- !query output +4 diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out index 09e2c632f6386..bf9f606a2224e 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 20 --- !query 0 +-- !query create temporary view data as select * from values (1, 1), (1, 2), @@ -11,55 +11,55 @@ create temporary view data as select * from values (3, 1), (3, 2) as data(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query select a, sum(b) from data 
group by 1 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 3 2 3 3 3 --- !query 2 +-- !query select 1, 2, sum(b) from data group by 1, 2 --- !query 2 schema +-- !query schema struct<1:int,2:int,sum(b):bigint> --- !query 2 output +-- !query output 1 2 9 --- !query 3 +-- !query select a, 1, sum(b) from data group by a, 1 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 1 3 2 1 3 3 1 3 --- !query 4 +-- !query select a, 1, sum(b) from data group by 1, 2 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 1 3 2 1 3 3 1 3 --- !query 5 +-- !query select a, b + 2, count(2) from data group by a, 2 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 3 1 1 4 1 2 3 1 @@ -68,11 +68,11 @@ struct 3 4 1 --- !query 6 +-- !query select a as aa, b + 2 as bb, count(2) from data group by 1, 2 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 3 1 1 4 1 2 3 1 @@ -81,66 +81,66 @@ struct 3 4 1 --- !query 7 +-- !query select sum(b) from data group by 1 + 0 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 9 --- !query 8 +-- !query select a, b from data group by -1 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException GROUP BY position -1 is not in select list (valid range is [1, 2]); line 1 pos 31 --- !query 9 +-- !query select a, b from data group by 0 --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException GROUP BY position 0 is not in select list (valid range is [1, 2]); line 1 pos 31 --- !query 10 +-- !query select a, b from data group by 3 --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 31 --- 
!query 11 +-- !query select a, b, sum(b) from data group by 3 --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException aggregate functions are not allowed in GROUP BY, but found sum(CAST(data.`b` AS BIGINT)); --- !query 12 +-- !query select a, b, sum(b) + 2 from data group by 3 --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException aggregate functions are not allowed in GROUP BY, but found (sum(CAST(data.`b` AS BIGINT)) + CAST(2 AS BIGINT)); --- !query 13 +-- !query select a, rand(0), sum(b) from (select /*+ REPARTITION(1) */ a, b from data) group by a, 2 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 0.5234194256885571 2 1 0.7604953758285915 1 2 0.0953472826424725 1 @@ -149,52 +149,52 @@ struct 3 0.7141011170991605 1 --- !query 14 +-- !query select * from data group by a, b, 1 --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException Star (*) is not allowed in select list when GROUP BY ordinal position is used; --- !query 15 +-- !query select a, count(a) from (select 1 as a) tmp group by 1 order by 1 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 1 --- !query 16 +-- !query select count(a), a from (select 1 as a) tmp group by 2 having a > 0 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 1 1 --- !query 17 +-- !query select a, a AS k, count(b) from data group by k, 1 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 1 2 2 2 2 3 3 2 --- !query 18 +-- !query set spark.sql.groupByOrdinal=false --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output spark.sql.groupByOrdinal false --- !query 19 +-- !query select sum(b) from data group by -1 --- !query 19 schema +-- !query schema struct 
--- !query 19 output +-- !query output 9 diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 3a5df254f2cd9..7bfdd0ad53a95 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,102 +1,102 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 52 +-- Number of queries: 56 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) AS testData(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT a, COUNT(b) FROM testData --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) AS `count(b)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; --- !query 2 +-- !query SELECT COUNT(a), COUNT(b) FROM testData --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 7 7 --- !query 3 +-- !query SELECT a, COUNT(b) FROM testData GROUP BY a --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 2 2 3 2 NULL 1 --- !query 4 +-- !query SELECT a, COUNT(b) FROM testData GROUP BY b --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. 
Add to group by or wrap in first() (or first_value) if you don't care which value you get.; --- !query 5 +-- !query SELECT COUNT(a), COUNT(b) FROM testData GROUP BY a --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 0 1 2 2 2 2 3 2 --- !query 6 +-- !query SELECT 'foo', COUNT(a) FROM testData GROUP BY 1 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output foo 7 --- !query 7 +-- !query SELECT 'foo' FROM testData WHERE a = 0 GROUP BY 1 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output --- !query 8 +-- !query SELECT 'foo', APPROX_COUNT_DISTINCT(a) FROM testData WHERE a = 0 GROUP BY 1 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output --- !query 9 +-- !query SELECT 'foo', MAX(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1 --- !query 9 schema +-- !query schema struct> --- !query 9 output +-- !query output --- !query 10 +-- !query SELECT a + b, COUNT(b) FROM testData GROUP BY a + b --- !query 10 schema +-- !query schema struct<(a + b):int,count(b):bigint> --- !query 10 output +-- !query output 2 1 3 2 4 2 @@ -104,132 +104,132 @@ struct<(a + b):int,count(b):bigint> NULL 1 --- !query 11 +-- !query SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1 --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. 
Add to group by or wrap in first() (or first_value) if you don't care which value you get.; --- !query 12 +-- !query SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1 --- !query 12 schema +-- !query schema struct<((a + 1) + 1):int,count(b):bigint> --- !query 12 output +-- !query output 3 2 4 2 5 2 NULL 1 --- !query 13 +-- !query SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData --- !query 13 schema -struct --- !query 13 output +-- !query schema +struct +-- !query output -0.2723801058145729 -1.5069204152249134 1 3 2.142857142857143 0.8095238095238094 0.8997354108424372 15 7 --- !query 14 +-- !query SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 1 --- !query 15 +-- !query SELECT a AS k, COUNT(b) FROM testData GROUP BY k --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 2 2 2 3 2 NULL 1 --- !query 16 +-- !query SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 2 2 3 2 --- !query 17 +-- !query SELECT COUNT(b) AS k FROM testData GROUP BY k --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException aggregate functions are not allowed in GROUP BY, but found count(testdata.`b`); --- !query 18 +-- !query CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES (1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v) --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException expression 
'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; --- !query 20 +-- !query set spark.sql.groupByAliases=false --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output spark.sql.groupByAliases false --- !query 21 +-- !query SELECT a AS k, COUNT(b) FROM testData GROUP BY k --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`k`' given input columns: [testdata.a, testdata.b]; line 1 pos 47 --- !query 22 +-- !query SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output --- !query 23 +-- !query SELECT COUNT(1) FROM testData WHERE false --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 0 --- !query 24 +-- !query SELECT 1 FROM (SELECT COUNT(1) FROM testData WHERE false) t --- !query 24 schema +-- !query schema struct<1:int> --- !query 24 output +-- !query output 1 --- !query 25 +-- !query SELECT 1 from ( SELECT 1 AS z, MIN(a.x) @@ -237,114 +237,114 @@ SELECT 1 from ( WHERE false ) b where b.z != b.z --- !query 25 schema +-- !query schema struct<1:int> --- !query 25 output +-- !query output --- !query 26 +-- !query SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 1.0 1.0 3 --- !query 27 +-- !query SELECT 1 FROM range(10) HAVING true --- !query 27 schema +-- !query schema struct<1:int> --- !query 27 output +-- !query output 1 --- !query 28 +-- !query SELECT 1 FROM range(10) HAVING MAX(id) > 0 --- !query 28 schema +-- !query schema struct<1:int> --- !query 28 output +-- !query output 1 --- !query 29 +-- !query SELECT id FROM range(10) HAVING id > 0 --- !query 
29 schema +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.; --- !query 30 +-- !query CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (1, true), (1, false), (2, true), (3, false), (3, null), (4, null), (4, null), (5, null), (5, true), (5, false) AS test_agg(k, v) --- !query 30 schema +-- !query schema struct<> --- !query 30 output +-- !query output --- !query 31 -SELECT every(v), some(v), any(v) FROM test_agg WHERE 1 = 0 --- !query 31 schema -struct --- !query 31 output -NULL NULL NULL +-- !query +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0 +-- !query schema +struct +-- !query output +NULL NULL NULL NULL NULL --- !query 32 -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 4 --- !query 32 schema -struct --- !query 32 output -NULL NULL NULL +-- !query +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4 +-- !query schema +struct +-- !query output +NULL NULL NULL NULL NULL --- !query 33 -SELECT every(v), some(v), any(v) FROM test_agg WHERE k = 5 --- !query 33 schema -struct --- !query 33 output -false true true +-- !query +SELECT every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5 +-- !query schema +struct +-- !query output +false true true false true --- !query 34 -SELECT k, every(v), some(v), any(v) FROM test_agg GROUP BY k --- !query 34 schema -struct --- !query 34 output -1 false true true -2 true true true -3 false false false -4 NULL NULL NULL -5 false true true +-- !query +SELECT k, every(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k +-- !query schema +struct +-- !query output +1 false true true false true +2 true true true true true +3 false false false false false +4 
NULL NULL NULL NULL NULL +5 false true true false true --- !query 35 +-- !query SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) = false --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 1 false 3 false 5 false --- !query 36 +-- !query SELECT k, every(v) FROM test_agg GROUP BY k HAVING every(v) IS NULL --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 4 NULL --- !query 37 +-- !query SELECT k, Every(v) AS every FROM test_agg @@ -353,13 +353,13 @@ WHERE k = 2 FROM test_agg WHERE k = 1) GROUP BY k --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 2 true --- !query 38 +-- !query SELECT k, Every(v) AS every FROM test_agg @@ -368,53 +368,71 @@ WHERE k = 2 FROM test_agg WHERE k = 1) GROUP BY k --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output --- !query 39 +-- !query SELECT every(1) --- !query 39 schema +-- !query schema struct<> --- !query 39 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'every(1)' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7 --- !query 40 +-- !query SELECT some(1S) --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'some(1S)' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 --- !query 41 +-- !query SELECT any(1L) --- !query 41 schema +-- !query schema struct<> --- !query 41 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'any(1L)' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7 --- !query 42 +-- !query SELECT every("true") --- !query 42 schema +-- !query schema struct<> --- !query 42 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 
'every('true')' due to data type mismatch: Input to function 'every' should have been boolean, but it's [string].; line 1 pos 7 --- !query 43 +-- !query +SELECT bool_and(1.0) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'bool_and(1.0BD)' due to data type mismatch: Input to function 'bool_and' should have been boolean, but it's [decimal(2,1)].; line 1 pos 7 + + +-- !query +SELECT bool_or(1.0D) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'bool_or(1.0D)' due to data type mismatch: Input to function 'bool_or' should have been boolean, but it's [double].; line 1 pos 7 + + +-- !query SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 1 false false 1 true false 2 true true @@ -427,11 +445,11 @@ struct --- !query 44 output +-- !query output 1 false false 1 true true 2 true true @@ -444,11 +462,45 @@ struct --- !query 45 output +-- !query output +1 false false +1 true true +2 true true +3 NULL NULL +3 false false +4 NULL NULL +4 NULL NULL +5 NULL NULL +5 false false +5 true true + + +-- !query +SELECT k, v, bool_and(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +-- !query schema +struct +-- !query output +1 false false +1 true false +2 true true +3 NULL NULL +3 false false +4 NULL NULL +4 NULL NULL +5 NULL NULL +5 false false +5 true false + + +-- !query +SELECT k, v, bool_or(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg +-- !query schema +struct +-- !query output 1 false false 1 true true 2 true true @@ -461,37 +513,37 @@ struct 1L --- !query 46 schema +-- !query schema struct --- !query 46 output +-- !query output 10 --- !query 47 +-- !query SELECT k, max(v) FROM test_agg GROUP BY k HAVING max(v) = true --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output 1 true 2 true 5 true --- !query 48 +-- !query SELECT * FROM 
(SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output 10 --- !query 49 +-- !query SELECT count(*) FROM test_agg WHERE count(*) > 1L --- !query 49 schema +-- !query schema struct<> --- !query 49 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. @@ -499,11 +551,11 @@ Expression in where clause: [(count(1) > 1L)] Invalid expressions: [count(1)]; --- !query 50 +-- !query SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L --- !query 50 schema +-- !query schema struct<> --- !query 50 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. @@ -511,11 +563,11 @@ Expression in where clause: [((count(1) + 1L) > 1L)] Invalid expressions: [count(1)]; --- !query 51 +-- !query SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1 --- !query 51 schema +-- !query schema struct<> --- !query 51 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out index 34ab09c5e3bba..8eeabb34b4fab 100644 --- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out @@ -1,139 +1,141 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 15 +-- Number of queries: 18 --- !query 0 +-- !query CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES ("1", "2", "3", 1), ("4", "5", "6", 1), ("7", "8", "9", 1) as grouping(a, b, c, d) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS (()) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output NULL NULL NULL 3 --- !query 2 +-- !query SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a)) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 NULL NULL 1 4 NULL NULL 1 7 NULL NULL 1 --- !query 3 +-- !query SELECT a, b, c, count(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c)) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output NULL NULL 3 1 NULL NULL 6 1 NULL NULL 9 1 --- !query 4 +-- !query SELECT c1, sum(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output x 10 y 20 --- !query 5 +-- !query SELECT c1, sum(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output x 10 0 y 20 0 --- !query 6 +-- !query SELECT c1, c2, Sum(c3), grouping__id FROM (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3) GROUP BY GROUPING SETS ( ( c1 ), ( c2 ) ) HAVING 
GROUPING__ID > 1 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL a 10 2 NULL b 20 2 --- !query 7 +-- !query SELECT grouping(c1) FROM (VALUES ('x', 'a', 10), ('y', 'b', 20)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1,c2) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 0 0 1 1 --- !query 8 +-- !query SELECT -c1 AS c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS ((c1), (c1, c2)) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output -1 -1 -3 -3 --- !query 9 +-- !query SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b)) --- !query 9 schema +-- !query schema struct<(a + b):int,b:int,sum(c):bigint> --- !query 9 output +-- !query output 2 NULL 1 4 NULL 2 NULL 1 1 NULL 2 2 --- !query 10 +-- !query SELECT a + b, b, sum(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b)) --- !query 10 schema +-- !query schema struct<(a + b):int,b:int,sum(c):bigint> --- !query 10 output -2 NULL 2 -4 NULL 4 +-- !query output +2 NULL 1 +2 NULL 1 +4 NULL 2 +4 NULL 2 NULL 1 1 NULL 2 2 --- !query 11 +-- !query SELECT c1 AS col1, c2 AS col2 FROM (VALUES (1, 2), (3, 2)) t(c1, c2) GROUP BY GROUPING SETS ( ( c1 ), ( c1, c2 ) ) HAVING col2 IS NOT NULL ORDER BY -col1 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 3 2 1 2 --- !query 12 +-- !query SELECT a, b, c, count(d) FROM grouping GROUP BY WITH ROLLUP --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException extraneous input 'ROLLUP' expecting (line 1, pos 53) @@ -143,11 +145,11 @@ SELECT a, b, c, count(d) FROM grouping GROUP BY WITH ROLLUP -----------------------------------------------------^^^ --- !query 13 +-- !query SELECT a, b, c, count(d) FROM grouping GROUP BY WITH CUBE --- !query 13 schema +-- !query schema 
struct<> --- !query 13 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException extraneous input 'CUBE' expecting (line 1, pos 53) @@ -157,10 +159,49 @@ SELECT a, b, c, count(d) FROM grouping GROUP BY WITH CUBE -----------------------------------------------------^^^ --- !query 14 +-- !query SELECT c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS (()) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException expression '`c1`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; + + +-- !query +SELECT k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)) +-- !query schema +struct +-- !query output +1 1 1.0 +1 1 1.0 +1 NULL 1.0 +2 2 2.0 +2 2 2.0 +2 NULL 2.0 + + +-- !query +SELECT grouping__id, k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)) +-- !query schema +struct +-- !query output +0 1 1 1.0 +0 1 1 1.0 +0 2 2 2.0 +0 2 2 2.0 +1 1 NULL 1.0 +1 2 NULL 2.0 + + +-- !query +SELECT grouping(k1), k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)) +-- !query schema +struct +-- !query output +0 1 1 1.0 +0 1 1 1.0 +0 1 NULL 1.0 +0 2 2 2.0 +0 2 2 2.0 +0 2 NULL 2.0 diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out index d87ee5221647f..5bd185d7b815d 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out @@ -2,48 +2,48 @@ -- Number of queries: 5 --- !query 0 +-- !query create temporary view hav as select * from values ("one", 1), ("two", 2), ("three", 3), ("one", 5) as hav(k, v) --- !query 0 schema +-- !query schema struct<> --- !query 0 output 
+-- !query output --- !query 1 +-- !query SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output one 6 three 3 --- !query 2 +-- !query SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 --- !query 3 +-- !query SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 --- !query 4 +-- !query SELECT a + b FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > 1 --- !query 4 schema +-- !query schema struct<(a + CAST(b AS BIGINT)):bigint> --- !query 4 output +-- !query output 3 7 diff --git a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out index 1b7c6f4f76250..d35d0d5d944bb 100644 --- a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out @@ -1,256 +1,272 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 27 +-- Number of queries: 29 --- !query 0 +-- !query create or replace temporary view nested as values (1, array(32, 97), array(array(12, 99), array(123, 42), array(1))), (2, array(77, -76), array(array(6, 96, 65), array(-1, -2))), (3, array(12), array(array(17))) as t(x, ys, zs) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query select upper(x -> x) as v --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output org.apache.spark.sql.AnalysisException A lambda function should only be used in a higher order function. 
However, its class is org.apache.spark.sql.catalyst.expressions.Upper, which is not a higher order function.; line 1 pos 7 --- !query 2 +-- !query select transform(zs, z -> z) as v from nested --- !query 2 schema +-- !query schema struct>> --- !query 2 output +-- !query output [[12,99],[123,42],[1]] [[17]] [[6,96,65],[-1,-2]] --- !query 3 +-- !query select transform(ys, y -> y * y) as v from nested --- !query 3 schema +-- !query schema struct> --- !query 3 output +-- !query output [1024,9409] [144] [5929,5776] --- !query 4 +-- !query select transform(ys, (y, i) -> y + i) as v from nested --- !query 4 schema +-- !query schema struct> --- !query 4 output +-- !query output [12] [32,98] [77,-75] --- !query 5 +-- !query select transform(zs, z -> concat(ys, z)) as v from nested --- !query 5 schema +-- !query schema struct>> --- !query 5 output +-- !query output [[12,17]] [[32,97,12,99],[32,97,123,42],[32,97,1]] [[77,-76,6,96,65],[77,-76,-1,-2]] --- !query 6 +-- !query select transform(ys, 0) as v from nested --- !query 6 schema +-- !query schema struct> --- !query 6 output +-- !query output [0,0] [0,0] [0] --- !query 7 +-- !query select transform(cast(null as array), x -> x + 1) as v --- !query 7 schema +-- !query schema struct> --- !query 7 output +-- !query output NULL --- !query 8 +-- !query select filter(ys, y -> y > 30) as v from nested --- !query 8 schema +-- !query schema struct> --- !query 8 output +-- !query output [32,97] [77] [] --- !query 9 +-- !query select filter(cast(null as array), y -> true) as v --- !query 9 schema +-- !query schema struct> --- !query 9 output +-- !query output NULL --- !query 10 +-- !query select transform(zs, z -> filter(z, zz -> zz > 50)) as v from nested --- !query 10 schema +-- !query schema struct>> --- !query 10 output +-- !query output [[96,65],[]] [[99],[123],[]] [[]] --- !query 11 +-- !query select aggregate(ys, 0, (y, a) -> y + a + x) as v from nested --- !query 11 schema +-- !query schema struct --- !query 11 output +-- 
!query output 131 15 5 --- !query 12 +-- !query select aggregate(ys, (0 as sum, 0 as n), (acc, x) -> (acc.sum + x, acc.n + 1), acc -> acc.sum / acc.n) as v from nested --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 0.5 12.0 64.5 --- !query 13 +-- !query select transform(zs, z -> aggregate(z, 1, (acc, val) -> acc * val * size(z))) as v from nested --- !query 13 schema +-- !query schema struct> --- !query 13 output +-- !query output [1010880,8] [17] [4752,20664,1] --- !query 14 +-- !query select aggregate(cast(null as array), 0, (a, y) -> a + y + 1, a -> a + 2) as v --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output NULL --- !query 15 +-- !query select exists(ys, y -> y > 30) as v from nested --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output false true true --- !query 16 +-- !query select exists(cast(null as array), y -> y > 30) as v --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output NULL --- !query 17 +-- !query select zip_with(ys, zs, (a, b) -> a + size(b)) as v from nested --- !query 17 schema +-- !query schema struct> --- !query 17 output +-- !query output [13] [34,99,null] [80,-74] --- !query 18 +-- !query select zip_with(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) -> concat(x, y)) as v --- !query 18 schema +-- !query schema struct> --- !query 18 output +-- !query output ["ad","be","cf"] --- !query 19 +-- !query select zip_with(array('a'), array('d', null, 'f'), (x, y) -> coalesce(x, y)) as v --- !query 19 schema +-- !query schema struct> --- !query 19 output +-- !query output ["a",null,"f"] --- !query 20 +-- !query create or replace temporary view nested as values (1, map(1, 1, 2, 2, 3, 3)), (2, map(4, 4, 5, 5, 6, 6)) as t(x, ys) --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query select transform_keys(ys, (k, v) -> k) as v from nested --- !query 21 schema 
+-- !query schema struct> --- !query 21 output +-- !query output {1:1,2:2,3:3} {4:4,5:5,6:6} --- !query 22 +-- !query select transform_keys(ys, (k, v) -> k + 1) as v from nested --- !query 22 schema +-- !query schema struct> --- !query 22 output +-- !query output {2:1,3:2,4:3} {5:4,6:5,7:6} --- !query 23 +-- !query select transform_keys(ys, (k, v) -> k + v) as v from nested --- !query 23 schema +-- !query schema struct> --- !query 23 output +-- !query output {10:5,12:6,8:4} {2:1,4:2,6:3} --- !query 24 +-- !query select transform_values(ys, (k, v) -> v) as v from nested --- !query 24 schema +-- !query schema struct> --- !query 24 output +-- !query output {1:1,2:2,3:3} {4:4,5:5,6:6} --- !query 25 +-- !query select transform_values(ys, (k, v) -> v + 1) as v from nested --- !query 25 schema +-- !query schema struct> --- !query 25 output +-- !query output {1:2,2:3,3:4} {4:5,5:6,6:7} --- !query 26 +-- !query select transform_values(ys, (k, v) -> k + v) as v from nested --- !query 26 schema +-- !query schema struct> --- !query 26 output +-- !query output {1:2,2:4,3:6} {4:8,5:10,6:12} + + +-- !query +select transform(ys, all -> all * all) as v from values (array(32, 97)) as t(ys) +-- !query schema +struct> +-- !query output +[1024,9409] + + +-- !query +select transform(ys, (all, i) -> all + i) as v from values (array(32, 97)) as t(ys) +-- !query schema +struct> +-- !query output +[32,98] diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 4e80f0bda5513..9943b93c431df 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -2,152 +2,152 @@ -- Number of queries: 17 --- !query 0 +-- !query select * from values ("one", 1) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output one 1 --- !query 1 +-- !query select * from values ("one", 1) as data --- 
!query 1 schema +-- !query schema struct --- !query 1 output +-- !query output one 1 --- !query 2 +-- !query select * from values ("one", 1) as data(a, b) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output one 1 --- !query 3 +-- !query select * from values 1, 2, 3 as data(a) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 3 --- !query 4 +-- !query select * from values ("one", 1), ("two", 2), ("three", null) as data(a, b) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output one 1 three NULL two 2 --- !query 5 +-- !query select * from values ("one", null), ("two", null) as data(a, b) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output one NULL two NULL --- !query 6 +-- !query select * from values ("one", 1), ("two", 2L) as data(a, b) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output one 1 two 2 --- !query 7 +-- !query select * from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output one 1 two 4 --- !query 8 +-- !query select * from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b) --- !query 8 schema +-- !query schema struct> --- !query 8 output +-- !query output one [0,1] two [2,3] --- !query 9 +-- !query select * from values ("one", 2.0), ("two", 3.0D) as data(a, b) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output one 2.0 two 3.0 --- !query 10 +-- !query select * from values ("one", rand(5)), ("two", 3.0D) as data(a, b) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException cannot evaluate expression rand(5) in inline table definition; line 1 pos 29 --- !query 11 +-- !query select * from values ("one", 2.0), ("two") as data(a, b) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- 
!query output org.apache.spark.sql.AnalysisException expected 2 columns but found 1 columns in row 1; line 1 pos 14 --- !query 12 +-- !query select * from values ("one", array(0, 1)), ("two", struct(1, 2)) as data(a, b) --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException incompatible types found in column b for inline table; line 1 pos 14 --- !query 13 +-- !query select * from values ("one"), ("two") as data(a, b) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException expected 2 columns but found 1 columns in row 0; line 1 pos 14 --- !query 14 +-- !query select * from values ("one", random_not_exist_func(1)), ("two", 2) as data(a, b) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException Undefined function: 'random_not_exist_func'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 29 --- !query 15 +-- !query select * from values ("one", count(1)), ("two", 2) as data(a, b) --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException cannot evaluate expression count(1) in inline table definition; line 1 pos 29 --- !query 16 +-- !query select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0'))) as data(a, b) --- !query 16 schema +-- !query schema struct> --- !query 16 output -1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0] +-- !query output +1991-12-06 00:00:00 [1991-12-06 01:00:00,1991-12-06 12:00:00] diff --git a/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out b/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out index 8d56ebe9fd3b4..7c3774c8bd7fb 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inner-join.sql.out @@ -2,65 +2,65 @@ -- Number of queries: 7 --- !query 0 +-- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE TEMPORARY VIEW ta AS SELECT a, 'a' AS tag FROM t1 UNION ALL SELECT a, 'b' AS tag FROM t2 --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TEMPORARY VIEW tb AS SELECT a, 'a' AS tag FROM t3 UNION ALL SELECT a, 'b' AS tag FROM t4 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT tb.* FROM ta INNER JOIN tb ON ta.a = tb.a AND ta.tag = tb.tag --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 a 1 a 1 b diff --git a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out index 63dd56ce468bc..4762082dc3be2 100644 --- a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 22 --- !query 0 +-- !query CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (1, 2), (1, 2), @@ -12,13 +12,13 @@ CREATE 
TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (null, null), (null, null) AS tab1(k, v) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (1, 2), (1, 2), @@ -27,19 +27,19 @@ CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (null, null), (null, null) AS tab2(k, v) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM tab1 INTERSECT ALL SELECT * FROM tab2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 2 1 2 2 3 @@ -47,80 +47,80 @@ NULL NULL NULL NULL --- !query 3 +-- !query SELECT * FROM tab1 INTERSECT ALL SELECT * FROM tab1 WHERE k = 1 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 1 2 1 3 1 3 --- !query 4 +-- !query SELECT * FROM tab1 WHERE k > 2 INTERSECT ALL SELECT * FROM tab2 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT * FROM tab1 INTERSECT ALL SELECT * FROM tab2 WHERE k > 3 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT * FROM tab1 INTERSECT ALL SELECT CAST(1 AS BIGINT), CAST(2 AS BIGINT) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 2 --- !query 7 +-- !query SELECT * FROM tab1 INTERSECT ALL SELECT array(1), 2 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException IntersectAll can only be performed on tables with the compatible column types. 
array <> int at the first column of the second table; --- !query 8 +-- !query SELECT k FROM tab1 INTERSECT ALL SELECT k, v FROM tab2 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; --- !query 9 +-- !query SELECT * FROM tab2 INTERSECT ALL SELECT * FROM tab1 INTERSECT ALL SELECT * FROM tab2 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 2 1 2 2 3 @@ -128,7 +128,7 @@ NULL NULL NULL NULL --- !query 10 +-- !query SELECT * FROM tab1 EXCEPT SELECT * FROM tab2 @@ -136,9 +136,9 @@ UNION ALL SELECT * FROM tab1 INTERSECT ALL SELECT * FROM tab2 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 2 1 2 1 3 @@ -147,7 +147,7 @@ NULL NULL NULL NULL --- !query 11 +-- !query SELECT * FROM tab1 EXCEPT SELECT * FROM tab2 @@ -155,13 +155,13 @@ EXCEPT SELECT * FROM tab1 INTERSECT ALL SELECT * FROM tab2 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 3 --- !query 12 +-- !query ( ( ( @@ -175,13 +175,13 @@ struct INTERSECT ALL SELECT * FROM tab2 ) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output --- !query 13 +-- !query SELECT * FROM (SELECT tab1.k, tab2.v @@ -195,9 +195,9 @@ FROM (SELECT tab1.k, FROM tab1 JOIN tab2 ON tab1.k = tab2.k) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 2 1 2 1 2 @@ -209,7 +209,7 @@ struct 2 3 --- !query 14 +-- !query SELECT * FROM (SELECT tab1.k, tab2.v @@ -223,33 +223,33 @@ FROM (SELECT tab2.v AS k, FROM tab1 JOIN tab2 ON tab1.k = tab2.k) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output --- !query 15 +-- !query SELECT v FROM tab1 GROUP BY v INTERSECT ALL SELECT k FROM tab2 GROUP BY k --- !query 15 schema +-- !query 
schema struct --- !query 15 output +-- !query output 2 3 NULL --- !query 16 +-- !query SET spark.sql.legacy.setopsPrecedence.enabled= true --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output spark.sql.legacy.setopsPrecedence.enabled true --- !query 17 +-- !query SELECT * FROM tab1 EXCEPT SELECT * FROM tab2 @@ -257,9 +257,9 @@ UNION ALL SELECT * FROM tab1 INTERSECT ALL SELECT * FROM tab2 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 2 1 2 2 3 @@ -267,7 +267,7 @@ NULL NULL NULL NULL --- !query 18 +-- !query SELECT * FROM tab1 EXCEPT SELECT * FROM tab2 @@ -275,33 +275,33 @@ UNION ALL SELECT * FROM tab1 INTERSECT SELECT * FROM tab2 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 2 2 3 NULL NULL --- !query 19 +-- !query SET spark.sql.legacy.setopsPrecedence.enabled = false --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output spark.sql.legacy.setopsPrecedence.enabled false --- !query 20 +-- !query DROP VIEW IF EXISTS tab1 --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query DROP VIEW IF EXISTS tab2 --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out new file mode 100644 index 0000000000000..3c4b4301d0025 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -0,0 +1,992 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 99 + + +-- !query +select 3 * (timestamp'2019-10-15 10:11:12.001002' - date'2019-10-15') +-- !query schema +struct +-- !query output +30 hours 33 minutes 36.003006 seconds + + +-- !query +select interval 4 month 2 weeks 3 microseconds * 1.5 +-- !query schema +struct +-- !query output +6 months 21 days 0.000005 seconds + + +-- 
!query +select (timestamp'2019-10-15' - timestamp'2019-10-14') / 1.5 +-- !query schema +struct +-- !query output +16 hours + + +-- !query +select interval '2 seconds' / 0 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +divide by zero + + +-- !query +select interval '2 seconds' / null +-- !query schema +struct +-- !query output +NULL + + +-- !query +select interval '2 seconds' * null +-- !query schema +struct +-- !query output +NULL + + +-- !query +select null * interval '2 seconds' +-- !query schema +struct +-- !query output +NULL + + +-- !query +select -interval '-1 month 1 day -1 second' +-- !query schema +struct<(- INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +1 months -1 days 1 seconds + + +-- !query +select -interval -1 month 1 day -1 second +-- !query schema +struct<(- INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +1 months -1 days 1 seconds + + +-- !query +select +interval '-1 month 1 day -1 second' +-- !query schema +struct<(+ INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +-1 months 1 days -1 seconds + + +-- !query +select +interval -1 month 1 day -1 second +-- !query schema +struct<(+ INTERVAL '-1 months 1 days -1 seconds'):interval> +-- !query output +-1 months 1 days -1 seconds + + +-- !query +select make_interval(1) +-- !query schema +struct +-- !query output +1 years + + +-- !query +select make_interval(1, 2) +-- !query schema +struct +-- !query output +1 years 2 months + + +-- !query +select make_interval(1, 2, 3) +-- !query schema +struct +-- !query output +1 years 2 months 21 days + + +-- !query +select make_interval(1, 2, 3, 4) +-- !query schema +struct +-- !query output +1 years 2 months 25 days + + +-- !query +select make_interval(1, 2, 3, 4, 5) +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 hours + + +-- !query +select make_interval(1, 2, 3, 4, 5, 6) +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 
hours 6 minutes + + +-- !query +select make_interval(1, 2, 3, 4, 5, 6, 7.008009) +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query +select cast('1 second' as interval) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select cast('+1 second' as interval) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select cast('-1 second' as interval) +-- !query schema +struct +-- !query output +-1 seconds + + +-- !query +select cast('+ 1 second' as interval) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select cast('- 1 second' as interval) +-- !query schema +struct +-- !query output +-1 seconds + + +-- !query +select cast('- -1 second' as interval) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('- +1 second' as interval) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select interval 13.123456789 seconds, interval -13.123456789 second +-- !query schema +struct +-- !query output +13.123456 seconds -13.123456 seconds + + +-- !query +select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond 9 microsecond +-- !query schema +struct +-- !query output +1 years 2 months 25 days 5 hours 6 minutes 7.008009 seconds + + +-- !query +select interval '30' year '25' month '-100' day '40' hour '80' minute '299.889987299' second +-- !query schema +struct +-- !query output +32 years 1 months -100 days 41 hours 24 minutes 59.889987 seconds + + +-- !query +select interval '0 0:0:0.1' day to second +-- !query schema +struct +-- !query output +0.1 seconds + + +-- !query +select interval '10-9' year to month +-- !query schema +struct +-- !query output +10 years 9 months + + +-- !query +select interval '20 15' day to hour +-- !query schema +struct +-- !query output +20 days 15 hours + + +-- !query +select interval '20 15:40' day to minute +-- !query schema +struct +-- !query output +20 days 
15 hours 40 minutes + + +-- !query +select interval '20 15:40:32.99899999' day to second +-- !query schema +struct +-- !query output +20 days 15 hours 40 minutes 32.998999 seconds + + +-- !query +select interval '15:40' hour to minute +-- !query schema +struct +-- !query output +15 hours 40 minutes + + +-- !query +select interval '15:40:32.99899999' hour to second +-- !query schema +struct +-- !query output +15 hours 40 minutes 32.998999 seconds + + +-- !query +select interval '40:32.99899999' minute to second +-- !query schema +struct +-- !query output +40 minutes 32.998999 seconds + + +-- !query +select interval '40:32' minute to second +-- !query schema +struct +-- !query output +40 minutes 32 seconds + + +-- !query +select interval 30 day day +-- !query schema +struct +-- !query output +30 days + + +-- !query +select interval '20 15:40:32.99899999' day to hour +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2})$': 20 15:40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '20 15:40:32.99899999' day to hour +----------------^^^ + + +-- !query +select interval '20 15:40:32.99899999' day to minute +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2}):(?\d{1,2})$': 20 15:40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '20 15:40:32.99899999' day to minute +----------------^^^ + + +-- !query +select interval '15:40:32.99899999' hour to minute +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval 
string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2})$': 15:40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '15:40:32.99899999' hour to minute +----------------^^^ + + +-- !query +select interval '15:40.99899999' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 15:40.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '15:40.99899999' hour to second +----------------^^^ + + +-- !query +select interval '15:40' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 15:40, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '15:40' hour to second +----------------^^^ + + +-- !query +select interval '20 40:32.99899999' minute to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 20 40:32.99899999, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +select interval '20 40:32.99899999' minute to second +----------------^^^ + + +-- !query +select interval 10 nanoseconds +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Error parsing ' 10 nanoseconds' to interval, invalid unit 
'nanoseconds'(line 1, pos 16) + +== SQL == +select interval 10 nanoseconds +----------------^^^ + + +-- !query +select map(1, interval 1 day, 2, interval 3 week) +-- !query schema +struct> +-- !query output +{1:1 days,2:21 days} + + +-- !query +select interval 'interval 3 year 1 hour' +-- !query schema +struct +-- !query output +3 years 1 hours + + +-- !query +select interval '3 year 1 hour' +-- !query schema +struct +-- !query output +3 years 1 hours + + +-- !query +select interval +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +at least one time unit should be given for interval literal(line 1, pos 7) + +== SQL == +select interval +-------^^^ + + +-- !query +select interval 1 fake_unit +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Error parsing ' 1 fake_unit' to interval, invalid unit 'fake_unit'(line 1, pos 16) + +== SQL == +select interval 1 fake_unit +----------------^^^ + + +-- !query +select interval 1 year to month +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +The value of from-to unit must be a string(line 1, pos 16) + +== SQL == +select interval 1 year to month +----------------^^^ + + +-- !query +select interval '1' year to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Intervals FROM year TO second are not supported.(line 1, pos 16) + +== SQL == +select interval '1' year to second +----------------^^^ + + +-- !query +select interval '10-9' year to month '2-1' year to month +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '2-1' year to month +-------------------------------------^^^ + + +-- !query +select interval '10-9' year to month '12:11:10' 
hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '12:11:10' hour to second +-------------------------------------^^^ + + +-- !query +select interval '1 15:11' day to minute '12:11:10' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 40) + +== SQL == +select interval '1 15:11' day to minute '12:11:10' hour to second +----------------------------------------^^^ + + +-- !query +select interval 1 year '2-1' year to month +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '2-1' year to month +-----------------------^^^ + + +-- !query +select interval 1 year '12:11:10' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 23) + +== SQL == +select interval 1 year '12:11:10' hour to second +-----------------------^^^ + + +-- !query +select interval '10-9' year to month '1' year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 37) + +== SQL == +select interval '10-9' year to month '1' year +-------------------------------------^^^ + + +-- !query +select interval '12:11:10' hour to second '1' year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Can only have a single from-to unit in the interval literal syntax(line 1, pos 42) + +== SQL == 
+select interval '12:11:10' hour to second '1' year +------------------------------------------^^^ + + +-- !query +select interval (-30) day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query +select interval (a + 1) day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'interval'. This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query +select interval 30 day day day +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +extraneous input 'day' expecting (line 1, pos 27) + +== SQL == +select interval 30 day day day +---------------------------^^^ + + +-- !query +select sum(cast(null as interval)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select sum(cast(v as interval)) from VALUES ('1 seconds') t(v) where 1=0 +-- !query schema +struct +-- !query output +NULL + + +-- !query +select sum(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +3 seconds + + +-- !query +select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +1 seconds + + +-- !query +select sum(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +-3 seconds + + +-- !query +select sum(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +-7 days 2 seconds + + +-- !query +select + i, + sum(cast(v as interval)) +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +group by i +-- 
!query schema +struct +-- !query output +1 -2 days +2 2 seconds +3 NULL + + +-- !query +select + sum(cast(v as interval)) as sv +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +having sv is not null +-- !query schema +struct +-- !query output +-2 days 2 seconds + + +-- !query +SELECT + i, + sum(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM VALUES(1, '1 seconds'), (1, '2 seconds'), (2, NULL), (2, NULL) t(i,v) +-- !query schema +struct +-- !query output +1 2 seconds +1 3 seconds +2 NULL +2 NULL + + +-- !query +select avg(cast(v as interval)) from VALUES (null) t(v) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) where 1=0 +-- !query schema +struct +-- !query output +NULL + + +-- !query +select avg(cast(v as interval)) from VALUES ('1 seconds'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +1.5 seconds + + +-- !query +select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +0.5 seconds + + +-- !query +select avg(cast(v as interval)) from VALUES ('-1 seconds'), ('-2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +-1.5 seconds + + +-- !query +select avg(cast(v as interval)) from VALUES ('-1 weeks'), ('2 seconds'), (null) t(v) +-- !query schema +struct +-- !query output +-3 days -11 hours -59 minutes -59 seconds + + +-- !query +select + i, + avg(cast(v as interval)) +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +group by i +-- !query schema +struct +-- !query output +1 -1 days +2 2 seconds +3 NULL + + +-- !query +select + avg(cast(v as interval)) as sv +from VALUES (1, '-1 weeks'), (2, '2 seconds'), (3, null), (1, '5 days') t(i, v) +having sv is not null +-- !query schema +struct +-- !query output +-15 hours -59 minutes -59.333333 
seconds + + +-- !query +SELECT + i, + avg(cast(v as interval)) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +FROM VALUES (1,'1 seconds'), (1,'2 seconds'), (2,NULL), (2,NULL) t(i,v) +-- !query schema +struct +-- !query output +1 1.5 seconds +1 2 seconds +2 NULL +2 NULL + + +-- !query +create temporary view interval_arithmetic as + select CAST(dateval AS date), CAST(tsval AS timestamp) from values + ('2012-01-01', '2012-01-01') + as interval_arithmetic(dateval, tsval) +-- !query schema +struct<> +-- !query output + + + +-- !query +select + dateval, + dateval - interval '2-2' year to month, + dateval - interval '-2-2' year to month, + dateval + interval '2-2' year to month, + dateval + interval '-2-2' year to month, + - interval '2-2' year to month + dateval, + interval '2-2' year to month + dateval +from interval_arithmetic +-- !query schema +struct +-- !query output +2012-01-01 2009-11-01 2014-03-01 2014-03-01 2009-11-01 2009-11-01 2014-03-01 + + +-- !query +select + tsval, + tsval - interval '2-2' year to month, + tsval - interval '-2-2' year to month, + tsval + interval '2-2' year to month, + tsval + interval '-2-2' year to month, + - interval '2-2' year to month + tsval, + interval '2-2' year to month + tsval +from interval_arithmetic +-- !query schema +struct +-- !query output +2012-01-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 2014-03-01 00:00:00 2009-11-01 00:00:00 2009-11-01 00:00:00 2014-03-01 00:00:00 + + +-- !query +select + interval '2-2' year to month + interval '3-3' year to month, + interval '2-2' year to month - interval '3-3' year to month +from interval_arithmetic +-- !query schema +struct<(INTERVAL '2 years 2 months' + INTERVAL '3 years 3 months'):interval,(INTERVAL '2 years 2 months' - INTERVAL '3 years 3 months'):interval> +-- !query output +5 years 5 months -1 years -1 months + + +-- !query +select + dateval, + dateval - interval '99 11:22:33.123456789' day to second, + dateval - interval '-99 11:22:33.123456789' 
day to second, + dateval + interval '99 11:22:33.123456789' day to second, + dateval + interval '-99 11:22:33.123456789' day to second, + -interval '99 11:22:33.123456789' day to second + dateval, + interval '99 11:22:33.123456789' day to second + dateval +from interval_arithmetic +-- !query schema +struct +-- !query output +2012-01-01 2011-09-23 2012-04-09 2012-04-09 2011-09-23 2011-09-23 2012-04-09 + + +-- !query +select + tsval, + tsval - interval '99 11:22:33.123456789' day to second, + tsval - interval '-99 11:22:33.123456789' day to second, + tsval + interval '99 11:22:33.123456789' day to second, + tsval + interval '-99 11:22:33.123456789' day to second, + -interval '99 11:22:33.123456789' day to second + tsval, + interval '99 11:22:33.123456789' day to second + tsval +from interval_arithmetic +-- !query schema +struct +-- !query output +2012-01-01 00:00:00 2011-09-23 12:37:26.876544 2012-04-09 11:22:33.123456 2012-04-09 11:22:33.123456 2011-09-23 12:37:26.876544 2011-09-23 12:37:26.876544 2012-04-09 11:22:33.123456 + + +-- !query +select + interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second, + interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second +from interval_arithmetic +-- !query schema +struct<(INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' + INTERVAL '10 days 9 hours 8 minutes 7.123456 seconds'):interval,(INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' - INTERVAL '10 days 9 hours 8 minutes 7.123456 seconds'):interval> +-- !query output +109 days 20 hours 30 minutes 40.246912 seconds 89 days 2 hours 14 minutes 26 seconds + + +-- !query +select interval '\t interval 1 day' +-- !query schema +struct +-- !query output +1 days + + +-- !query +select interval 'interval \t 1\tday' +-- !query schema +struct +-- !query output +1 days + + +-- !query +select interval 'interval\t1\tday' +-- !query schema +struct +-- !query output +1 days + + +-- !query +select 
interval '1\t' day +-- !query schema +struct +-- !query output +1 days + + +-- !query +select interval '1 ' day +-- !query schema +struct +-- !query output +1 days + + +-- !query +select -(a) from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<(- a):interval> +-- !query output +-178956970 years -8 months + + +-- !query +select a - b from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<(a - b):interval> +-- !query output +1 months + + +-- !query +select b + interval '1 month' from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<(b + INTERVAL '1 months'):interval> +-- !query output +-178956970 years -8 months + + +-- !query +select a * 1.1 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow + + +-- !query +select a / 0.5 from values (interval '-2147483648 months', interval '2147483647 months') t(a, b) +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow + + +-- !query +SELECT + from_csv('1, 1 day', 'a INT, b interval'), + to_csv(from_csv('1, 1 day', 'a INT, b interval')), + to_csv(named_struct('a', interval 32 month, 'b', interval 70 minute)), + from_csv(to_csv(named_struct('a', interval 32 month, 'b', interval 70 minute)), 'a interval, b interval') +-- !query schema +struct,to_csv(from_csv(1, 1 day)):string,to_csv(named_struct(a, INTERVAL '2 years 8 months', b, INTERVAL '1 hours 10 minutes')):string,from_csv(to_csv(named_struct(a, INTERVAL '2 years 8 months', b, INTERVAL '1 hours 10 minutes'))):struct> +-- !query output +{"a":1,"b":1 days} 1,1 days 2 years 8 months,1 hours 10 minutes {"a":2 years 8 months,"b":1 hours 10 minutes} + + +-- !query +SELECT + from_json('{"a":"1 days"}', 'a interval'), + to_json(from_json('{"a":"1 days"}', 'a 
interval')), + to_json(map('a', interval 25 month 100 day 130 minute)), + from_json(to_json(map('a', interval 25 month 100 day 130 minute)), 'a interval') +-- !query schema +struct,to_json(from_json({"a":"1 days"})):string,to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes')):string,from_json(to_json(map(a, INTERVAL '2 years 1 months 100 days 2 hours 10 minutes'))):struct> +-- !query output +{"a":1 days} {"a":"1 days"} {"a":"2 years 1 months 100 days 2 hours 10 minutes"} {"a":2 years 1 months 100 days 2 hours 10 minutes} diff --git a/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out b/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out index 857073a827f24..6b7edfb2356f4 100644 --- a/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join-empty-relation.sql.out @@ -2,193 +2,193 @@ -- Number of queries: 24 --- !query 0 +-- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM t1 INNER JOIN empty_table --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output --- !query 4 +-- !query SELECT * FROM t1 CROSS JOIN empty_table --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT * FROM t1 LEFT OUTER JOIN empty_table --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 NULL --- !query 6 +-- !query SELECT * FROM t1 RIGHT OUTER JOIN 
empty_table --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output --- !query 7 +-- !query SELECT * FROM t1 FULL OUTER JOIN empty_table --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 NULL --- !query 8 +-- !query SELECT * FROM t1 LEFT SEMI JOIN empty_table --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output --- !query 9 +-- !query SELECT * FROM t1 LEFT ANTI JOIN empty_table --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 --- !query 10 +-- !query SELECT * FROM empty_table INNER JOIN t1 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT * FROM empty_table CROSS JOIN t1 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output --- !query 12 +-- !query SELECT * FROM empty_table LEFT OUTER JOIN t1 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output --- !query 13 +-- !query SELECT * FROM empty_table RIGHT OUTER JOIN t1 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output NULL 1 --- !query 14 +-- !query SELECT * FROM empty_table FULL OUTER JOIN t1 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output NULL 1 --- !query 15 +-- !query SELECT * FROM empty_table LEFT SEMI JOIN t1 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output --- !query 16 +-- !query SELECT * FROM empty_table LEFT ANTI JOIN t1 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output --- !query 17 +-- !query SELECT * FROM empty_table INNER JOIN empty_table --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output --- !query 18 +-- !query SELECT * FROM empty_table CROSS JOIN empty_table --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output --- !query 19 +-- !query SELECT * 
FROM empty_table LEFT OUTER JOIN empty_table --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT * FROM empty_table RIGHT OUTER JOIN empty_table --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output --- !query 21 +-- !query SELECT * FROM empty_table FULL OUTER JOIN empty_table --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output --- !query 22 +-- !query SELECT * FROM empty_table LEFT SEMI JOIN empty_table --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output --- !query 23 +-- !query SELECT * FROM empty_table LEFT ANTI JOIN empty_table --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index ca0cd90d94fa7..21a3531caf732 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -2,127 +2,127 @@ -- Number of queries: 42 --- !query 0 +-- !query select to_json(named_struct('a', 1, 'b', 2)) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output {"a":1,"b":2} --- !query 1 +-- !query select to_json(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output {"time":"26/08/2015"} --- !query 2 +-- !query select to_json(array(named_struct('a', 1, 'b', 2))) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output [{"a":1,"b":2}] --- !query 3 +-- !query select to_json(map(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 2))) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output {"[1,2]":{"a":1,"b":2}} --- !query 4 +-- !query 
select to_json(map('a', named_struct('a', 1, 'b', 2))) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output {"a":{"a":1,"b":2}} --- !query 5 +-- !query select to_json(map('a', 1)) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output {"a":1} --- !query 6 +-- !query select to_json(array(map('a',1))) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output [{"a":1}] --- !query 7 +-- !query select to_json(array(map('a',1), map('b',2))) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output [{"a":1},{"b":2}] --- !query 8 +-- !query select to_json(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE')) --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException Must use a map() function for options;; line 1 pos 7 --- !query 9 +-- !query select to_json(named_struct('a', 1, 'b', 2), map('mode', 1)) --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException A type of keys and values in map() must be string, but got map;; line 1 pos 7 --- !query 10 +-- !query select to_json() --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException Invalid number of arguments for function to_json. 
Expected: one of 1 and 2; Found: 0; line 1 pos 7 --- !query 11 +-- !query select from_json('{"a":1}', 'a INT') --- !query 11 schema +-- !query schema struct> --- !query 11 output +-- !query output {"a":1} --- !query 12 +-- !query select from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')) --- !query 12 schema +-- !query schema struct> --- !query 12 output -{"time":2015-08-26 00:00:00.0} +-- !query output +{"time":2015-08-26 00:00:00} --- !query 13 +-- !query select from_json('{"a":1}', 1) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException Schema should be specified in DDL format as a string literal or output of the schema_of_json function instead of 1;; line 1 pos 7 --- !query 14 +-- !query select from_json('{"a":1}', 'a InvalidType') --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException DataType invalidtype is not supported.(line 1, pos 2) @@ -133,222 +133,222 @@ a InvalidType ; line 1 pos 7 --- !query 15 +-- !query select from_json('{"a":1}', 'a INT', named_struct('mode', 'PERMISSIVE')) --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException Must use a map() function for options;; line 1 pos 7 --- !query 16 +-- !query select from_json('{"a":1}', 'a INT', map('mode', 1)) --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException A type of keys and values in map() must be string, but got map;; line 1 pos 7 --- !query 17 +-- !query select from_json() --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException Invalid number of arguments for function from_json. 
Expected: one of 2 and 3; Found: 0; line 1 pos 7 --- !query 18 +-- !query SELECT json_tuple('{"a" : 1, "b" : 2}', CAST(NULL AS STRING), 'b', CAST(NULL AS STRING), 'a') --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output NULL 2 NULL 1 --- !query 19 +-- !query CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a') --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT json_tuple(jsonField, 'b', CAST(NULL AS STRING), a) FROM jsonTable --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 2 NULL 1 --- !query 21 +-- !query DROP VIEW IF EXISTS jsonTable --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output --- !query 22 +-- !query select from_json('{"a":1, "b":2}', 'map') --- !query 22 schema +-- !query schema struct> --- !query 22 output +-- !query output {"a":1,"b":2} --- !query 23 +-- !query select from_json('{"a":1, "b":"2"}', 'struct') --- !query 23 schema +-- !query schema struct> --- !query 23 output +-- !query output {"a":1,"b":"2"} --- !query 24 +-- !query select schema_of_json('{"c1":0, "c2":[1]}') --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output struct> --- !query 25 +-- !query select from_json('{"c1":[1, 2, 3]}', schema_of_json('{"c1":[0]}')) --- !query 25 schema +-- !query schema struct>> --- !query 25 output +-- !query output {"c1":[1,2,3]} --- !query 26 +-- !query select from_json('[1, 2, 3]', 'array') --- !query 26 schema +-- !query schema struct> --- !query 26 output +-- !query output [1,2,3] --- !query 27 +-- !query select from_json('[1, "2", 3]', 'array') --- !query 27 schema +-- !query schema struct> --- !query 27 output +-- !query output NULL --- !query 28 +-- !query select from_json('[1, 2, null]', 'array') --- !query 28 schema +-- !query schema struct> --- !query 28 output +-- !query output [1,2,null] --- !query 29 
+-- !query select from_json('[{"a": 1}, {"a":2}]', 'array>') --- !query 29 schema +-- !query schema struct>> --- !query 29 output +-- !query output [{"a":1},{"a":2}] --- !query 30 +-- !query select from_json('{"a": 1}', 'array>') --- !query 30 schema +-- !query schema struct>> --- !query 30 output +-- !query output [{"a":1}] --- !query 31 +-- !query select from_json('[null, {"a":2}]', 'array>') --- !query 31 schema +-- !query schema struct>> --- !query 31 output +-- !query output [null,{"a":2}] --- !query 32 +-- !query select from_json('[{"a": 1}, {"b":2}]', 'array>') --- !query 32 schema +-- !query schema struct>> --- !query 32 output +-- !query output [{"a":1},{"b":2}] --- !query 33 +-- !query select from_json('[{"a": 1}, 2]', 'array>') --- !query 33 schema +-- !query schema struct>> --- !query 33 output +-- !query output NULL --- !query 34 +-- !query select to_json(array('1', '2', '3')) --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output ["1","2","3"] --- !query 35 +-- !query select to_json(array(array(1, 2, 3), array(4))) --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output [[1,2,3],[4]] --- !query 36 +-- !query select schema_of_json('{"c1":1}', map('primitivesAsString', 'true')) --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output struct --- !query 37 +-- !query select schema_of_json('{"c1":01, "c2":0.1}', map('allowNumericLeadingZeros', 'true', 'prefersDecimal', 'true')) --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output struct --- !query 38 +-- !query select schema_of_json(null) --- !query 38 schema +-- !query schema struct<> --- !query 38 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'schema_of_json(NULL)' due to data type mismatch: The input json should be a string literal and not null; however, got NULL.; line 1 pos 7 --- !query 39 +-- !query CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS 
SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a') --- !query 39 schema +-- !query schema struct<> --- !query 39 output +-- !query output --- !query 40 +-- !query SELECT schema_of_json(jsonField) FROM jsonTable --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'schema_of_json(jsontable.`jsonField`)' due to data type mismatch: The input json should be a string literal and not null; however, got jsontable.`jsonField`.; line 1 pos 7 --- !query 41 +-- !query DROP VIEW IF EXISTS jsonTable --- !query 41 schema +-- !query schema struct<> --- !query 41 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 02fe1de84f753..281326e22a97a 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -2,125 +2,125 @@ -- Number of queries: 14 --- !query 0 +-- !query SELECT * FROM testdata LIMIT 2 --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1 1 2 2 --- !query 1 +-- !query SELECT * FROM arraydata LIMIT 2 --- !query 1 schema +-- !query schema struct,nestedarraycol:array>> --- !query 1 output +-- !query output [1,2,3] [[1,2,3]] [2,3,4] [[2,3,4]] --- !query 2 +-- !query SELECT * FROM mapdata LIMIT 2 --- !query 2 schema +-- !query schema struct> --- !query 2 output +-- !query output {1:"a1",2:"b1",3:"c1",4:"d1",5:"e1"} {1:"a2",2:"b2",3:"c2",4:"d2"} --- !query 3 +-- !query SELECT * FROM testdata LIMIT 2 + 1 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 1 2 2 3 3 --- !query 4 +-- !query SELECT * FROM testdata LIMIT CAST(1 AS int) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 1 --- !query 5 +-- !query SELECT * FROM testdata LIMIT -1 --- !query 5 schema +-- !query schema struct<> --- !query 5 output 
+-- !query output org.apache.spark.sql.AnalysisException The limit expression must be equal to or greater than 0, but got -1; --- !query 6 +-- !query SELECT * FROM testData TABLESAMPLE (-1 ROWS) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException The limit expression must be equal to or greater than 0, but got -1; --- !query 7 +-- !query SELECT * FROM testdata LIMIT CAST(1 AS INT) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 1 --- !query 8 +-- !query SELECT * FROM testdata LIMIT CAST(NULL AS INT) --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException The evaluated limit expression must not be null, but got CAST(NULL AS INT); --- !query 9 +-- !query SELECT * FROM testdata LIMIT key > 3 --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); --- !query 10 +-- !query SELECT * FROM testdata LIMIT true --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException The limit expression must be integer type, but got boolean; --- !query 11 +-- !query SELECT * FROM testdata LIMIT 'a' --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException The limit expression must be integer type, but got string; --- !query 12 +-- !query SELECT * FROM (SELECT * FROM range(10) LIMIT 5) WHERE id > 3 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 4 --- !query 13 +-- !query SELECT * FROM testdata WHERE key < 3 LIMIT ALL --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 1 2 2 diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out 
b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index e1e8d685e8787..f6720f6c5faa4 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -1,44 +1,44 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 46 +-- Number of queries: 50 --- !query 0 +-- !query select null, Null, nUll --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output NULL NULL NULL --- !query 1 +-- !query select true, tRue, false, fALse --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output true true false false --- !query 2 +-- !query select 1Y --- !query 2 schema +-- !query schema struct<1:tinyint> --- !query 2 output +-- !query output 1 --- !query 3 +-- !query select 127Y, -128Y --- !query 3 schema +-- !query schema struct<127:tinyint,-128:tinyint> --- !query 3 output +-- !query output 127 -128 --- !query 4 +-- !query select 128Y --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Numeric literal 128 does not fit in range [-128, 127] for type tinyint(line 1, pos 7) @@ -48,27 +48,27 @@ select 128Y -------^^^ --- !query 5 +-- !query select 1S --- !query 5 schema +-- !query schema struct<1:smallint> --- !query 5 output +-- !query output 1 --- !query 6 +-- !query select 32767S, -32768S --- !query 6 schema +-- !query schema struct<32767:smallint,-32768:smallint> --- !query 6 output +-- !query output 32767 -32768 --- !query 7 +-- !query select 32768S --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Numeric literal 32768 does not fit in range [-32768, 32767] for type smallint(line 1, pos 7) @@ -78,27 +78,27 @@ select 32768S -------^^^ --- !query 8 +-- !query select 1L, 2147483648L --- !query 8 schema +-- !query schema 
struct<1:bigint,2147483648:bigint> --- !query 8 output +-- !query output 1 2147483648 --- !query 9 +-- !query select 9223372036854775807L, -9223372036854775808L --- !query 9 schema +-- !query schema struct<9223372036854775807:bigint,-9223372036854775808:bigint> --- !query 9 output +-- !query output 9223372036854775807 -9223372036854775808 --- !query 10 +-- !query select 9223372036854775808L --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Numeric literal 9223372036854775808 does not fit in range [-9223372036854775808, 9223372036854775807] for type bigint(line 1, pos 7) @@ -108,43 +108,43 @@ select 9223372036854775808L -------^^^ --- !query 11 +-- !query select 1, -1 --- !query 11 schema +-- !query schema struct<1:int,-1:int> --- !query 11 output +-- !query output 1 -1 --- !query 12 +-- !query select 2147483647, -2147483648 --- !query 12 schema +-- !query schema struct<2147483647:int,-2147483648:int> --- !query 12 output +-- !query output 2147483647 -2147483648 --- !query 13 +-- !query select 9223372036854775807, -9223372036854775808 --- !query 13 schema +-- !query schema struct<9223372036854775807:bigint,-9223372036854775808:bigint> --- !query 13 output +-- !query output 9223372036854775807 -9223372036854775808 --- !query 14 +-- !query select 9223372036854775808, -9223372036854775809 --- !query 14 schema +-- !query schema struct<9223372036854775808:decimal(19,0),-9223372036854775809:decimal(19,0)> --- !query 14 output +-- !query output 9223372036854775808 -9223372036854775809 --- !query 15 +-- !query select 1234567890123456789012345678901234567890 --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException decimal can only support precision up to 38 @@ -152,11 +152,11 @@ decimal can only support precision up to 38 select 1234567890123456789012345678901234567890 --- !query 16 +-- !query select 
1234567890123456789012345678901234567890.0 --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException decimal can only support precision up to 38 @@ -164,27 +164,27 @@ decimal can only support precision up to 38 select 1234567890123456789012345678901234567890.0 --- !query 17 +-- !query select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1 --- !query 17 schema -struct<1.0:double,1.2:double,1E+10:decimal(1,-10),1.5E+5:decimal(2,-4),0.1:double,0.1:double,1E+4:decimal(1,-4),9E+1:decimal(1,-1),9E+1:decimal(1,-1),90.0:decimal(3,1),9E+1:decimal(1,-1)> --- !query 17 output -1.0 1.2 10000000000 150000 0.1 0.1 10000 90 90 90 90 +-- !query schema +struct<1.0:double,1.2:double,1.0E10:double,150000.0:double,0.1:double,0.1:double,10000.0:double,90.0:double,90.0:double,90.0:double,90.0:double> +-- !query output +1.0 1.2 1.0E10 150000.0 0.1 0.1 10000.0 90.0 90.0 90.0 90.0 --- !query 18 +-- !query select -1D, -1.2D, -1e10, -1.5e5, -.10D, -0.10D, -.1e5 --- !query 18 schema -struct<-1.0:double,-1.2:double,-1E+10:decimal(1,-10),-1.5E+5:decimal(2,-4),-0.1:double,-0.1:double,-1E+4:decimal(1,-4)> --- !query 18 output --1.0 -1.2 -10000000000 -150000 -0.1 -0.1 -10000 +-- !query schema +struct<-1.0:double,-1.2:double,-1.0E10:double,-150000.0:double,-0.1:double,-0.1:double,-10000.0:double> +-- !query output +-1.0 -1.2 -1.0E10 -150000.0 -0.1 -0.1 -10000.0 --- !query 19 +-- !query select .e3 --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException no viable alternative at input 'select .'(line 1, pos 7) @@ -194,101 +194,107 @@ select .e3 -------^^^ --- !query 20 +-- !query select 1E309, -1E309 --- !query 20 schema -struct<1E+309:decimal(1,-309),-1E+309:decimal(1,-309)> --- !query 20 output 
-1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 -1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Numeric literal 1E309 does not fit in range [-1.7976931348623157E+308, 1.7976931348623157E+308] for type double(line 1, pos 7) + +== SQL == +select 1E309, -1E309 +-------^^^ --- !query 21 +-- !query select 0.3, -0.8, .5, -.18, 0.1111, .1111 --- !query 21 schema +-- !query schema struct<0.3:decimal(1,1),-0.8:decimal(1,1),0.5:decimal(1,1),-0.18:decimal(2,2),0.1111:decimal(4,4),0.1111:decimal(4,4)> --- !query 21 output +-- !query output 0.3 -0.8 0.5 -0.18 0.1111 0.1111 --- !query 22 +-- !query select 123456789012345678901234567890123456789e10d, 123456789012345678901234567890123456789.1e10d --- !query 22 schema +-- !query schema struct<1.2345678901234568E48:double,1.2345678901234568E48:double> --- !query 22 output +-- !query output 1.2345678901234568E48 1.2345678901234568E48 --- !query 23 +-- !query select "Hello Peter!", 'hello lee!' --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output Hello Peter! hello lee! 
--- !query 24 +-- !query select 'hello' 'world', 'hello' " " 'lee' --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output helloworld hello lee --- !query 25 +-- !query select "hello 'peter'" --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output hello 'peter' --- !query 26 +-- !query select 'pattern%', 'no-pattern\%', 'pattern\\%', 'pattern\\\%' --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output pattern% no-pattern\% pattern\% pattern\\% --- !query 27 +-- !query select '\'', '"', '\n', '\r', '\t', 'Z' --- !query 27 schema +-- !query schema struct<':string,":string, :string, :string, :string,Z:string> --- !query 27 output +-- !query output ' " Z --- !query 28 +-- !query select '\110\145\154\154\157\041' --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output Hello! --- !query 29 +-- !query select '\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029' --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output World :) --- !query 30 +-- !query select dAte '2016-03-12' --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 2016-03-12 --- !query 31 +-- !query select date 'mar 11 2016' --- !query 31 schema +-- !query schema struct<> --- !query 31 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Cannot parse the DATE value: mar 11 2016(line 1, pos 7) @@ -298,19 +304,19 @@ select date 'mar 11 2016' -------^^^ --- !query 32 +-- !query select tImEstAmp '2016-03-11 20:54:00.000' --- !query 32 schema -struct --- !query 32 output +-- !query schema +struct +-- !query output 2016-03-11 20:54:00 --- !query 33 +-- !query select timestamp '2016-33-11 20:54:00.000' --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Cannot parse the TIMESTAMP value: 2016-33-11 20:54:00.000(line 1, pos 7) @@ -320,41 
+326,11 @@ select timestamp '2016-33-11 20:54:00.000' -------^^^ --- !query 34 -select interval 13.123456789 seconds, interval -13.123456789 second --- !query 34 schema -struct --- !query 34 output -interval 13 seconds 123 milliseconds 456 microseconds interval -12 seconds -876 milliseconds -544 microseconds - - --- !query 35 -select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond, 9 microsecond --- !query 35 schema -struct --- !query 35 output -interval 1 years 2 months 3 weeks 4 days 5 hours 6 minutes 7 seconds 8 milliseconds 9 - - --- !query 36 -select interval 10 nanoseconds --- !query 36 schema -struct<> --- !query 36 output -org.apache.spark.sql.catalyst.parser.ParseException - -no viable alternative at input 'interval 10 nanoseconds'(line 1, pos 19) - -== SQL == -select interval 10 nanoseconds --------------------^^^ - - --- !query 37 +-- !query select GEO '(10,-6)' --- !query 37 schema +-- !query schema struct<> --- !query 37 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Literals of type 'GEO' are currently not supported.(line 1, pos 7) @@ -364,19 +340,19 @@ select GEO '(10,-6)' -------^^^ --- !query 38 +-- !query select 90912830918230182310293801923652346786BD, 123.0E-28BD, 123.08BD --- !query 38 schema +-- !query schema struct<90912830918230182310293801923652346786:decimal(38,0),1.230E-26:decimal(29,29),123.08:decimal(5,2)> --- !query 38 output -90912830918230182310293801923652346786 0.0000000000000000000000000123 123.08 +-- !query output +90912830918230182310293801923652346786 0.00000000000000000000000001230 123.08 --- !query 39 +-- !query select 1.20E-38BD --- !query 39 schema +-- !query schema struct<> --- !query 39 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException decimal can only support precision up to 38(line 1, pos 7) @@ -386,19 +362,19 @@ select 1.20E-38BD -------^^^ --- !query 40 +-- !query select x'2379ACFe' --- !query 40 schema +-- !query schema struct --- 
!query 40 output +-- !query output #y�� --- !query 41 +-- !query select X'XuZ' --- !query 41 schema +-- !query schema struct<> --- !query 41 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException contains illegal character for hexBinary: 0XuZ(line 1, pos 7) @@ -408,33 +384,98 @@ select X'XuZ' -------^^^ --- !query 42 +-- !query SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8 --- !query 42 schema -struct<3.14:decimal(3,2),-3.14:decimal(3,2),3.14E+8:decimal(3,-6),3.14E-8:decimal(10,10),-3.14E+8:decimal(3,-6),-3.14E-8:decimal(10,10),3.14E+8:decimal(3,-6),3.14E+8:decimal(3,-6),3.14E-8:decimal(10,10)> --- !query 42 output -3.14 -3.14 314000000 0.0000000314 -314000000 -0.0000000314 314000000 314000000 0.0000000314 - - --- !query 43 -select map(1, interval 1 day, 2, interval 3 week) --- !query 43 schema -struct> --- !query 43 output -{1:interval 1 days,2:interval 3 weeks} - - --- !query 44 -select interval 'interval 3 year 1 hour' --- !query 44 schema -struct --- !query 44 output -interval 3 years 1 hours - - --- !query 45 -select interval '3 year 1 hour' --- !query 45 schema -struct --- !query 45 output -NULL +-- !query schema +struct<3.14:decimal(3,2),-3.14:decimal(3,2),3.14E8:double,3.14E-8:double,-3.14E8:double,-3.14E-8:double,3.14E8:double,3.14E8:double,3.14E-8:double> +-- !query output +3.14 -3.14 3.14E8 3.14E-8 -3.14E8 -3.14E-8 3.14E8 3.14E8 3.14E-8 + + +-- !query +select +date '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ DATE '1999-01-01')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'DATE '1999-01-01'' is of date type.; line 1 pos 7 + + +-- !query +select +timestamp '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ TIMESTAMP '1999-01-01 00:00:00')' due to data type mismatch: argument 1 requires (numeric or interval) type, 
however, 'TIMESTAMP '1999-01-01 00:00:00'' is of timestamp type.; line 1 pos 7 + + +-- !query +select +interval '1 day' +-- !query schema +struct<(+ INTERVAL '1 days'):interval> +-- !query output +1 days + + +-- !query +select +map(1, 2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ map(1, 2))' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'map(1, 2)' is of map type.; line 1 pos 7 + + +-- !query +select +array(1,2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ array(1, 2))' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'array(1, 2)' is of array type.; line 1 pos 7 + + +-- !query +select +named_struct('a', 1, 'b', 'spark') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ named_struct('a', 1, 'b', 'spark'))' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'named_struct('a', 1, 'b', 'spark')' is of struct type.; line 1 pos 7 + + +-- !query +select +X'1' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(+ X'01')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'X'01'' is of binary type.; line 1 pos 7 + + +-- !query +select -date '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(- DATE '1999-01-01')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'DATE '1999-01-01'' is of date type.; line 1 pos 7 + + +-- !query +select -timestamp '1999-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(- TIMESTAMP '1999-01-01 00:00:00')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'TIMESTAMP '1999-01-01 00:00:00'' 
is of timestamp type.; line 1 pos 7 + + +-- !query +select -x'2379ACFe' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(- X'2379ACFE')' due to data type mismatch: argument 1 requires (numeric or interval) type, however, 'X'2379ACFE'' is of binary type.; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out new file mode 100644 index 0000000000000..bd8ffb82ee129 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -0,0 +1,58 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 7 + + +-- !query +select typeof(null) +-- !query schema +struct +-- !query output +null + + +-- !query +select typeof(true) +-- !query schema +struct +-- !query output +boolean + + +-- !query +select typeof(1Y), typeof(1S), typeof(1), typeof(1L) +-- !query schema +struct +-- !query output +tinyint smallint int bigint + + +-- !query +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) +-- !query schema +struct +-- !query output +float double decimal(2,1) + + +-- !query +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') +-- !query schema +struct +-- !query output +date timestamp interval + + +-- !query +select typeof(x'ABCD'), typeof('SPARK') +-- !query schema +struct +-- !query output +binary string + + +-- !query +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) +-- !query schema +struct +-- !query output +array map struct diff --git a/sql/core/src/test/resources/sql-tests/results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/results/natural-join.sql.out index 43f2f9af61d9b..13f319700df3f 100644 --- a/sql/core/src/test/resources/sql-tests/results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/natural-join.sql.out @@ -2,63 +2,63 @@ -- 
Number of queries: 6 --- !query 0 +-- !query create temporary view nt1 as select * from values ("one", 1), ("two", 2), ("three", 3) as nt1(k, v1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view nt2 as select * from values ("one", 1), ("two", 22), ("one", 5) as nt2(k, v2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM nt1 natural join nt2 where k = "one" --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output one 1 1 one 1 5 --- !query 3 +-- !query SELECT * FROM nt1 natural left join nt2 order by v1, v2 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output one 1 1 one 1 5 two 2 22 three 3 NULL --- !query 4 +-- !query SELECT * FROM nt1 natural right join nt2 order by v1, v2 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output one 1 1 one 1 5 two 2 22 --- !query 5 +-- !query SELECT count(*) FROM nt1 natural full outer join nt2 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 4 diff --git a/sql/core/src/test/resources/sql-tests/results/null-handling.sql.out b/sql/core/src/test/resources/sql-tests/results/null-handling.sql.out index 5005dfeb6cd14..5e7eec56743b1 100644 --- a/sql/core/src/test/resources/sql-tests/results/null-handling.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/null-handling.sql.out @@ -2,75 +2,75 @@ -- Number of queries: 28 --- !query 0 +-- !query create table t1(a int, b int, c int) using parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query insert into t1 values(1,0,0) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query insert into t1 values(2,0,1) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 
+-- !query insert into t1 values(3,1,0) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query insert into t1 values(4,1,1) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query insert into t1 values(5,null,0) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query insert into t1 values(6,null,1) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query insert into t1 values(7,null,null) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query select a, b+c from t1 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 0 2 1 3 1 @@ -80,11 +80,11 @@ struct 7 NULL --- !query 9 +-- !query select a+10, b*0 from t1 --- !query 9 schema +-- !query schema struct<(a + 10):int,(b * 0):int> --- !query 9 output +-- !query output 11 0 12 0 13 0 @@ -94,31 +94,31 @@ struct<(a + 10):int,(b * 0):int> 17 NULL --- !query 10 +-- !query select distinct b from t1 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 0 1 NULL --- !query 11 +-- !query select b from t1 union select b from t1 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 0 1 NULL --- !query 12 +-- !query select a+20, case b when c then 1 else 0 end from t1 --- !query 12 schema +-- !query schema struct<(a + 20):int,CASE WHEN (b = c) THEN 1 ELSE 0 END:int> --- !query 12 output +-- !query output 21 1 22 0 23 0 @@ -128,11 +128,11 @@ struct<(a + 20):int,CASE WHEN (b = c) THEN 1 ELSE 0 END:int> 27 0 --- !query 13 +-- !query select a+30, case c when b then 1 else 0 end from t1 --- !query 13 schema +-- !query schema struct<(a + 30):int,CASE WHEN (c = b) THEN 1 ELSE 0 END:int> --- !query 13 output +-- !query output 31 1 32 0 33 0 @@ -142,11 +142,11 @@ struct<(a + 
30):int,CASE WHEN (c = b) THEN 1 ELSE 0 END:int> 37 0 --- !query 14 +-- !query select a+40, case when b<>0 then 1 else 0 end from t1 --- !query 14 schema +-- !query schema struct<(a + 40):int,CASE WHEN (NOT (b = 0)) THEN 1 ELSE 0 END:int> --- !query 14 output +-- !query output 41 0 42 0 43 1 @@ -156,11 +156,11 @@ struct<(a + 40):int,CASE WHEN (NOT (b = 0)) THEN 1 ELSE 0 END:int> 47 0 --- !query 15 +-- !query select a+50, case when not b<>0 then 1 else 0 end from t1 --- !query 15 schema +-- !query schema struct<(a + 50):int,CASE WHEN (NOT (NOT (b = 0))) THEN 1 ELSE 0 END:int> --- !query 15 output +-- !query output 51 1 52 1 53 0 @@ -170,11 +170,11 @@ struct<(a + 50):int,CASE WHEN (NOT (NOT (b = 0))) THEN 1 ELSE 0 END:int> 57 0 --- !query 16 +-- !query select a+60, case when b<>0 and c<>0 then 1 else 0 end from t1 --- !query 16 schema +-- !query schema struct<(a + 60):int,CASE WHEN ((NOT (b = 0)) AND (NOT (c = 0))) THEN 1 ELSE 0 END:int> --- !query 16 output +-- !query output 61 0 62 0 63 0 @@ -184,11 +184,11 @@ struct<(a + 60):int,CASE WHEN ((NOT (b = 0)) AND (NOT (c = 0))) THEN 1 ELSE 0 EN 67 0 --- !query 17 +-- !query select a+70, case when not (b<>0 and c<>0) then 1 else 0 end from t1 --- !query 17 schema +-- !query schema struct<(a + 70):int,CASE WHEN (NOT ((NOT (b = 0)) AND (NOT (c = 0)))) THEN 1 ELSE 0 END:int> --- !query 17 output +-- !query output 71 1 72 1 73 1 @@ -198,11 +198,11 @@ struct<(a + 70):int,CASE WHEN (NOT ((NOT (b = 0)) AND (NOT (c = 0)))) THEN 1 ELS 77 0 --- !query 18 +-- !query select a+80, case when b<>0 or c<>0 then 1 else 0 end from t1 --- !query 18 schema +-- !query schema struct<(a + 80):int,CASE WHEN ((NOT (b = 0)) OR (NOT (c = 0))) THEN 1 ELSE 0 END:int> --- !query 18 output +-- !query output 81 0 82 1 83 1 @@ -212,11 +212,11 @@ struct<(a + 80):int,CASE WHEN ((NOT (b = 0)) OR (NOT (c = 0))) THEN 1 ELSE 0 END 87 0 --- !query 19 +-- !query select a+90, case when not (b<>0 or c<>0) then 1 else 0 end from t1 --- !query 19 schema +-- !query 
schema struct<(a + 90):int,CASE WHEN (NOT ((NOT (b = 0)) OR (NOT (c = 0)))) THEN 1 ELSE 0 END:int> --- !query 19 output +-- !query output 91 1 92 0 93 0 @@ -226,41 +226,41 @@ struct<(a + 90):int,CASE WHEN (NOT ((NOT (b = 0)) OR (NOT (c = 0)))) THEN 1 ELSE 97 0 --- !query 20 +-- !query select count(*), count(b), sum(b), avg(b), min(b), max(b) from t1 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 7 4 2 0.5 0 1 --- !query 21 +-- !query select a+100 from t1 where b<10 --- !query 21 schema +-- !query schema struct<(a + 100):int> --- !query 21 output +-- !query output 101 102 103 104 --- !query 22 +-- !query select a+110 from t1 where not b>10 --- !query 22 schema +-- !query schema struct<(a + 110):int> --- !query 22 output +-- !query output 111 112 113 114 --- !query 23 +-- !query select a+120 from t1 where b<10 OR c=1 --- !query 23 schema +-- !query schema struct<(a + 120):int> --- !query 23 output +-- !query output 121 122 123 @@ -268,38 +268,38 @@ struct<(a + 120):int> 126 --- !query 24 +-- !query select a+130 from t1 where b<10 AND c=1 --- !query 24 schema +-- !query schema struct<(a + 130):int> --- !query 24 output +-- !query output 132 134 --- !query 25 +-- !query select a+140 from t1 where not (b<10 AND c=1) --- !query 25 schema +-- !query schema struct<(a + 140):int> --- !query 25 output +-- !query output 141 143 145 --- !query 26 +-- !query select a+150 from t1 where not (c=1 AND b<10) --- !query 26 schema +-- !query schema struct<(a + 150):int> --- !query 26 output +-- !query output 151 153 155 --- !query 27 +-- !query drop table t1 --- !query 27 schema +-- !query schema struct<> --- !query 27 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/null-propagation.sql.out b/sql/core/src/test/resources/sql-tests/results/null-propagation.sql.out index ed3a651aa6614..76a41f9170388 100644 --- a/sql/core/src/test/resources/sql-tests/results/null-propagation.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/null-propagation.sql.out @@ -2,37 +2,37 @@ -- Number of queries: 4 --- !query 0 +-- !query SELECT COUNT(NULL) FROM VALUES 1, 2, 3 --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 0 --- !query 1 +-- !query SELECT COUNT(1 + NULL) FROM VALUES 1, 2, 3 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 0 --- !query 2 +-- !query SELECT COUNT(NULL) OVER () FROM VALUES 1, 2, 3 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 0 0 0 --- !query 3 +-- !query SELECT COUNT(1 + NULL) OVER () FROM VALUES 1, 2, 3 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 0 0 0 diff --git a/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out b/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out index 75736bee669b0..3f933f4c0e449 100644 --- a/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operator-div.sql.out @@ -2,145 +2,145 @@ -- Number of queries: 18 --- !query 0 +-- !query set spark.sql.legacy.integralDivide.returnBigint=true --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output spark.sql.legacy.integralDivide.returnBigint true --- !query 1 +-- !query select 5 div 2 --- !query 1 schema +-- !query schema struct<(5 div 2):bigint> --- !query 1 output +-- !query output 2 --- !query 2 +-- !query select 5 div 0 --- !query 2 schema +-- !query schema struct<(5 div 0):bigint> --- !query 2 output +-- !query output NULL --- !query 3 +-- !query select 5 div null --- !query 3 schema +-- !query schema struct<(5 div CAST(NULL AS INT)):bigint> --- !query 3 output +-- !query output NULL --- !query 4 +-- !query select null div 5 --- !query 4 schema +-- !query schema struct<(CAST(NULL AS INT) div 5):bigint> --- !query 4 output +-- !query output NULL --- !query 5 +-- !query select cast(51 as 
decimal(10, 0)) div cast(2 as decimal(2, 0)) --- !query 5 schema +-- !query schema struct<(CAST(CAST(51 AS DECIMAL(10,0)) AS DECIMAL(10,0)) div CAST(CAST(2 AS DECIMAL(2,0)) AS DECIMAL(10,0))):bigint> --- !query 5 output +-- !query output 25 --- !query 6 +-- !query select cast(5 as decimal(1, 0)) div cast(0 as decimal(2, 0)) --- !query 6 schema +-- !query schema struct<(CAST(CAST(5 AS DECIMAL(1,0)) AS DECIMAL(2,0)) div CAST(CAST(0 AS DECIMAL(2,0)) AS DECIMAL(2,0))):bigint> --- !query 6 output +-- !query output NULL --- !query 7 +-- !query select cast(5 as decimal(1, 0)) div cast(null as decimal(2, 0)) --- !query 7 schema +-- !query schema struct<(CAST(CAST(5 AS DECIMAL(1,0)) AS DECIMAL(2,0)) div CAST(CAST(NULL AS DECIMAL(2,0)) AS DECIMAL(2,0))):bigint> --- !query 7 output +-- !query output NULL --- !query 8 +-- !query select cast(null as decimal(1, 0)) div cast(5 as decimal(2, 0)) --- !query 8 schema +-- !query schema struct<(CAST(CAST(NULL AS DECIMAL(1,0)) AS DECIMAL(2,0)) div CAST(CAST(5 AS DECIMAL(2,0)) AS DECIMAL(2,0))):bigint> --- !query 8 output +-- !query output NULL --- !query 9 +-- !query set spark.sql.legacy.integralDivide.returnBigint=false --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output spark.sql.legacy.integralDivide.returnBigint false --- !query 10 +-- !query select 5 div 2 --- !query 10 schema +-- !query schema struct<(5 div 2):int> --- !query 10 output +-- !query output 2 --- !query 11 +-- !query select 5 div 0 --- !query 11 schema +-- !query schema struct<(5 div 0):int> --- !query 11 output +-- !query output NULL --- !query 12 +-- !query select 5 div null --- !query 12 schema +-- !query schema struct<(5 div CAST(NULL AS INT)):int> --- !query 12 output +-- !query output NULL --- !query 13 +-- !query select null div 5 --- !query 13 schema +-- !query schema struct<(CAST(NULL AS INT) div 5):int> --- !query 13 output +-- !query output NULL --- !query 14 +-- !query select cast(51 as decimal(10, 0)) div cast(2 as 
decimal(2, 0)) --- !query 14 schema +-- !query schema struct<(CAST(CAST(51 AS DECIMAL(10,0)) AS DECIMAL(10,0)) div CAST(CAST(2 AS DECIMAL(2,0)) AS DECIMAL(10,0))):decimal(10,0)> --- !query 14 output +-- !query output 25 --- !query 15 +-- !query select cast(5 as decimal(1, 0)) div cast(0 as decimal(2, 0)) --- !query 15 schema +-- !query schema struct<(CAST(CAST(5 AS DECIMAL(1,0)) AS DECIMAL(2,0)) div CAST(CAST(0 AS DECIMAL(2,0)) AS DECIMAL(2,0))):decimal(1,0)> --- !query 15 output +-- !query output NULL --- !query 16 +-- !query select cast(5 as decimal(1, 0)) div cast(null as decimal(2, 0)) --- !query 16 schema +-- !query schema struct<(CAST(CAST(5 AS DECIMAL(1,0)) AS DECIMAL(2,0)) div CAST(CAST(NULL AS DECIMAL(2,0)) AS DECIMAL(2,0))):decimal(1,0)> --- !query 16 output +-- !query output NULL --- !query 17 +-- !query select cast(null as decimal(1, 0)) div cast(5 as decimal(2, 0)) --- !query 17 schema +-- !query schema struct<(CAST(CAST(NULL AS DECIMAL(1,0)) AS DECIMAL(2,0)) div CAST(CAST(5 AS DECIMAL(2,0)) AS DECIMAL(2,0))):decimal(1,0)> --- !query 17 output +-- !query output NULL diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index e0cbd575bc346..548281014afd7 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -2,393 +2,393 @@ -- Number of queries: 49 --- !query 0 +-- !query select -100 --- !query 0 schema +-- !query schema struct<-100:int> --- !query 0 output +-- !query output -100 --- !query 1 +-- !query select +230 --- !query 1 schema -struct<230:int> --- !query 1 output +-- !query schema +struct<(+ 230):int> +-- !query output 230 --- !query 2 +-- !query select -5.2 --- !query 2 schema +-- !query schema struct<-5.2:decimal(2,1)> --- !query 2 output +-- !query output -5.2 --- !query 3 +-- !query select +6.8e0 --- !query 3 schema -struct<6.8:decimal(2,1)> --- !query 3 
output +-- !query schema +struct<(+ 6.8):double> +-- !query output 6.8 --- !query 4 +-- !query select -key, +key from testdata where key = 2 --- !query 4 schema -struct<(- key):int,key:int> --- !query 4 output +-- !query schema +struct<(- key):int,(+ key):int> +-- !query output -2 2 --- !query 5 +-- !query select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 --- !query 5 schema -struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> --- !query 5 output +-- !query schema +struct<(- (key + 1)):int,((- key) + 1):int,(+ (key + 5)):int> +-- !query output -2 0 6 --- !query 6 +-- !query select -max(key), +max(key) from testdata --- !query 6 schema -struct<(- max(key)):int,max(key):int> --- !query 6 output +-- !query schema +struct<(- max(key)):int,(+ max(key)):int> +-- !query output -100 100 --- !query 7 +-- !query select - (-10) --- !query 7 schema +-- !query schema struct<(- -10):int> --- !query 7 output +-- !query output 10 --- !query 8 +-- !query select + (-key) from testdata where key = 32 --- !query 8 schema -struct<(- key):int> --- !query 8 output +-- !query schema +struct<(+ (- key)):int> +-- !query output -32 --- !query 9 +-- !query select - (+max(key)) from testdata --- !query 9 schema -struct<(- max(key)):int> --- !query 9 output +-- !query schema +struct<(- (+ max(key))):int> +-- !query output -100 --- !query 10 +-- !query select - - 3 --- !query 10 schema +-- !query schema struct<(- -3):int> --- !query 10 output +-- !query output 3 --- !query 11 +-- !query select - + 20 --- !query 11 schema -struct<(- 20):int> --- !query 11 output +-- !query schema +struct<(- (+ 20)):int> +-- !query output -20 --- !query 12 +-- !query select + + 100 --- !query 12 schema -struct<100:int> --- !query 12 output +-- !query schema +struct<(+ (+ 100)):int> +-- !query output 100 --- !query 13 +-- !query select - - max(key) from testdata --- !query 13 schema +-- !query schema struct<(- (- max(key))):int> --- !query 13 output +-- !query output 100 --- !query 14 +-- 
!query select + - key from testdata where key = 33 --- !query 14 schema -struct<(- key):int> --- !query 14 output +-- !query schema +struct<(+ (- key)):int> +-- !query output -33 --- !query 15 +-- !query select 5 / 2 --- !query 15 schema +-- !query schema struct<(CAST(5 AS DOUBLE) / CAST(2 AS DOUBLE)):double> --- !query 15 output +-- !query output 2.5 --- !query 16 +-- !query select 5 / 0 --- !query 16 schema +-- !query schema struct<(CAST(5 AS DOUBLE) / CAST(0 AS DOUBLE)):double> --- !query 16 output +-- !query output NULL --- !query 17 +-- !query select 5 / null --- !query 17 schema +-- !query schema struct<(CAST(5 AS DOUBLE) / CAST(NULL AS DOUBLE)):double> --- !query 17 output +-- !query output NULL --- !query 18 +-- !query select null / 5 --- !query 18 schema +-- !query schema struct<(CAST(NULL AS DOUBLE) / CAST(5 AS DOUBLE)):double> --- !query 18 output +-- !query output NULL --- !query 19 +-- !query select 1 + 2 --- !query 19 schema +-- !query schema struct<(1 + 2):int> --- !query 19 output +-- !query output 3 --- !query 20 +-- !query select 1 - 2 --- !query 20 schema +-- !query schema struct<(1 - 2):int> --- !query 20 output +-- !query output -1 --- !query 21 +-- !query select 2 * 5 --- !query 21 schema +-- !query schema struct<(2 * 5):int> --- !query 21 output +-- !query output 10 --- !query 22 +-- !query select 5 % 3 --- !query 22 schema +-- !query schema struct<(5 % 3):int> --- !query 22 output +-- !query output 2 --- !query 23 +-- !query select pmod(-7, 3) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 2 --- !query 24 +-- !query select cot(1) --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 0.6420926159343306 --- !query 25 +-- !query select cot(null) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output NULL --- !query 26 +-- !query select cot(0) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output Infinity --- !query 
27 +-- !query select cot(-1) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output -0.6420926159343306 --- !query 28 +-- !query select ceiling(0) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 0 --- !query 29 +-- !query select ceiling(1) --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 1 --- !query 30 +-- !query select ceil(1234567890123456) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1234567890123456 --- !query 31 +-- !query select ceiling(1234567890123456) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 1234567890123456 --- !query 32 +-- !query select ceil(0.01) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 1 --- !query 33 +-- !query select ceiling(-0.10) --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output 0 --- !query 34 +-- !query select floor(0) --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 0 --- !query 35 +-- !query select floor(1) --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 1 --- !query 36 +-- !query select floor(1234567890123456) --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 1234567890123456 --- !query 37 +-- !query select floor(0.01) --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 0 --- !query 38 +-- !query select floor(-0.10) --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output -1 --- !query 39 +-- !query select 1 > 0.00001 --- !query 39 schema +-- !query schema struct<(CAST(1 AS BIGINT) > 0):boolean> --- !query 39 output +-- !query output true --- !query 40 +-- !query select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null) --- !query 40 schema +-- !query schema struct<(7 % 2):int,(7 % 
0):int,(0 % 2):int,(7 % CAST(NULL AS INT)):int,(CAST(NULL AS INT) % 2):int,(CAST(NULL AS DOUBLE) % CAST(NULL AS DOUBLE)):double> --- !query 40 output +-- !query output 1 NULL 0 NULL NULL NULL --- !query 41 +-- !query select BIT_LENGTH('abc') --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output 24 --- !query 42 +-- !query select CHAR_LENGTH('abc') --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output 3 --- !query 43 +-- !query select CHARACTER_LENGTH('abc') --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 3 --- !query 44 +-- !query select OCTET_LENGTH('abc') --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output 3 --- !query 45 +-- !query select abs(-3.13), abs('-2.19') --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output 3.13 2.19 --- !query 46 +-- !query select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11) --- !query 46 schema +-- !query schema struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11 AS DOUBLE)):double,(- -1.11):decimal(3,2)> --- !query 46 output +-- !query output -1.11 -1.11 1.11 1.11 --- !query 47 +-- !query select pmod(-7, 2), pmod(0, 2), pmod(7, 0), pmod(7, null), pmod(null, 2), pmod(null, null) --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output 1 0 NULL NULL NULL NULL --- !query 48 +-- !query select pmod(cast(3.13 as decimal), cast(0 as decimal)), pmod(cast(2 as smallint), cast(0 as smallint)) --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/order-by-nulls-ordering.sql.out b/sql/core/src/test/resources/sql-tests/results/order-by-nulls-ordering.sql.out index c1b63dfb8caef..67d271790eef0 100644 --- a/sql/core/src/test/resources/sql-tests/results/order-by-nulls-ordering.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/order-by-nulls-ordering.sql.out @@ -2,32 +2,32 @@ -- Number of queries: 17 --- !query 0 +-- !query create table spark_10747(col1 int, col2 int, col3 int) using parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query INSERT INTO spark_10747 VALUES (6, 12, 10), (6, 11, 4), (6, 9, 10), (6, 15, 8), (6, 15, 8), (6, 7, 4), (6, 7, 8), (6, 13, null), (6, 10, null) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query select col1, col2, col3, sum(col2) over (partition by col1 order by col3 desc nulls last, col2 rows between 2 preceding and 2 following ) as sum_col2 from spark_10747 where col1 = 6 order by sum_col2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 6 9 10 28 6 13 NULL 34 6 10 NULL 41 @@ -39,15 +39,15 @@ struct 6 7 4 58 --- !query 3 +-- !query select col1, col2, col3, sum(col2) over (partition by col1 order by col3 desc nulls first, col2 rows between 2 preceding and 2 following ) as sum_col2 from spark_10747 where col1 = 6 order by sum_col2 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 6 10 NULL 32 6 11 4 33 6 13 NULL 44 @@ -59,15 +59,15 @@ struct 6 7 8 58 --- !query 4 +-- !query select col1, col2, col3, sum(col2) over (partition by col1 order by col3 asc nulls last, col2 rows between 2 preceding and 2 following ) as sum_col2 from spark_10747 where col1 = 6 order by sum_col2 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 6 7 4 25 6 13 NULL 35 6 11 4 40 @@ -79,15 +79,15 @@ struct 6 9 10 61 --- !query 5 +-- !query select col1, col2, col3, sum(col2) over (partition by col1 order by col3 asc nulls first, col2 rows between 2 preceding and 2 following ) as sum_col2 from spark_10747 where col1 = 6 order by sum_col2 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 6 10 
NULL 30 6 12 10 36 6 13 NULL 41 @@ -99,11 +99,11 @@ struct 6 15 8 58 --- !query 6 +-- !query SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 ASC NULLS FIRST, COL2 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 6 10 NULL 6 13 NULL 6 7 4 @@ -115,11 +115,11 @@ struct 6 12 10 --- !query 7 +-- !query SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 NULLS LAST, COL2 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 6 7 4 6 11 4 6 7 8 @@ -131,11 +131,11 @@ struct 6 13 NULL --- !query 8 +-- !query SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS FIRST, COL2 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 6 10 NULL 6 13 NULL 6 9 10 @@ -147,11 +147,11 @@ struct 6 11 4 --- !query 9 +-- !query SELECT COL1, COL2, COL3 FROM spark_10747 ORDER BY COL3 DESC NULLS LAST, COL2 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 6 9 10 6 12 10 6 7 8 @@ -163,15 +163,15 @@ struct 6 13 NULL --- !query 10 +-- !query drop table spark_10747 --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query create table spark_10747_mix( col1 string, col2 int, @@ -179,13 +179,13 @@ col3 double, col4 decimal(10,2), col5 decimal(20,1)) using parquet --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query INSERT INTO spark_10747_mix VALUES ('b', 2, 1.0, 1.00, 10.0), ('d', 3, 2.0, 3.00, 0.0), @@ -195,60 +195,60 @@ INSERT INTO spark_10747_mix VALUES ('d', 3, null, 4.00, 1.0), ('a', 1, 1.0, 1.00, null), ('c', 3, 2.0, 2.00, null) --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query select * from spark_10747_mix order by col1 nulls last, col5 nulls last --- !query 13 schema +-- !query schema struct --- !query 13 output -a 1 1.0 1 NULL -b 2 1.0 1 10 -c 3 2.0 2 15.1 -c 3 2.0 2 NULL -d 
3 2.0 3 0 -d 3 0.0 3 1 -d 3 NULL 4 1 -NULL 3 0.0 3 1 - - --- !query 14 +-- !query output +a 1 1.0 1.00 NULL +b 2 1.0 1.00 10.0 +c 3 2.0 2.00 15.1 +c 3 2.0 2.00 NULL +d 3 2.0 3.00 0.0 +d 3 0.0 3.00 1.0 +d 3 NULL 4.00 1.0 +NULL 3 0.0 3.00 1.0 + + +-- !query select * from spark_10747_mix order by col1 desc nulls first, col5 desc nulls first --- !query 14 schema +-- !query schema struct --- !query 14 output -NULL 3 0.0 3 1 -d 3 0.0 3 1 -d 3 NULL 4 1 -d 3 2.0 3 0 -c 3 2.0 2 NULL -c 3 2.0 2 15.1 -b 2 1.0 1 10 -a 1 1.0 1 NULL - - --- !query 15 +-- !query output +NULL 3 0.0 3.00 1.0 +d 3 0.0 3.00 1.0 +d 3 NULL 4.00 1.0 +d 3 2.0 3.00 0.0 +c 3 2.0 2.00 NULL +c 3 2.0 2.00 15.1 +b 2 1.0 1.00 10.0 +a 1 1.0 1.00 NULL + + +-- !query select * from spark_10747_mix order by col5 desc nulls first, col3 desc nulls last --- !query 15 schema +-- !query schema struct --- !query 15 output -c 3 2.0 2 NULL -a 1 1.0 1 NULL -c 3 2.0 2 15.1 -b 2 1.0 1 10 -d 3 0.0 3 1 -NULL 3 0.0 3 1 -d 3 NULL 4 1 -d 3 2.0 3 0 - - --- !query 16 +-- !query output +c 3 2.0 2.00 NULL +a 1 1.0 1.00 NULL +c 3 2.0 2.00 15.1 +b 2 1.0 1.00 10.0 +d 3 0.0 3.00 1.0 +NULL 3 0.0 3.00 1.0 +d 3 NULL 4.00 1.0 +d 3 2.0 3.00 0.0 + + +-- !query drop table spark_10747_mix --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/order-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/order-by-ordinal.sql.out index cc47cc67c87c8..44c811a7439c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/order-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/order-by-ordinal.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 12 --- !query 0 +-- !query create temporary view data as select * from values (1, 1), (1, 2), @@ -11,17 +11,17 @@ create temporary view data as select * from values (3, 1), (3, 2) as data(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- 
!query select * from data order by 1 desc --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 3 1 3 2 2 1 @@ -30,11 +30,11 @@ struct 1 2 --- !query 2 +-- !query select * from data order by 1 desc, b desc --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 3 2 3 1 2 2 @@ -43,11 +43,11 @@ struct 1 1 --- !query 3 +-- !query select * from data order by 1 desc, 2 desc --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 3 2 3 1 2 2 @@ -56,11 +56,11 @@ struct 1 1 --- !query 4 +-- !query select * from data order by 1 + 0 desc, b desc --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 2 2 2 3 2 @@ -69,38 +69,38 @@ struct 3 1 --- !query 5 +-- !query select * from data order by 0 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException ORDER BY position 0 is not in select list (valid range is [1, 2]); line 1 pos 28 --- !query 6 +-- !query select * from data order by -1 --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException ORDER BY position -1 is not in select list (valid range is [1, 2]); line 1 pos 28 --- !query 7 +-- !query select * from data order by 3 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException ORDER BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 28 --- !query 8 +-- !query select * from data sort by 1 desc --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 1 1 2 2 1 @@ -109,19 +109,19 @@ struct 3 2 --- !query 9 +-- !query set spark.sql.orderByOrdinal=false --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output spark.sql.orderByOrdinal false --- !query 10 +-- !query select * from data order by 0 --- !query 10 schema +-- !query schema struct --- 
!query 10 output +-- !query output 1 1 1 2 2 1 @@ -130,11 +130,11 @@ struct 3 2 --- !query 11 +-- !query select * from data sort by 0 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 1 1 2 2 1 diff --git a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out index 5db3bae5d0379..703ce231c53ff 100644 --- a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out @@ -1,28 +1,28 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 8 +-- Number of queries: 6 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (-234), (145), (367), (975), (298) as t1(int_col1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (-769, -244), (-800, -409), (940, 86), (-507, 304), (-367, 158) as t2(int_col0, int_col1) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT (SUM(COALESCE(t1.int_col1, t2.int_col0))), ((COALESCE(t1.int_col1, t2.int_col0)) * 2) @@ -33,40 +33,32 @@ GROUP BY GREATEST(COALESCE(t2.int_col1, 109), COALESCE(t1.int_col1, -449)), COALESCE(t1.int_col1, t2.int_col0) HAVING (SUM(COALESCE(t1.int_col1, t2.int_col0))) > ((COALESCE(t1.int_col1, t2.int_col0)) * 2) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output -367 -734 -507 -1014 -769 -1538 -800 -1600 --- !query 3 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (97) as t1(int_col1) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (0) as t2(int_col1) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query 
output --- !query 5 -set spark.sql.crossJoin.enabled = true --- !query 5 schema -struct --- !query 5 output -spark.sql.crossJoin.enabled true - - --- !query 6 +-- !query SELECT * FROM ( SELECT @@ -74,15 +66,7 @@ SELECT FROM t1 LEFT JOIN t2 ON false ) t where (t.int_col) is not null --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 97 - - --- !query 7 -set spark.sql.crossJoin.enabled = false --- !query 7 schema -struct --- !query 7 output -spark.sql.crossJoin.enabled false diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part2.sql.out deleted file mode 100644 index 2b5371a657196..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part2.sql.out +++ /dev/null @@ -1,156 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 - - --- !query 0 -create temporary view int4_tbl as select * from values - (0), - (123456), - (-123456), - (2147483647), - (-2147483647) - as int4_tbl(f1) --- !query 0 schema -struct<> --- !query 0 output - - - --- !query 1 -SELECT - (NULL AND NULL) IS NULL AS `t`, - (TRUE AND NULL) IS NULL AS `t`, - (FALSE AND NULL) IS NULL AS `t`, - (NULL AND TRUE) IS NULL AS `t`, - (NULL AND FALSE) IS NULL AS `t`, - (TRUE AND TRUE) AS `t`, - NOT (TRUE AND FALSE) AS `t`, - NOT (FALSE AND TRUE) AS `t`, - NOT (FALSE AND FALSE) AS `t` --- !query 1 schema -struct --- !query 1 output -true true false true false true true true true - - --- !query 2 -SELECT - (NULL OR NULL) IS NULL AS `t`, - (TRUE OR NULL) IS NULL AS `t`, - (FALSE OR NULL) IS NULL AS `t`, - (NULL OR TRUE) IS NULL AS `t`, - (NULL OR FALSE) IS NULL AS `t`, - (TRUE OR TRUE) AS `t`, - (TRUE OR FALSE) AS `t`, - (FALSE OR TRUE) AS `t`, - NOT (FALSE OR FALSE) AS `t` --- !query 2 schema -struct --- !query 2 output -true false true false true true true true true - - --- !query 3 -select min(unique1) from tenk1 --- 
!query 3 schema -struct --- !query 3 output -0 - - --- !query 4 -select max(unique1) from tenk1 --- !query 4 schema -struct --- !query 4 output -9999 - - --- !query 5 -select max(unique1) from tenk1 where unique1 < 42 --- !query 5 schema -struct --- !query 5 output -41 - - --- !query 6 -select max(unique1) from tenk1 where unique1 > 42 --- !query 6 schema -struct --- !query 6 output -9999 - - --- !query 7 -select max(unique1) from tenk1 where unique1 > 42000 --- !query 7 schema -struct --- !query 7 output -NULL - - --- !query 8 -select max(tenthous) from tenk1 where thousand = 33 --- !query 8 schema -struct --- !query 8 output -9033 - - --- !query 9 -select min(tenthous) from tenk1 where thousand = 33 --- !query 9 schema -struct --- !query 9 output -33 - - --- !query 10 -select distinct max(unique2) from tenk1 --- !query 10 schema -struct --- !query 10 output -9999 - - --- !query 11 -select max(unique2) from tenk1 order by 1 --- !query 11 schema -struct --- !query 11 output -9999 - - --- !query 12 -select max(unique2) from tenk1 order by max(unique2) --- !query 12 schema -struct --- !query 12 output -9999 - - --- !query 13 -select max(unique2) from tenk1 order by max(unique2)+1 --- !query 13 schema -struct --- !query 13 output -9999 - - --- !query 14 -select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc --- !query 14 schema -struct --- !query 14 output -9999 3 -9999 2 -9999 1 - - --- !query 15 -select max(100) from tenk1 --- !query 15 schema -struct --- !query 15 output -100 diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part3.sql.out deleted file mode 100644 index f102383cb4d8f..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part3.sql.out +++ /dev/null @@ -1,22 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of 
queries: 2 - - --- !query 0 -select max(min(unique1)) from tenk1 --- !query 0 schema -struct<> --- !query 0 output -org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; - - --- !query 1 -select (select count(*) - from (values (1)) t0(inner_c)) -from (values (2),(3)) t1(outer_c) --- !query 1 schema -struct --- !query 1 output -1 -1 diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out deleted file mode 100644 index cb2be6d1cd22d..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/date.sql.out +++ /dev/null @@ -1,853 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 91 - - --- !query 0 -CREATE TABLE DATE_TBL (f1 date) USING parquet --- !query 0 schema -struct<> --- !query 0 output - - - --- !query 1 -INSERT INTO DATE_TBL VALUES ('1957-04-09') --- !query 1 schema -struct<> --- !query 1 output - - - --- !query 2 -INSERT INTO DATE_TBL VALUES ('1957-06-13') --- !query 2 schema -struct<> --- !query 2 output - - - --- !query 3 -INSERT INTO DATE_TBL VALUES ('1996-02-28') --- !query 3 schema -struct<> --- !query 3 output - - - --- !query 4 -INSERT INTO DATE_TBL VALUES ('1996-02-29') --- !query 4 schema -struct<> --- !query 4 output - - - --- !query 5 -INSERT INTO DATE_TBL VALUES ('1996-03-01') --- !query 5 schema -struct<> --- !query 5 output - - - --- !query 6 -INSERT INTO DATE_TBL VALUES ('1996-03-02') --- !query 6 schema -struct<> --- !query 6 output - - - --- !query 7 -INSERT INTO DATE_TBL VALUES ('1997-02-28') --- !query 7 schema -struct<> --- !query 7 output - - - --- !query 8 -INSERT INTO DATE_TBL VALUES ('1997-03-01') --- !query 8 schema -struct<> --- !query 8 output - - - --- !query 9 -INSERT INTO DATE_TBL VALUES ('1997-03-02') --- !query 9 schema -struct<> --- !query 9 output - - - --- 
!query 10 -INSERT INTO DATE_TBL VALUES ('2000-04-01') --- !query 10 schema -struct<> --- !query 10 output - - - --- !query 11 -INSERT INTO DATE_TBL VALUES ('2000-04-02') --- !query 11 schema -struct<> --- !query 11 output - - - --- !query 12 -INSERT INTO DATE_TBL VALUES ('2000-04-03') --- !query 12 schema -struct<> --- !query 12 output - - - --- !query 13 -INSERT INTO DATE_TBL VALUES ('2038-04-08') --- !query 13 schema -struct<> --- !query 13 output - - - --- !query 14 -INSERT INTO DATE_TBL VALUES ('2039-04-09') --- !query 14 schema -struct<> --- !query 14 output - - - --- !query 15 -INSERT INTO DATE_TBL VALUES ('2040-04-10') --- !query 15 schema -struct<> --- !query 15 output - - - --- !query 16 -SELECT f1 AS `Fifteen` FROM DATE_TBL --- !query 16 schema -struct --- !query 16 output -1957-04-09 -1957-06-13 -1996-02-28 -1996-02-29 -1996-03-01 -1996-03-02 -1997-02-28 -1997-03-01 -1997-03-02 -2000-04-01 -2000-04-02 -2000-04-03 -2038-04-08 -2039-04-09 -2040-04-10 - - --- !query 17 -SELECT f1 AS `Nine` FROM DATE_TBL WHERE f1 < '2000-01-01' --- !query 17 schema -struct --- !query 17 output -1957-04-09 -1957-06-13 -1996-02-28 -1996-02-29 -1996-03-01 -1996-03-02 -1997-02-28 -1997-03-01 -1997-03-02 - - --- !query 18 -SELECT f1 AS `Three` FROM DATE_TBL - WHERE f1 BETWEEN '2000-01-01' AND '2001-01-01' --- !query 18 schema -struct --- !query 18 output -2000-04-01 -2000-04-02 -2000-04-03 - - --- !query 19 -SELECT date '1999-01-08' --- !query 19 schema -struct --- !query 19 output -1999-01-08 - - --- !query 20 -SELECT date '1999-01-18' --- !query 20 schema -struct --- !query 20 output -1999-01-18 - - --- !query 21 -SELECT date '1999 Jan 08' --- !query 21 schema -struct<> --- !query 21 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7) - -== SQL == -SELECT date '1999 Jan 08' --------^^^ - - --- !query 22 -SELECT date '1999 08 Jan' --- !query 22 schema -struct<> --- !query 22 output 
-org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7) - -== SQL == -SELECT date '1999 08 Jan' --------^^^ - - --- !query 23 -SELECT date '1999-01-08' --- !query 23 schema -struct --- !query 23 output -1999-01-08 - - --- !query 24 -SELECT date '1999-08-01' --- !query 24 schema -struct --- !query 24 output -1999-08-01 - - --- !query 25 -SELECT date '1999 01 08' --- !query 25 schema -struct<> --- !query 25 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 01 08(line 1, pos 7) - -== SQL == -SELECT date '1999 01 08' --------^^^ - - --- !query 26 -SELECT date '1999 08 01' --- !query 26 schema -struct<> --- !query 26 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 08 01(line 1, pos 7) - -== SQL == -SELECT date '1999 08 01' --------^^^ - - --- !query 27 -SELECT date '1999-01-08' --- !query 27 schema -struct --- !query 27 output -1999-01-08 - - --- !query 28 -SELECT date '1999 Jan 08' --- !query 28 schema -struct<> --- !query 28 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7) - -== SQL == -SELECT date '1999 Jan 08' --------^^^ - - --- !query 29 -SELECT date '1999 08 Jan' --- !query 29 schema -struct<> --- !query 29 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7) - -== SQL == -SELECT date '1999 08 Jan' --------^^^ - - --- !query 30 -SELECT date '1999-01-08' --- !query 30 schema -struct --- !query 30 output -1999-01-08 - - --- !query 31 -SELECT date '1999-08-01' --- !query 31 schema -struct --- !query 31 output -1999-08-01 - - --- !query 32 -SELECT date '1999 01 08' --- !query 32 schema -struct<> --- !query 32 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 01 08(line 1, pos 7) - -== SQL == -SELECT date '1999 01 08' --------^^^ - - --- 
!query 33 -SELECT date '1999 08 01' --- !query 33 schema -struct<> --- !query 33 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 08 01(line 1, pos 7) - -== SQL == -SELECT date '1999 08 01' --------^^^ - - --- !query 34 -SELECT date '1999-01-08' --- !query 34 schema -struct --- !query 34 output -1999-01-08 - - --- !query 35 -SELECT date '1999-01-18' --- !query 35 schema -struct --- !query 35 output -1999-01-18 - - --- !query 36 -SELECT date '1999 Jan 08' --- !query 36 schema -struct<> --- !query 36 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7) - -== SQL == -SELECT date '1999 Jan 08' --------^^^ - - --- !query 37 -SELECT date '1999 08 Jan' --- !query 37 schema -struct<> --- !query 37 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7) - -== SQL == -SELECT date '1999 08 Jan' --------^^^ - - --- !query 38 -SELECT date '1999-01-08' --- !query 38 schema -struct --- !query 38 output -1999-01-08 - - --- !query 39 -SELECT date '1999-08-01' --- !query 39 schema -struct --- !query 39 output -1999-08-01 - - --- !query 40 -SELECT date '1999 01 08' --- !query 40 schema -struct<> --- !query 40 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 01 08(line 1, pos 7) - -== SQL == -SELECT date '1999 01 08' --------^^^ - - --- !query 41 -SELECT date '1999 08 01' --- !query 41 schema -struct<> --- !query 41 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 1999 08 01(line 1, pos 7) - -== SQL == -SELECT date '1999 08 01' --------^^^ - - --- !query 42 -SELECT date '4714-11-24 BC' --- !query 42 schema -struct --- !query 42 output -4714-11-24 - - --- !query 43 -SELECT date '4714-11-23 BC' --- !query 43 schema -struct --- !query 43 output -4714-11-23 - - --- !query 44 -SELECT date '5874897-12-31' --- !query 44 schema 
-struct<> --- !query 44 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 5874897-12-31(line 1, pos 7) - -== SQL == -SELECT date '5874897-12-31' --------^^^ - - --- !query 45 -SELECT date '5874898-01-01' --- !query 45 schema -struct<> --- !query 45 output -org.apache.spark.sql.catalyst.parser.ParseException - -Cannot parse the DATE value: 5874898-01-01(line 1, pos 7) - -== SQL == -SELECT date '5874898-01-01' --------^^^ - - --- !query 46 -SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL --- !query 46 schema -struct --- !query 46 output --1035 --1036 --1037 --1400 --1401 --1402 --1403 --15542 --15607 -13977 -14343 -14710 -91 -92 -93 - - --- !query 47 -SELECT EXTRACT(EPOCH FROM DATE '1970-01-01') --- !query 47 schema -struct --- !query 47 output -0 - - --- !query 48 -SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01') --- !query 48 schema -struct --- !query 48 output -0 - - --- !query 49 -SELECT EXTRACT(CENTURY FROM TO_DATE('0101-12-31 BC', 'yyyy-MM-dd G')) --- !query 49 schema -struct --- !query 49 output --2 - - --- !query 50 -SELECT EXTRACT(CENTURY FROM TO_DATE('0100-12-31 BC', 'yyyy-MM-dd G')) --- !query 50 schema -struct --- !query 50 output --1 - - --- !query 51 -SELECT EXTRACT(CENTURY FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) --- !query 51 schema -struct --- !query 51 output --1 - - --- !query 52 -SELECT EXTRACT(CENTURY FROM DATE '0001-01-01') --- !query 52 schema -struct --- !query 52 output -1 - - --- !query 53 -SELECT EXTRACT(CENTURY FROM DATE '0001-01-01 AD') --- !query 53 schema -struct --- !query 53 output -1 - - --- !query 54 -SELECT EXTRACT(CENTURY FROM DATE '1900-12-31') --- !query 54 schema -struct --- !query 54 output -19 - - --- !query 55 -SELECT EXTRACT(CENTURY FROM DATE '1901-01-01') --- !query 55 schema -struct --- !query 55 output -20 - - --- !query 56 -SELECT EXTRACT(CENTURY FROM DATE '2000-12-31') --- !query 56 schema -struct --- !query 56 output -20 - - --- !query 57 -SELECT 
EXTRACT(CENTURY FROM DATE '2001-01-01') --- !query 57 schema -struct --- !query 57 output -21 - - --- !query 58 -SELECT EXTRACT(CENTURY FROM CURRENT_DATE)>=21 AS True --- !query 58 schema -struct --- !query 58 output -true - - --- !query 59 -SELECT EXTRACT(MILLENNIUM FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) --- !query 59 schema -struct --- !query 59 output --1 - - --- !query 60 -SELECT EXTRACT(MILLENNIUM FROM DATE '0001-01-01 AD') --- !query 60 schema -struct --- !query 60 output -1 - - --- !query 61 -SELECT EXTRACT(MILLENNIUM FROM DATE '1000-12-31') --- !query 61 schema -struct --- !query 61 output -1 - - --- !query 62 -SELECT EXTRACT(MILLENNIUM FROM DATE '1001-01-01') --- !query 62 schema -struct --- !query 62 output -2 - - --- !query 63 -SELECT EXTRACT(MILLENNIUM FROM DATE '2000-12-31') --- !query 63 schema -struct --- !query 63 output -2 - - --- !query 64 -SELECT EXTRACT(MILLENNIUM FROM DATE '2001-01-01') --- !query 64 schema -struct --- !query 64 output -3 - - --- !query 65 -SELECT EXTRACT(MILLENNIUM FROM CURRENT_DATE) --- !query 65 schema -struct --- !query 65 output -3 - - --- !query 66 -SELECT EXTRACT(DECADE FROM DATE '1994-12-25') --- !query 66 schema -struct --- !query 66 output -199 - - --- !query 67 -SELECT EXTRACT(DECADE FROM DATE '0010-01-01') --- !query 67 schema -struct --- !query 67 output -1 - - --- !query 68 -SELECT EXTRACT(DECADE FROM DATE '0009-12-31') --- !query 68 schema -struct --- !query 68 output -0 - - --- !query 69 -SELECT EXTRACT(DECADE FROM TO_DATE('0001-01-01 BC', 'yyyy-MM-dd G')) --- !query 69 schema -struct --- !query 69 output -0 - - --- !query 70 -SELECT EXTRACT(DECADE FROM TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) --- !query 70 schema -struct --- !query 70 output --1 - - --- !query 71 -SELECT EXTRACT(DECADE FROM TO_DATE('0011-01-01 BC', 'yyyy-MM-dd G')) --- !query 71 schema -struct --- !query 71 output --1 - - --- !query 72 -SELECT EXTRACT(DECADE FROM TO_DATE('0012-12-31 BC', 'yyyy-MM-dd G')) --- !query 72 schema -struct 
--- !query 72 output --2 - - --- !query 73 -SELECT EXTRACT(CENTURY FROM NOW())>=21 AS True --- !query 73 schema -struct --- !query 73 output -true - - --- !query 74 -SELECT EXTRACT(CENTURY FROM TIMESTAMP '1970-03-20 04:30:00.00000') --- !query 74 schema -struct --- !query 74 output -20 - - --- !query 75 -SELECT DATE_TRUNC('MILLENNIUM', TIMESTAMP '1970-03-20 04:30:00.00000') --- !query 75 schema -struct --- !query 75 output -1001-01-01 00:07:02 - - --- !query 76 -SELECT DATE_TRUNC('MILLENNIUM', DATE '1970-03-20') --- !query 76 schema -struct --- !query 76 output -1001-01-01 00:07:02 - - --- !query 77 -SELECT DATE_TRUNC('CENTURY', TIMESTAMP '1970-03-20 04:30:00.00000') --- !query 77 schema -struct --- !query 77 output -1901-01-01 00:00:00 - - --- !query 78 -SELECT DATE_TRUNC('CENTURY', DATE '1970-03-20') --- !query 78 schema -struct --- !query 78 output -1901-01-01 00:00:00 - - --- !query 79 -SELECT DATE_TRUNC('CENTURY', DATE '2004-08-10') --- !query 79 schema -struct --- !query 79 output -2001-01-01 00:00:00 - - --- !query 80 -SELECT DATE_TRUNC('CENTURY', DATE '0002-02-04') --- !query 80 schema -struct --- !query 80 output -0001-01-01 00:07:02 - - --- !query 81 -SELECT DATE_TRUNC('CENTURY', TO_DATE('0055-08-10 BC', 'yyyy-MM-dd G')) --- !query 81 schema -struct --- !query 81 output --0099-01-01 00:07:02 - - --- !query 82 -SELECT DATE_TRUNC('DECADE', DATE '1993-12-25') --- !query 82 schema -struct --- !query 82 output -1990-01-01 00:00:00 - - --- !query 83 -SELECT DATE_TRUNC('DECADE', DATE '0004-12-25') --- !query 83 schema -struct --- !query 83 output -0000-01-01 00:07:02 - - --- !query 84 -SELECT DATE_TRUNC('DECADE', TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) --- !query 84 schema -struct --- !query 84 output --0010-01-01 00:07:02 - - --- !query 85 -select make_date(2013, 7, 15) --- !query 85 schema -struct --- !query 85 output -2013-07-15 - - --- !query 86 -select make_date(-44, 3, 15) --- !query 86 schema -struct --- !query 86 output --0044-03-15 - - --- !query 87 
-select make_date(2013, 2, 30) --- !query 87 schema -struct --- !query 87 output -NULL - - --- !query 88 -select make_date(2013, 13, 1) --- !query 88 schema -struct --- !query 88 output -NULL - - --- !query 89 -select make_date(2013, 11, -1) --- !query 89 schema -struct --- !query 89 output -NULL - - --- !query 90 -DROP TABLE DATE_TBL --- !query 90 schema -struct<> --- !query 90 output - diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/numeric.sql.out deleted file mode 100644 index ed649feaaebb2..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/numeric.sql.out +++ /dev/null @@ -1,4864 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 577 - - --- !query 0 -CREATE TABLE num_data (id int, val decimal(38,10)) USING parquet --- !query 0 schema -struct<> --- !query 0 output - - - --- !query 1 -CREATE TABLE num_exp_add (id1 int, id2 int, expected decimal(38,10)) USING parquet --- !query 1 schema -struct<> --- !query 1 output - - - --- !query 2 -CREATE TABLE num_exp_sub (id1 int, id2 int, expected decimal(38,10)) USING parquet --- !query 2 schema -struct<> --- !query 2 output - - - --- !query 3 -CREATE TABLE num_exp_div (id1 int, id2 int, expected decimal(38,10)) USING parquet --- !query 3 schema -struct<> --- !query 3 output - - - --- !query 4 -CREATE TABLE num_exp_mul (id1 int, id2 int, expected decimal(38,10)) USING parquet --- !query 4 schema -struct<> --- !query 4 output - - - --- !query 5 -CREATE TABLE num_exp_sqrt (id int, expected decimal(38,10)) USING parquet --- !query 5 schema -struct<> --- !query 5 output - - - --- !query 6 -CREATE TABLE num_exp_ln (id int, expected decimal(38,10)) USING parquet --- !query 6 schema -struct<> --- !query 6 output - - - --- !query 7 -CREATE TABLE num_exp_log10 (id int, expected decimal(38,10)) USING parquet --- !query 7 schema -struct<> --- !query 7 output - - - --- !query 8 -CREATE TABLE 
num_exp_power_10_ln (id int, expected decimal(38,10)) USING parquet --- !query 8 schema -struct<> --- !query 8 output - - - --- !query 9 -CREATE TABLE num_result (id1 int, id2 int, result decimal(38,10)) USING parquet --- !query 9 schema -struct<> --- !query 9 output - - - --- !query 10 -INSERT INTO num_exp_add VALUES (0,0,'0') --- !query 10 schema -struct<> --- !query 10 output - - - --- !query 11 -INSERT INTO num_exp_sub VALUES (0,0,'0') --- !query 11 schema -struct<> --- !query 11 output - - - --- !query 12 -INSERT INTO num_exp_mul VALUES (0,0,'0') --- !query 12 schema -struct<> --- !query 12 output - - - --- !query 13 -INSERT INTO num_exp_div VALUES (0,0,'NaN') --- !query 13 schema -struct<> --- !query 13 output - - - --- !query 14 -INSERT INTO num_exp_add VALUES (0,1,'0') --- !query 14 schema -struct<> --- !query 14 output - - - --- !query 15 -INSERT INTO num_exp_sub VALUES (0,1,'0') --- !query 15 schema -struct<> --- !query 15 output - - - --- !query 16 -INSERT INTO num_exp_mul VALUES (0,1,'0') --- !query 16 schema -struct<> --- !query 16 output - - - --- !query 17 -INSERT INTO num_exp_div VALUES (0,1,'NaN') --- !query 17 schema -struct<> --- !query 17 output - - - --- !query 18 -INSERT INTO num_exp_add VALUES (0,2,'-34338492.215397047') --- !query 18 schema -struct<> --- !query 18 output - - - --- !query 19 -INSERT INTO num_exp_sub VALUES (0,2,'34338492.215397047') --- !query 19 schema -struct<> --- !query 19 output - - - --- !query 20 -INSERT INTO num_exp_mul VALUES (0,2,'0') --- !query 20 schema -struct<> --- !query 20 output - - - --- !query 21 -INSERT INTO num_exp_div VALUES (0,2,'0') --- !query 21 schema -struct<> --- !query 21 output - - - --- !query 22 -INSERT INTO num_exp_add VALUES (0,3,'4.31') --- !query 22 schema -struct<> --- !query 22 output - - - --- !query 23 -INSERT INTO num_exp_sub VALUES (0,3,'-4.31') --- !query 23 schema -struct<> --- !query 23 output - - - --- !query 24 -INSERT INTO num_exp_mul VALUES (0,3,'0') --- !query 24 schema 
-struct<> --- !query 24 output - - - --- !query 25 -INSERT INTO num_exp_div VALUES (0,3,'0') --- !query 25 schema -struct<> --- !query 25 output - - - --- !query 26 -INSERT INTO num_exp_add VALUES (0,4,'7799461.4119') --- !query 26 schema -struct<> --- !query 26 output - - - --- !query 27 -INSERT INTO num_exp_sub VALUES (0,4,'-7799461.4119') --- !query 27 schema -struct<> --- !query 27 output - - - --- !query 28 -INSERT INTO num_exp_mul VALUES (0,4,'0') --- !query 28 schema -struct<> --- !query 28 output - - - --- !query 29 -INSERT INTO num_exp_div VALUES (0,4,'0') --- !query 29 schema -struct<> --- !query 29 output - - - --- !query 30 -INSERT INTO num_exp_add VALUES (0,5,'16397.038491') --- !query 30 schema -struct<> --- !query 30 output - - - --- !query 31 -INSERT INTO num_exp_sub VALUES (0,5,'-16397.038491') --- !query 31 schema -struct<> --- !query 31 output - - - --- !query 32 -INSERT INTO num_exp_mul VALUES (0,5,'0') --- !query 32 schema -struct<> --- !query 32 output - - - --- !query 33 -INSERT INTO num_exp_div VALUES (0,5,'0') --- !query 33 schema -struct<> --- !query 33 output - - - --- !query 34 -INSERT INTO num_exp_add VALUES (0,6,'93901.57763026') --- !query 34 schema -struct<> --- !query 34 output - - - --- !query 35 -INSERT INTO num_exp_sub VALUES (0,6,'-93901.57763026') --- !query 35 schema -struct<> --- !query 35 output - - - --- !query 36 -INSERT INTO num_exp_mul VALUES (0,6,'0') --- !query 36 schema -struct<> --- !query 36 output - - - --- !query 37 -INSERT INTO num_exp_div VALUES (0,6,'0') --- !query 37 schema -struct<> --- !query 37 output - - - --- !query 38 -INSERT INTO num_exp_add VALUES (0,7,'-83028485') --- !query 38 schema -struct<> --- !query 38 output - - - --- !query 39 -INSERT INTO num_exp_sub VALUES (0,7,'83028485') --- !query 39 schema -struct<> --- !query 39 output - - - --- !query 40 -INSERT INTO num_exp_mul VALUES (0,7,'0') --- !query 40 schema -struct<> --- !query 40 output - - - --- !query 41 -INSERT INTO num_exp_div VALUES 
(0,7,'0') --- !query 41 schema -struct<> --- !query 41 output - - - --- !query 42 -INSERT INTO num_exp_add VALUES (0,8,'74881') --- !query 42 schema -struct<> --- !query 42 output - - - --- !query 43 -INSERT INTO num_exp_sub VALUES (0,8,'-74881') --- !query 43 schema -struct<> --- !query 43 output - - - --- !query 44 -INSERT INTO num_exp_mul VALUES (0,8,'0') --- !query 44 schema -struct<> --- !query 44 output - - - --- !query 45 -INSERT INTO num_exp_div VALUES (0,8,'0') --- !query 45 schema -struct<> --- !query 45 output - - - --- !query 46 -INSERT INTO num_exp_add VALUES (0,9,'-24926804.045047420') --- !query 46 schema -struct<> --- !query 46 output - - - --- !query 47 -INSERT INTO num_exp_sub VALUES (0,9,'24926804.045047420') --- !query 47 schema -struct<> --- !query 47 output - - - --- !query 48 -INSERT INTO num_exp_mul VALUES (0,9,'0') --- !query 48 schema -struct<> --- !query 48 output - - - --- !query 49 -INSERT INTO num_exp_div VALUES (0,9,'0') --- !query 49 schema -struct<> --- !query 49 output - - - --- !query 50 -INSERT INTO num_exp_add VALUES (1,0,'0') --- !query 50 schema -struct<> --- !query 50 output - - - --- !query 51 -INSERT INTO num_exp_sub VALUES (1,0,'0') --- !query 51 schema -struct<> --- !query 51 output - - - --- !query 52 -INSERT INTO num_exp_mul VALUES (1,0,'0') --- !query 52 schema -struct<> --- !query 52 output - - - --- !query 53 -INSERT INTO num_exp_div VALUES (1,0,'NaN') --- !query 53 schema -struct<> --- !query 53 output - - - --- !query 54 -INSERT INTO num_exp_add VALUES (1,1,'0') --- !query 54 schema -struct<> --- !query 54 output - - - --- !query 55 -INSERT INTO num_exp_sub VALUES (1,1,'0') --- !query 55 schema -struct<> --- !query 55 output - - - --- !query 56 -INSERT INTO num_exp_mul VALUES (1,1,'0') --- !query 56 schema -struct<> --- !query 56 output - - - --- !query 57 -INSERT INTO num_exp_div VALUES (1,1,'NaN') --- !query 57 schema -struct<> --- !query 57 output - - - --- !query 58 -INSERT INTO num_exp_add VALUES 
(1,2,'-34338492.215397047') --- !query 58 schema -struct<> --- !query 58 output - - - --- !query 59 -INSERT INTO num_exp_sub VALUES (1,2,'34338492.215397047') --- !query 59 schema -struct<> --- !query 59 output - - - --- !query 60 -INSERT INTO num_exp_mul VALUES (1,2,'0') --- !query 60 schema -struct<> --- !query 60 output - - - --- !query 61 -INSERT INTO num_exp_div VALUES (1,2,'0') --- !query 61 schema -struct<> --- !query 61 output - - - --- !query 62 -INSERT INTO num_exp_add VALUES (1,3,'4.31') --- !query 62 schema -struct<> --- !query 62 output - - - --- !query 63 -INSERT INTO num_exp_sub VALUES (1,3,'-4.31') --- !query 63 schema -struct<> --- !query 63 output - - - --- !query 64 -INSERT INTO num_exp_mul VALUES (1,3,'0') --- !query 64 schema -struct<> --- !query 64 output - - - --- !query 65 -INSERT INTO num_exp_div VALUES (1,3,'0') --- !query 65 schema -struct<> --- !query 65 output - - - --- !query 66 -INSERT INTO num_exp_add VALUES (1,4,'7799461.4119') --- !query 66 schema -struct<> --- !query 66 output - - - --- !query 67 -INSERT INTO num_exp_sub VALUES (1,4,'-7799461.4119') --- !query 67 schema -struct<> --- !query 67 output - - - --- !query 68 -INSERT INTO num_exp_mul VALUES (1,4,'0') --- !query 68 schema -struct<> --- !query 68 output - - - --- !query 69 -INSERT INTO num_exp_div VALUES (1,4,'0') --- !query 69 schema -struct<> --- !query 69 output - - - --- !query 70 -INSERT INTO num_exp_add VALUES (1,5,'16397.038491') --- !query 70 schema -struct<> --- !query 70 output - - - --- !query 71 -INSERT INTO num_exp_sub VALUES (1,5,'-16397.038491') --- !query 71 schema -struct<> --- !query 71 output - - - --- !query 72 -INSERT INTO num_exp_mul VALUES (1,5,'0') --- !query 72 schema -struct<> --- !query 72 output - - - --- !query 73 -INSERT INTO num_exp_div VALUES (1,5,'0') --- !query 73 schema -struct<> --- !query 73 output - - - --- !query 74 -INSERT INTO num_exp_add VALUES (1,6,'93901.57763026') --- !query 74 schema -struct<> --- !query 74 output - - - --- 
!query 75 -INSERT INTO num_exp_sub VALUES (1,6,'-93901.57763026') --- !query 75 schema -struct<> --- !query 75 output - - - --- !query 76 -INSERT INTO num_exp_mul VALUES (1,6,'0') --- !query 76 schema -struct<> --- !query 76 output - - - --- !query 77 -INSERT INTO num_exp_div VALUES (1,6,'0') --- !query 77 schema -struct<> --- !query 77 output - - - --- !query 78 -INSERT INTO num_exp_add VALUES (1,7,'-83028485') --- !query 78 schema -struct<> --- !query 78 output - - - --- !query 79 -INSERT INTO num_exp_sub VALUES (1,7,'83028485') --- !query 79 schema -struct<> --- !query 79 output - - - --- !query 80 -INSERT INTO num_exp_mul VALUES (1,7,'0') --- !query 80 schema -struct<> --- !query 80 output - - - --- !query 81 -INSERT INTO num_exp_div VALUES (1,7,'0') --- !query 81 schema -struct<> --- !query 81 output - - - --- !query 82 -INSERT INTO num_exp_add VALUES (1,8,'74881') --- !query 82 schema -struct<> --- !query 82 output - - - --- !query 83 -INSERT INTO num_exp_sub VALUES (1,8,'-74881') --- !query 83 schema -struct<> --- !query 83 output - - - --- !query 84 -INSERT INTO num_exp_mul VALUES (1,8,'0') --- !query 84 schema -struct<> --- !query 84 output - - - --- !query 85 -INSERT INTO num_exp_div VALUES (1,8,'0') --- !query 85 schema -struct<> --- !query 85 output - - - --- !query 86 -INSERT INTO num_exp_add VALUES (1,9,'-24926804.045047420') --- !query 86 schema -struct<> --- !query 86 output - - - --- !query 87 -INSERT INTO num_exp_sub VALUES (1,9,'24926804.045047420') --- !query 87 schema -struct<> --- !query 87 output - - - --- !query 88 -INSERT INTO num_exp_mul VALUES (1,9,'0') --- !query 88 schema -struct<> --- !query 88 output - - - --- !query 89 -INSERT INTO num_exp_div VALUES (1,9,'0') --- !query 89 schema -struct<> --- !query 89 output - - - --- !query 90 -INSERT INTO num_exp_add VALUES (2,0,'-34338492.215397047') --- !query 90 schema -struct<> --- !query 90 output - - - --- !query 91 -INSERT INTO num_exp_sub VALUES (2,0,'-34338492.215397047') --- !query 91 
schema -struct<> --- !query 91 output - - - --- !query 92 -INSERT INTO num_exp_mul VALUES (2,0,'0') --- !query 92 schema -struct<> --- !query 92 output - - - --- !query 93 -INSERT INTO num_exp_div VALUES (2,0,'NaN') --- !query 93 schema -struct<> --- !query 93 output - - - --- !query 94 -INSERT INTO num_exp_add VALUES (2,1,'-34338492.215397047') --- !query 94 schema -struct<> --- !query 94 output - - - --- !query 95 -INSERT INTO num_exp_sub VALUES (2,1,'-34338492.215397047') --- !query 95 schema -struct<> --- !query 95 output - - - --- !query 96 -INSERT INTO num_exp_mul VALUES (2,1,'0') --- !query 96 schema -struct<> --- !query 96 output - - - --- !query 97 -INSERT INTO num_exp_div VALUES (2,1,'NaN') --- !query 97 schema -struct<> --- !query 97 output - - - --- !query 98 -INSERT INTO num_exp_add VALUES (2,2,'-68676984.430794094') --- !query 98 schema -struct<> --- !query 98 output - - - --- !query 99 -INSERT INTO num_exp_sub VALUES (2,2,'0') --- !query 99 schema -struct<> --- !query 99 output - - - --- !query 100 -INSERT INTO num_exp_mul VALUES (2,2,'1179132047626883.596862135856320209') --- !query 100 schema -struct<> --- !query 100 output - - - --- !query 101 -INSERT INTO num_exp_div VALUES (2,2,'1.00000000000000000000') --- !query 101 schema -struct<> --- !query 101 output - - - --- !query 102 -INSERT INTO num_exp_add VALUES (2,3,'-34338487.905397047') --- !query 102 schema -struct<> --- !query 102 output - - - --- !query 103 -INSERT INTO num_exp_sub VALUES (2,3,'-34338496.525397047') --- !query 103 schema -struct<> --- !query 103 output - - - --- !query 104 -INSERT INTO num_exp_mul VALUES (2,3,'-147998901.44836127257') --- !query 104 schema -struct<> --- !query 104 output - - - --- !query 105 -INSERT INTO num_exp_div VALUES (2,3,'-7967167.56737750510440835266') --- !query 105 schema -struct<> --- !query 105 output - - - --- !query 106 -INSERT INTO num_exp_add VALUES (2,4,'-26539030.803497047') --- !query 106 schema -struct<> --- !query 106 output - - - --- 
!query 107 -INSERT INTO num_exp_sub VALUES (2,4,'-42137953.627297047') --- !query 107 schema -struct<> --- !query 107 output - - - --- !query 108 -INSERT INTO num_exp_mul VALUES (2,4,'-267821744976817.8111137106593') --- !query 108 schema -struct<> --- !query 108 output - - - --- !query 109 -INSERT INTO num_exp_div VALUES (2,4,'-4.40267480046830116685') --- !query 109 schema -struct<> --- !query 109 output - - - --- !query 110 -INSERT INTO num_exp_add VALUES (2,5,'-34322095.176906047') --- !query 110 schema -struct<> --- !query 110 output - - - --- !query 111 -INSERT INTO num_exp_sub VALUES (2,5,'-34354889.253888047') --- !query 111 schema -struct<> --- !query 111 output - - - --- !query 112 -INSERT INTO num_exp_mul VALUES (2,5,'-563049578578.769242506736077') --- !query 112 schema -struct<> --- !query 112 output - - - --- !query 113 -INSERT INTO num_exp_div VALUES (2,5,'-2094.18866914563535496429') --- !query 113 schema -struct<> --- !query 113 output - - - --- !query 114 -INSERT INTO num_exp_add VALUES (2,6,'-34244590.637766787') --- !query 114 schema -struct<> --- !query 114 output - - - --- !query 115 -INSERT INTO num_exp_sub VALUES (2,6,'-34432393.793027307') --- !query 115 schema -struct<> --- !query 115 output - - - --- !query 116 -INSERT INTO num_exp_mul VALUES (2,6,'-3224438592470.18449811926184222') --- !query 116 schema -struct<> --- !query 116 output - - - --- !query 117 -INSERT INTO num_exp_div VALUES (2,6,'-365.68599891479766440940') --- !query 117 schema -struct<> --- !query 117 output - - - --- !query 118 -INSERT INTO num_exp_add VALUES (2,7,'-117366977.215397047') --- !query 118 schema -struct<> --- !query 118 output - - - --- !query 119 -INSERT INTO num_exp_sub VALUES (2,7,'48689992.784602953') --- !query 119 schema -struct<> --- !query 119 output - - - --- !query 120 -INSERT INTO num_exp_mul VALUES (2,7,'2851072985828710.485883795') --- !query 120 schema -struct<> --- !query 120 output - - - --- !query 121 -INSERT INTO num_exp_div VALUES 
(2,7,'.41357483778485235518') --- !query 121 schema -struct<> --- !query 121 output - - - --- !query 122 -INSERT INTO num_exp_add VALUES (2,8,'-34263611.215397047') --- !query 122 schema -struct<> --- !query 122 output - - - --- !query 123 -INSERT INTO num_exp_sub VALUES (2,8,'-34413373.215397047') --- !query 123 schema -struct<> --- !query 123 output - - - --- !query 124 -INSERT INTO num_exp_mul VALUES (2,8,'-2571300635581.146276407') --- !query 124 schema -struct<> --- !query 124 output - - - --- !query 125 -INSERT INTO num_exp_div VALUES (2,8,'-458.57416721727870888476') --- !query 125 schema -struct<> --- !query 125 output - - - --- !query 126 -INSERT INTO num_exp_add VALUES (2,9,'-59265296.260444467') --- !query 126 schema -struct<> --- !query 126 output - - - --- !query 127 -INSERT INTO num_exp_sub VALUES (2,9,'-9411688.170349627') --- !query 127 schema -struct<> --- !query 127 output - - - --- !query 128 -INSERT INTO num_exp_mul VALUES (2,9,'855948866655588.453741509242968740') --- !query 128 schema -struct<> --- !query 128 output - - - --- !query 129 -INSERT INTO num_exp_div VALUES (2,9,'1.37757299946438931811') --- !query 129 schema -struct<> --- !query 129 output - - - --- !query 130 -INSERT INTO num_exp_add VALUES (3,0,'4.31') --- !query 130 schema -struct<> --- !query 130 output - - - --- !query 131 -INSERT INTO num_exp_sub VALUES (3,0,'4.31') --- !query 131 schema -struct<> --- !query 131 output - - - --- !query 132 -INSERT INTO num_exp_mul VALUES (3,0,'0') --- !query 132 schema -struct<> --- !query 132 output - - - --- !query 133 -INSERT INTO num_exp_div VALUES (3,0,'NaN') --- !query 133 schema -struct<> --- !query 133 output - - - --- !query 134 -INSERT INTO num_exp_add VALUES (3,1,'4.31') --- !query 134 schema -struct<> --- !query 134 output - - - --- !query 135 -INSERT INTO num_exp_sub VALUES (3,1,'4.31') --- !query 135 schema -struct<> --- !query 135 output - - - --- !query 136 -INSERT INTO num_exp_mul VALUES (3,1,'0') --- !query 136 schema 
-struct<> --- !query 136 output - - - --- !query 137 -INSERT INTO num_exp_div VALUES (3,1,'NaN') --- !query 137 schema -struct<> --- !query 137 output - - - --- !query 138 -INSERT INTO num_exp_add VALUES (3,2,'-34338487.905397047') --- !query 138 schema -struct<> --- !query 138 output - - - --- !query 139 -INSERT INTO num_exp_sub VALUES (3,2,'34338496.525397047') --- !query 139 schema -struct<> --- !query 139 output - - - --- !query 140 -INSERT INTO num_exp_mul VALUES (3,2,'-147998901.44836127257') --- !query 140 schema -struct<> --- !query 140 output - - - --- !query 141 -INSERT INTO num_exp_div VALUES (3,2,'-.00000012551512084352') --- !query 141 schema -struct<> --- !query 141 output - - - --- !query 142 -INSERT INTO num_exp_add VALUES (3,3,'8.62') --- !query 142 schema -struct<> --- !query 142 output - - - --- !query 143 -INSERT INTO num_exp_sub VALUES (3,3,'0') --- !query 143 schema -struct<> --- !query 143 output - - - --- !query 144 -INSERT INTO num_exp_mul VALUES (3,3,'18.5761') --- !query 144 schema -struct<> --- !query 144 output - - - --- !query 145 -INSERT INTO num_exp_div VALUES (3,3,'1.00000000000000000000') --- !query 145 schema -struct<> --- !query 145 output - - - --- !query 146 -INSERT INTO num_exp_add VALUES (3,4,'7799465.7219') --- !query 146 schema -struct<> --- !query 146 output - - - --- !query 147 -INSERT INTO num_exp_sub VALUES (3,4,'-7799457.1019') --- !query 147 schema -struct<> --- !query 147 output - - - --- !query 148 -INSERT INTO num_exp_mul VALUES (3,4,'33615678.685289') --- !query 148 schema -struct<> --- !query 148 output - - - --- !query 149 -INSERT INTO num_exp_div VALUES (3,4,'.00000055260225961552') --- !query 149 schema -struct<> --- !query 149 output - - - --- !query 150 -INSERT INTO num_exp_add VALUES (3,5,'16401.348491') --- !query 150 schema -struct<> --- !query 150 output - - - --- !query 151 -INSERT INTO num_exp_sub VALUES (3,5,'-16392.728491') --- !query 151 schema -struct<> --- !query 151 output - - - --- !query 152 
-INSERT INTO num_exp_mul VALUES (3,5,'70671.23589621') --- !query 152 schema -struct<> --- !query 152 output - - - --- !query 153 -INSERT INTO num_exp_div VALUES (3,5,'.00026285234387695504') --- !query 153 schema -struct<> --- !query 153 output - - - --- !query 154 -INSERT INTO num_exp_add VALUES (3,6,'93905.88763026') --- !query 154 schema -struct<> --- !query 154 output - - - --- !query 155 -INSERT INTO num_exp_sub VALUES (3,6,'-93897.26763026') --- !query 155 schema -struct<> --- !query 155 output - - - --- !query 156 -INSERT INTO num_exp_mul VALUES (3,6,'404715.7995864206') --- !query 156 schema -struct<> --- !query 156 output - - - --- !query 157 -INSERT INTO num_exp_div VALUES (3,6,'.00004589912234457595') --- !query 157 schema -struct<> --- !query 157 output - - - --- !query 158 -INSERT INTO num_exp_add VALUES (3,7,'-83028480.69') --- !query 158 schema -struct<> --- !query 158 output - - - --- !query 159 -INSERT INTO num_exp_sub VALUES (3,7,'83028489.31') --- !query 159 schema -struct<> --- !query 159 output - - - --- !query 160 -INSERT INTO num_exp_mul VALUES (3,7,'-357852770.35') --- !query 160 schema -struct<> --- !query 160 output - - - --- !query 161 -INSERT INTO num_exp_div VALUES (3,7,'-.00000005190989574240') --- !query 161 schema -struct<> --- !query 161 output - - - --- !query 162 -INSERT INTO num_exp_add VALUES (3,8,'74885.31') --- !query 162 schema -struct<> --- !query 162 output - - - --- !query 163 -INSERT INTO num_exp_sub VALUES (3,8,'-74876.69') --- !query 163 schema -struct<> --- !query 163 output - - - --- !query 164 -INSERT INTO num_exp_mul VALUES (3,8,'322737.11') --- !query 164 schema -struct<> --- !query 164 output - - - --- !query 165 -INSERT INTO num_exp_div VALUES (3,8,'.00005755799201399553') --- !query 165 schema -struct<> --- !query 165 output - - - --- !query 166 -INSERT INTO num_exp_add VALUES (3,9,'-24926799.735047420') --- !query 166 schema -struct<> --- !query 166 output - - - --- !query 167 -INSERT INTO num_exp_sub VALUES 
(3,9,'24926808.355047420') --- !query 167 schema -struct<> --- !query 167 output - - - --- !query 168 -INSERT INTO num_exp_mul VALUES (3,9,'-107434525.43415438020') --- !query 168 schema -struct<> --- !query 168 output - - - --- !query 169 -INSERT INTO num_exp_div VALUES (3,9,'-.00000017290624149854') --- !query 169 schema -struct<> --- !query 169 output - - - --- !query 170 -INSERT INTO num_exp_add VALUES (4,0,'7799461.4119') --- !query 170 schema -struct<> --- !query 170 output - - - --- !query 171 -INSERT INTO num_exp_sub VALUES (4,0,'7799461.4119') --- !query 171 schema -struct<> --- !query 171 output - - - --- !query 172 -INSERT INTO num_exp_mul VALUES (4,0,'0') --- !query 172 schema -struct<> --- !query 172 output - - - --- !query 173 -INSERT INTO num_exp_div VALUES (4,0,'NaN') --- !query 173 schema -struct<> --- !query 173 output - - - --- !query 174 -INSERT INTO num_exp_add VALUES (4,1,'7799461.4119') --- !query 174 schema -struct<> --- !query 174 output - - - --- !query 175 -INSERT INTO num_exp_sub VALUES (4,1,'7799461.4119') --- !query 175 schema -struct<> --- !query 175 output - - - --- !query 176 -INSERT INTO num_exp_mul VALUES (4,1,'0') --- !query 176 schema -struct<> --- !query 176 output - - - --- !query 177 -INSERT INTO num_exp_div VALUES (4,1,'NaN') --- !query 177 schema -struct<> --- !query 177 output - - - --- !query 178 -INSERT INTO num_exp_add VALUES (4,2,'-26539030.803497047') --- !query 178 schema -struct<> --- !query 178 output - - - --- !query 179 -INSERT INTO num_exp_sub VALUES (4,2,'42137953.627297047') --- !query 179 schema -struct<> --- !query 179 output - - - --- !query 180 -INSERT INTO num_exp_mul VALUES (4,2,'-267821744976817.8111137106593') --- !query 180 schema -struct<> --- !query 180 output - - - --- !query 181 -INSERT INTO num_exp_div VALUES (4,2,'-.22713465002993920385') --- !query 181 schema -struct<> --- !query 181 output - - - --- !query 182 -INSERT INTO num_exp_add VALUES (4,3,'7799465.7219') --- !query 182 schema -struct<> 
--- !query 182 output - - - --- !query 183 -INSERT INTO num_exp_sub VALUES (4,3,'7799457.1019') --- !query 183 schema -struct<> --- !query 183 output - - - --- !query 184 -INSERT INTO num_exp_mul VALUES (4,3,'33615678.685289') --- !query 184 schema -struct<> --- !query 184 output - - - --- !query 185 -INSERT INTO num_exp_div VALUES (4,3,'1809619.81714617169373549883') --- !query 185 schema -struct<> --- !query 185 output - - - --- !query 186 -INSERT INTO num_exp_add VALUES (4,4,'15598922.8238') --- !query 186 schema -struct<> --- !query 186 output - - - --- !query 187 -INSERT INTO num_exp_sub VALUES (4,4,'0') --- !query 187 schema -struct<> --- !query 187 output - - - --- !query 188 -INSERT INTO num_exp_mul VALUES (4,4,'60831598315717.14146161') --- !query 188 schema -struct<> --- !query 188 output - - - --- !query 189 -INSERT INTO num_exp_div VALUES (4,4,'1.00000000000000000000') --- !query 189 schema -struct<> --- !query 189 output - - - --- !query 190 -INSERT INTO num_exp_add VALUES (4,5,'7815858.450391') --- !query 190 schema -struct<> --- !query 190 output - - - --- !query 191 -INSERT INTO num_exp_sub VALUES (4,5,'7783064.373409') --- !query 191 schema -struct<> --- !query 191 output - - - --- !query 192 -INSERT INTO num_exp_mul VALUES (4,5,'127888068979.9935054429') --- !query 192 schema -struct<> --- !query 192 output - - - --- !query 193 -INSERT INTO num_exp_div VALUES (4,5,'475.66281046305802686061') --- !query 193 schema -struct<> --- !query 193 output - - - --- !query 194 -INSERT INTO num_exp_add VALUES (4,6,'7893362.98953026') --- !query 194 schema -struct<> --- !query 194 output - - - --- !query 195 -INSERT INTO num_exp_sub VALUES (4,6,'7705559.83426974') --- !query 195 schema -struct<> --- !query 195 output - - - --- !query 196 -INSERT INTO num_exp_mul VALUES (4,6,'732381731243.745115764094') --- !query 196 schema -struct<> --- !query 196 output - - - --- !query 197 -INSERT INTO num_exp_div VALUES (4,6,'83.05996138436129499606') --- !query 197 schema 
-struct<> --- !query 197 output - - - --- !query 198 -INSERT INTO num_exp_add VALUES (4,7,'-75229023.5881') --- !query 198 schema -struct<> --- !query 198 output - - - --- !query 199 -INSERT INTO num_exp_sub VALUES (4,7,'90827946.4119') --- !query 199 schema -struct<> --- !query 199 output - - - --- !query 200 -INSERT INTO num_exp_mul VALUES (4,7,'-647577464846017.9715') --- !query 200 schema -struct<> --- !query 200 output - - - --- !query 201 -INSERT INTO num_exp_div VALUES (4,7,'-.09393717604145131637') --- !query 201 schema -struct<> --- !query 201 output - - - --- !query 202 -INSERT INTO num_exp_add VALUES (4,8,'7874342.4119') --- !query 202 schema -struct<> --- !query 202 output - - - --- !query 203 -INSERT INTO num_exp_sub VALUES (4,8,'7724580.4119') --- !query 203 schema -struct<> --- !query 203 output - - - --- !query 204 -INSERT INTO num_exp_mul VALUES (4,8,'584031469984.4839') --- !query 204 schema -struct<> --- !query 204 output - - - --- !query 205 -INSERT INTO num_exp_div VALUES (4,8,'104.15808298366741897143') --- !query 205 schema -struct<> --- !query 205 output - - - --- !query 206 -INSERT INTO num_exp_add VALUES (4,9,'-17127342.633147420') --- !query 206 schema -struct<> --- !query 206 output - - - --- !query 207 -INSERT INTO num_exp_sub VALUES (4,9,'32726265.456947420') --- !query 207 schema -struct<> --- !query 207 output - - - --- !query 208 -INSERT INTO num_exp_mul VALUES (4,9,'-194415646271340.1815956522980') --- !query 208 schema -struct<> --- !query 208 output - - - --- !query 209 -INSERT INTO num_exp_div VALUES (4,9,'-.31289456112403769409') --- !query 209 schema -struct<> --- !query 209 output - - - --- !query 210 -INSERT INTO num_exp_add VALUES (5,0,'16397.038491') --- !query 210 schema -struct<> --- !query 210 output - - - --- !query 211 -INSERT INTO num_exp_sub VALUES (5,0,'16397.038491') --- !query 211 schema -struct<> --- !query 211 output - - - --- !query 212 -INSERT INTO num_exp_mul VALUES (5,0,'0') --- !query 212 schema -struct<> 
--- !query 212 output - - - --- !query 213 -INSERT INTO num_exp_div VALUES (5,0,'NaN') --- !query 213 schema -struct<> --- !query 213 output - - - --- !query 214 -INSERT INTO num_exp_add VALUES (5,1,'16397.038491') --- !query 214 schema -struct<> --- !query 214 output - - - --- !query 215 -INSERT INTO num_exp_sub VALUES (5,1,'16397.038491') --- !query 215 schema -struct<> --- !query 215 output - - - --- !query 216 -INSERT INTO num_exp_mul VALUES (5,1,'0') --- !query 216 schema -struct<> --- !query 216 output - - - --- !query 217 -INSERT INTO num_exp_div VALUES (5,1,'NaN') --- !query 217 schema -struct<> --- !query 217 output - - - --- !query 218 -INSERT INTO num_exp_add VALUES (5,2,'-34322095.176906047') --- !query 218 schema -struct<> --- !query 218 output - - - --- !query 219 -INSERT INTO num_exp_sub VALUES (5,2,'34354889.253888047') --- !query 219 schema -struct<> --- !query 219 output - - - --- !query 220 -INSERT INTO num_exp_mul VALUES (5,2,'-563049578578.769242506736077') --- !query 220 schema -struct<> --- !query 220 output - - - --- !query 221 -INSERT INTO num_exp_div VALUES (5,2,'-.00047751189505192446') --- !query 221 schema -struct<> --- !query 221 output - - - --- !query 222 -INSERT INTO num_exp_add VALUES (5,3,'16401.348491') --- !query 222 schema -struct<> --- !query 222 output - - - --- !query 223 -INSERT INTO num_exp_sub VALUES (5,3,'16392.728491') --- !query 223 schema -struct<> --- !query 223 output - - - --- !query 224 -INSERT INTO num_exp_mul VALUES (5,3,'70671.23589621') --- !query 224 schema -struct<> --- !query 224 output - - - --- !query 225 -INSERT INTO num_exp_div VALUES (5,3,'3804.41728329466357308584') --- !query 225 schema -struct<> --- !query 225 output - - - --- !query 226 -INSERT INTO num_exp_add VALUES (5,4,'7815858.450391') --- !query 226 schema -struct<> --- !query 226 output - - - --- !query 227 -INSERT INTO num_exp_sub VALUES (5,4,'-7783064.373409') --- !query 227 schema -struct<> --- !query 227 output - - - --- !query 228 
-INSERT INTO num_exp_mul VALUES (5,4,'127888068979.9935054429') --- !query 228 schema -struct<> --- !query 228 output - - - --- !query 229 -INSERT INTO num_exp_div VALUES (5,4,'.00210232958726897192') --- !query 229 schema -struct<> --- !query 229 output - - - --- !query 230 -INSERT INTO num_exp_add VALUES (5,5,'32794.076982') --- !query 230 schema -struct<> --- !query 230 output - - - --- !query 231 -INSERT INTO num_exp_sub VALUES (5,5,'0') --- !query 231 schema -struct<> --- !query 231 output - - - --- !query 232 -INSERT INTO num_exp_mul VALUES (5,5,'268862871.275335557081') --- !query 232 schema -struct<> --- !query 232 output - - - --- !query 233 -INSERT INTO num_exp_div VALUES (5,5,'1.00000000000000000000') --- !query 233 schema -struct<> --- !query 233 output - - - --- !query 234 -INSERT INTO num_exp_add VALUES (5,6,'110298.61612126') --- !query 234 schema -struct<> --- !query 234 output - - - --- !query 235 -INSERT INTO num_exp_sub VALUES (5,6,'-77504.53913926') --- !query 235 schema -struct<> --- !query 235 output - - - --- !query 236 -INSERT INTO num_exp_mul VALUES (5,6,'1539707782.76899778633766') --- !query 236 schema -struct<> --- !query 236 output - - - --- !query 237 -INSERT INTO num_exp_div VALUES (5,6,'.17461941433576102689') --- !query 237 schema -struct<> --- !query 237 output - - - --- !query 238 -INSERT INTO num_exp_add VALUES (5,7,'-83012087.961509') --- !query 238 schema -struct<> --- !query 238 output - - - --- !query 239 -INSERT INTO num_exp_sub VALUES (5,7,'83044882.038491') --- !query 239 schema -struct<> --- !query 239 output - - - --- !query 240 -INSERT INTO num_exp_mul VALUES (5,7,'-1361421264394.416135') --- !query 240 schema -struct<> --- !query 240 output - - - --- !query 241 -INSERT INTO num_exp_div VALUES (5,7,'-.00019748690453643710') --- !query 241 schema -struct<> --- !query 241 output - - - --- !query 242 -INSERT INTO num_exp_add VALUES (5,8,'91278.038491') --- !query 242 schema -struct<> --- !query 242 output - - - --- !query 
243 -INSERT INTO num_exp_sub VALUES (5,8,'-58483.961509') --- !query 243 schema -struct<> --- !query 243 output - - - --- !query 244 -INSERT INTO num_exp_mul VALUES (5,8,'1227826639.244571') --- !query 244 schema -struct<> --- !query 244 output - - - --- !query 245 -INSERT INTO num_exp_div VALUES (5,8,'.21897461960978085228') --- !query 245 schema -struct<> --- !query 245 output - - - --- !query 246 -INSERT INTO num_exp_add VALUES (5,9,'-24910407.006556420') --- !query 246 schema -struct<> --- !query 246 output - - - --- !query 247 -INSERT INTO num_exp_sub VALUES (5,9,'24943201.083538420') --- !query 247 schema -struct<> --- !query 247 output - - - --- !query 248 -INSERT INTO num_exp_mul VALUES (5,9,'-408725765384.257043660243220') --- !query 248 schema -struct<> --- !query 248 output - - - --- !query 249 -INSERT INTO num_exp_div VALUES (5,9,'-.00065780749354660427') --- !query 249 schema -struct<> --- !query 249 output - - - --- !query 250 -INSERT INTO num_exp_add VALUES (6,0,'93901.57763026') --- !query 250 schema -struct<> --- !query 250 output - - - --- !query 251 -INSERT INTO num_exp_sub VALUES (6,0,'93901.57763026') --- !query 251 schema -struct<> --- !query 251 output - - - --- !query 252 -INSERT INTO num_exp_mul VALUES (6,0,'0') --- !query 252 schema -struct<> --- !query 252 output - - - --- !query 253 -INSERT INTO num_exp_div VALUES (6,0,'NaN') --- !query 253 schema -struct<> --- !query 253 output - - - --- !query 254 -INSERT INTO num_exp_add VALUES (6,1,'93901.57763026') --- !query 254 schema -struct<> --- !query 254 output - - - --- !query 255 -INSERT INTO num_exp_sub VALUES (6,1,'93901.57763026') --- !query 255 schema -struct<> --- !query 255 output - - - --- !query 256 -INSERT INTO num_exp_mul VALUES (6,1,'0') --- !query 256 schema -struct<> --- !query 256 output - - - --- !query 257 -INSERT INTO num_exp_div VALUES (6,1,'NaN') --- !query 257 schema -struct<> --- !query 257 output - - - --- !query 258 -INSERT INTO num_exp_add VALUES 
(6,2,'-34244590.637766787') --- !query 258 schema -struct<> --- !query 258 output - - - --- !query 259 -INSERT INTO num_exp_sub VALUES (6,2,'34432393.793027307') --- !query 259 schema -struct<> --- !query 259 output - - - --- !query 260 -INSERT INTO num_exp_mul VALUES (6,2,'-3224438592470.18449811926184222') --- !query 260 schema -struct<> --- !query 260 output - - - --- !query 261 -INSERT INTO num_exp_div VALUES (6,2,'-.00273458651128995823') --- !query 261 schema -struct<> --- !query 261 output - - - --- !query 262 -INSERT INTO num_exp_add VALUES (6,3,'93905.88763026') --- !query 262 schema -struct<> --- !query 262 output - - - --- !query 263 -INSERT INTO num_exp_sub VALUES (6,3,'93897.26763026') --- !query 263 schema -struct<> --- !query 263 output - - - --- !query 264 -INSERT INTO num_exp_mul VALUES (6,3,'404715.7995864206') --- !query 264 schema -struct<> --- !query 264 output - - - --- !query 265 -INSERT INTO num_exp_div VALUES (6,3,'21786.90896293735498839907') --- !query 265 schema -struct<> --- !query 265 output - - - --- !query 266 -INSERT INTO num_exp_add VALUES (6,4,'7893362.98953026') --- !query 266 schema -struct<> --- !query 266 output - - - --- !query 267 -INSERT INTO num_exp_sub VALUES (6,4,'-7705559.83426974') --- !query 267 schema -struct<> --- !query 267 output - - - --- !query 268 -INSERT INTO num_exp_mul VALUES (6,4,'732381731243.745115764094') --- !query 268 schema -struct<> --- !query 268 output - - - --- !query 269 -INSERT INTO num_exp_div VALUES (6,4,'.01203949512295682469') --- !query 269 schema -struct<> --- !query 269 output - - - --- !query 270 -INSERT INTO num_exp_add VALUES (6,5,'110298.61612126') --- !query 270 schema -struct<> --- !query 270 output - - - --- !query 271 -INSERT INTO num_exp_sub VALUES (6,5,'77504.53913926') --- !query 271 schema -struct<> --- !query 271 output - - - --- !query 272 -INSERT INTO num_exp_mul VALUES (6,5,'1539707782.76899778633766') --- !query 272 schema -struct<> --- !query 272 output - - - --- !query 
273 -INSERT INTO num_exp_div VALUES (6,5,'5.72674008674192359679') --- !query 273 schema -struct<> --- !query 273 output - - - --- !query 274 -INSERT INTO num_exp_add VALUES (6,6,'187803.15526052') --- !query 274 schema -struct<> --- !query 274 output - - - --- !query 275 -INSERT INTO num_exp_sub VALUES (6,6,'0') --- !query 275 schema -struct<> --- !query 275 output - - - --- !query 276 -INSERT INTO num_exp_mul VALUES (6,6,'8817506281.4517452372676676') --- !query 276 schema -struct<> --- !query 276 output - - - --- !query 277 -INSERT INTO num_exp_div VALUES (6,6,'1.00000000000000000000') --- !query 277 schema -struct<> --- !query 277 output - - - --- !query 278 -INSERT INTO num_exp_add VALUES (6,7,'-82934583.42236974') --- !query 278 schema -struct<> --- !query 278 output - - - --- !query 279 -INSERT INTO num_exp_sub VALUES (6,7,'83122386.57763026') --- !query 279 schema -struct<> --- !query 279 output - - - --- !query 280 -INSERT INTO num_exp_mul VALUES (6,7,'-7796505729750.37795610') --- !query 280 schema -struct<> --- !query 280 output - - - --- !query 281 -INSERT INTO num_exp_div VALUES (6,7,'-.00113095617281538980') --- !query 281 schema -struct<> --- !query 281 output - - - --- !query 282 -INSERT INTO num_exp_add VALUES (6,8,'168782.57763026') --- !query 282 schema -struct<> --- !query 282 output - - - --- !query 283 -INSERT INTO num_exp_sub VALUES (6,8,'19020.57763026') --- !query 283 schema -struct<> --- !query 283 output - - - --- !query 284 -INSERT INTO num_exp_mul VALUES (6,8,'7031444034.53149906') --- !query 284 schema -struct<> --- !query 284 output - - - --- !query 285 -INSERT INTO num_exp_div VALUES (6,8,'1.25401073209839612184') --- !query 285 schema -struct<> --- !query 285 output - - - --- !query 286 -INSERT INTO num_exp_add VALUES (6,9,'-24832902.467417160') --- !query 286 schema -struct<> --- !query 286 output - - - --- !query 287 -INSERT INTO num_exp_sub VALUES (6,9,'25020705.622677680') --- !query 287 schema -struct<> --- !query 287 output - 
- - --- !query 288 -INSERT INTO num_exp_mul VALUES (6,9,'-2340666225110.29929521292692920') --- !query 288 schema -struct<> --- !query 288 output - - - --- !query 289 -INSERT INTO num_exp_div VALUES (6,9,'-.00376709254265256789') --- !query 289 schema -struct<> --- !query 289 output - - - --- !query 290 -INSERT INTO num_exp_add VALUES (7,0,'-83028485') --- !query 290 schema -struct<> --- !query 290 output - - - --- !query 291 -INSERT INTO num_exp_sub VALUES (7,0,'-83028485') --- !query 291 schema -struct<> --- !query 291 output - - - --- !query 292 -INSERT INTO num_exp_mul VALUES (7,0,'0') --- !query 292 schema -struct<> --- !query 292 output - - - --- !query 293 -INSERT INTO num_exp_div VALUES (7,0,'NaN') --- !query 293 schema -struct<> --- !query 293 output - - - --- !query 294 -INSERT INTO num_exp_add VALUES (7,1,'-83028485') --- !query 294 schema -struct<> --- !query 294 output - - - --- !query 295 -INSERT INTO num_exp_sub VALUES (7,1,'-83028485') --- !query 295 schema -struct<> --- !query 295 output - - - --- !query 296 -INSERT INTO num_exp_mul VALUES (7,1,'0') --- !query 296 schema -struct<> --- !query 296 output - - - --- !query 297 -INSERT INTO num_exp_div VALUES (7,1,'NaN') --- !query 297 schema -struct<> --- !query 297 output - - - --- !query 298 -INSERT INTO num_exp_add VALUES (7,2,'-117366977.215397047') --- !query 298 schema -struct<> --- !query 298 output - - - --- !query 299 -INSERT INTO num_exp_sub VALUES (7,2,'-48689992.784602953') --- !query 299 schema -struct<> --- !query 299 output - - - --- !query 300 -INSERT INTO num_exp_mul VALUES (7,2,'2851072985828710.485883795') --- !query 300 schema -struct<> --- !query 300 output - - - --- !query 301 -INSERT INTO num_exp_div VALUES (7,2,'2.41794207151503385700') --- !query 301 schema -struct<> --- !query 301 output - - - --- !query 302 -INSERT INTO num_exp_add VALUES (7,3,'-83028480.69') --- !query 302 schema -struct<> --- !query 302 output - - - --- !query 303 -INSERT INTO num_exp_sub VALUES 
(7,3,'-83028489.31') --- !query 303 schema -struct<> --- !query 303 output - - - --- !query 304 -INSERT INTO num_exp_mul VALUES (7,3,'-357852770.35') --- !query 304 schema -struct<> --- !query 304 output - - - --- !query 305 -INSERT INTO num_exp_div VALUES (7,3,'-19264149.65197215777262180974') --- !query 305 schema -struct<> --- !query 305 output - - - --- !query 306 -INSERT INTO num_exp_add VALUES (7,4,'-75229023.5881') --- !query 306 schema -struct<> --- !query 306 output - - - --- !query 307 -INSERT INTO num_exp_sub VALUES (7,4,'-90827946.4119') --- !query 307 schema -struct<> --- !query 307 output - - - --- !query 308 -INSERT INTO num_exp_mul VALUES (7,4,'-647577464846017.9715') --- !query 308 schema -struct<> --- !query 308 output - - - --- !query 309 -INSERT INTO num_exp_div VALUES (7,4,'-10.64541262725136247686') --- !query 309 schema -struct<> --- !query 309 output - - - --- !query 310 -INSERT INTO num_exp_add VALUES (7,5,'-83012087.961509') --- !query 310 schema -struct<> --- !query 310 output - - - --- !query 311 -INSERT INTO num_exp_sub VALUES (7,5,'-83044882.038491') --- !query 311 schema -struct<> --- !query 311 output - - - --- !query 312 -INSERT INTO num_exp_mul VALUES (7,5,'-1361421264394.416135') --- !query 312 schema -struct<> --- !query 312 output - - - --- !query 313 -INSERT INTO num_exp_div VALUES (7,5,'-5063.62688881730941836574') --- !query 313 schema -struct<> --- !query 313 output - - - --- !query 314 -INSERT INTO num_exp_add VALUES (7,6,'-82934583.42236974') --- !query 314 schema -struct<> --- !query 314 output - - - --- !query 315 -INSERT INTO num_exp_sub VALUES (7,6,'-83122386.57763026') --- !query 315 schema -struct<> --- !query 315 output - - - --- !query 316 -INSERT INTO num_exp_mul VALUES (7,6,'-7796505729750.37795610') --- !query 316 schema -struct<> --- !query 316 output - - - --- !query 317 -INSERT INTO num_exp_div VALUES (7,6,'-884.20756174009028770294') --- !query 317 schema -struct<> --- !query 317 output - - - --- !query 318 
-INSERT INTO num_exp_add VALUES (7,7,'-166056970') --- !query 318 schema -struct<> --- !query 318 output - - - --- !query 319 -INSERT INTO num_exp_sub VALUES (7,7,'0') --- !query 319 schema -struct<> --- !query 319 output - - - --- !query 320 -INSERT INTO num_exp_mul VALUES (7,7,'6893729321395225') --- !query 320 schema -struct<> --- !query 320 output - - - --- !query 321 -INSERT INTO num_exp_div VALUES (7,7,'1.00000000000000000000') --- !query 321 schema -struct<> --- !query 321 output - - - --- !query 322 -INSERT INTO num_exp_add VALUES (7,8,'-82953604') --- !query 322 schema -struct<> --- !query 322 output - - - --- !query 323 -INSERT INTO num_exp_sub VALUES (7,8,'-83103366') --- !query 323 schema -struct<> --- !query 323 output - - - --- !query 324 -INSERT INTO num_exp_mul VALUES (7,8,'-6217255985285') --- !query 324 schema -struct<> --- !query 324 output - - - --- !query 325 -INSERT INTO num_exp_div VALUES (7,8,'-1108.80577182462841041118') --- !query 325 schema -struct<> --- !query 325 output - - - --- !query 326 -INSERT INTO num_exp_add VALUES (7,9,'-107955289.045047420') --- !query 326 schema -struct<> --- !query 326 output - - - --- !query 327 -INSERT INTO num_exp_sub VALUES (7,9,'-58101680.954952580') --- !query 327 schema -struct<> --- !query 327 output - - - --- !query 328 -INSERT INTO num_exp_mul VALUES (7,9,'2069634775752159.035758700') --- !query 328 schema -struct<> --- !query 328 output - - - --- !query 329 -INSERT INTO num_exp_div VALUES (7,9,'3.33089171198810413382') --- !query 329 schema -struct<> --- !query 329 output - - - --- !query 330 -INSERT INTO num_exp_add VALUES (8,0,'74881') --- !query 330 schema -struct<> --- !query 330 output - - - --- !query 331 -INSERT INTO num_exp_sub VALUES (8,0,'74881') --- !query 331 schema -struct<> --- !query 331 output - - - --- !query 332 -INSERT INTO num_exp_mul VALUES (8,0,'0') --- !query 332 schema -struct<> --- !query 332 output - - - --- !query 333 -INSERT INTO num_exp_div VALUES (8,0,'NaN') --- !query 
333 schema -struct<> --- !query 333 output - - - --- !query 334 -INSERT INTO num_exp_add VALUES (8,1,'74881') --- !query 334 schema -struct<> --- !query 334 output - - - --- !query 335 -INSERT INTO num_exp_sub VALUES (8,1,'74881') --- !query 335 schema -struct<> --- !query 335 output - - - --- !query 336 -INSERT INTO num_exp_mul VALUES (8,1,'0') --- !query 336 schema -struct<> --- !query 336 output - - - --- !query 337 -INSERT INTO num_exp_div VALUES (8,1,'NaN') --- !query 337 schema -struct<> --- !query 337 output - - - --- !query 338 -INSERT INTO num_exp_add VALUES (8,2,'-34263611.215397047') --- !query 338 schema -struct<> --- !query 338 output - - - --- !query 339 -INSERT INTO num_exp_sub VALUES (8,2,'34413373.215397047') --- !query 339 schema -struct<> --- !query 339 output - - - --- !query 340 -INSERT INTO num_exp_mul VALUES (8,2,'-2571300635581.146276407') --- !query 340 schema -struct<> --- !query 340 output - - - --- !query 341 -INSERT INTO num_exp_div VALUES (8,2,'-.00218067233500788615') --- !query 341 schema -struct<> --- !query 341 output - - - --- !query 342 -INSERT INTO num_exp_add VALUES (8,3,'74885.31') --- !query 342 schema -struct<> --- !query 342 output - - - --- !query 343 -INSERT INTO num_exp_sub VALUES (8,3,'74876.69') --- !query 343 schema -struct<> --- !query 343 output - - - --- !query 344 -INSERT INTO num_exp_mul VALUES (8,3,'322737.11') --- !query 344 schema -struct<> --- !query 344 output - - - --- !query 345 -INSERT INTO num_exp_div VALUES (8,3,'17373.78190255220417633410') --- !query 345 schema -struct<> --- !query 345 output - - - --- !query 346 -INSERT INTO num_exp_add VALUES (8,4,'7874342.4119') --- !query 346 schema -struct<> --- !query 346 output - - - --- !query 347 -INSERT INTO num_exp_sub VALUES (8,4,'-7724580.4119') --- !query 347 schema -struct<> --- !query 347 output - - - --- !query 348 -INSERT INTO num_exp_mul VALUES (8,4,'584031469984.4839') --- !query 348 schema -struct<> --- !query 348 output - - - --- !query 349 
-INSERT INTO num_exp_div VALUES (8,4,'.00960079113741758956') --- !query 349 schema -struct<> --- !query 349 output - - - --- !query 350 -INSERT INTO num_exp_add VALUES (8,5,'91278.038491') --- !query 350 schema -struct<> --- !query 350 output - - - --- !query 351 -INSERT INTO num_exp_sub VALUES (8,5,'58483.961509') --- !query 351 schema -struct<> --- !query 351 output - - - --- !query 352 -INSERT INTO num_exp_mul VALUES (8,5,'1227826639.244571') --- !query 352 schema -struct<> --- !query 352 output - - - --- !query 353 -INSERT INTO num_exp_div VALUES (8,5,'4.56673929509287019456') --- !query 353 schema -struct<> --- !query 353 output - - - --- !query 354 -INSERT INTO num_exp_add VALUES (8,6,'168782.57763026') --- !query 354 schema -struct<> --- !query 354 output - - - --- !query 355 -INSERT INTO num_exp_sub VALUES (8,6,'-19020.57763026') --- !query 355 schema -struct<> --- !query 355 output - - - --- !query 356 -INSERT INTO num_exp_mul VALUES (8,6,'7031444034.53149906') --- !query 356 schema -struct<> --- !query 356 output - - - --- !query 357 -INSERT INTO num_exp_div VALUES (8,6,'.79744134113322314424') --- !query 357 schema -struct<> --- !query 357 output - - - --- !query 358 -INSERT INTO num_exp_add VALUES (8,7,'-82953604') --- !query 358 schema -struct<> --- !query 358 output - - - --- !query 359 -INSERT INTO num_exp_sub VALUES (8,7,'83103366') --- !query 359 schema -struct<> --- !query 359 output - - - --- !query 360 -INSERT INTO num_exp_mul VALUES (8,7,'-6217255985285') --- !query 360 schema -struct<> --- !query 360 output - - - --- !query 361 -INSERT INTO num_exp_div VALUES (8,7,'-.00090187120721280172') --- !query 361 schema -struct<> --- !query 361 output - - - --- !query 362 -INSERT INTO num_exp_add VALUES (8,8,'149762') --- !query 362 schema -struct<> --- !query 362 output - - - --- !query 363 -INSERT INTO num_exp_sub VALUES (8,8,'0') --- !query 363 schema -struct<> --- !query 363 output - - - --- !query 364 -INSERT INTO num_exp_mul VALUES 
(8,8,'5607164161') --- !query 364 schema -struct<> --- !query 364 output - - - --- !query 365 -INSERT INTO num_exp_div VALUES (8,8,'1.00000000000000000000') --- !query 365 schema -struct<> --- !query 365 output - - - --- !query 366 -INSERT INTO num_exp_add VALUES (8,9,'-24851923.045047420') --- !query 366 schema -struct<> --- !query 366 output - - - --- !query 367 -INSERT INTO num_exp_sub VALUES (8,9,'25001685.045047420') --- !query 367 schema -struct<> --- !query 367 output - - - --- !query 368 -INSERT INTO num_exp_mul VALUES (8,9,'-1866544013697.195857020') --- !query 368 schema -struct<> --- !query 368 output - - - --- !query 369 -INSERT INTO num_exp_div VALUES (8,9,'-.00300403532938582735') --- !query 369 schema -struct<> --- !query 369 output - - - --- !query 370 -INSERT INTO num_exp_add VALUES (9,0,'-24926804.045047420') --- !query 370 schema -struct<> --- !query 370 output - - - --- !query 371 -INSERT INTO num_exp_sub VALUES (9,0,'-24926804.045047420') --- !query 371 schema -struct<> --- !query 371 output - - - --- !query 372 -INSERT INTO num_exp_mul VALUES (9,0,'0') --- !query 372 schema -struct<> --- !query 372 output - - - --- !query 373 -INSERT INTO num_exp_div VALUES (9,0,'NaN') --- !query 373 schema -struct<> --- !query 373 output - - - --- !query 374 -INSERT INTO num_exp_add VALUES (9,1,'-24926804.045047420') --- !query 374 schema -struct<> --- !query 374 output - - - --- !query 375 -INSERT INTO num_exp_sub VALUES (9,1,'-24926804.045047420') --- !query 375 schema -struct<> --- !query 375 output - - - --- !query 376 -INSERT INTO num_exp_mul VALUES (9,1,'0') --- !query 376 schema -struct<> --- !query 376 output - - - --- !query 377 -INSERT INTO num_exp_div VALUES (9,1,'NaN') --- !query 377 schema -struct<> --- !query 377 output - - - --- !query 378 -INSERT INTO num_exp_add VALUES (9,2,'-59265296.260444467') --- !query 378 schema -struct<> --- !query 378 output - - - --- !query 379 -INSERT INTO num_exp_sub VALUES (9,2,'9411688.170349627') --- !query 379 
schema -struct<> --- !query 379 output - - - --- !query 380 -INSERT INTO num_exp_mul VALUES (9,2,'855948866655588.453741509242968740') --- !query 380 schema -struct<> --- !query 380 output - - - --- !query 381 -INSERT INTO num_exp_div VALUES (9,2,'.72591434384152961526') --- !query 381 schema -struct<> --- !query 381 output - - - --- !query 382 -INSERT INTO num_exp_add VALUES (9,3,'-24926799.735047420') --- !query 382 schema -struct<> --- !query 382 output - - - --- !query 383 -INSERT INTO num_exp_sub VALUES (9,3,'-24926808.355047420') --- !query 383 schema -struct<> --- !query 383 output - - - --- !query 384 -INSERT INTO num_exp_mul VALUES (9,3,'-107434525.43415438020') --- !query 384 schema -struct<> --- !query 384 output - - - --- !query 385 -INSERT INTO num_exp_div VALUES (9,3,'-5783481.21694835730858468677') --- !query 385 schema -struct<> --- !query 385 output - - - --- !query 386 -INSERT INTO num_exp_add VALUES (9,4,'-17127342.633147420') --- !query 386 schema -struct<> --- !query 386 output - - - --- !query 387 -INSERT INTO num_exp_sub VALUES (9,4,'-32726265.456947420') --- !query 387 schema -struct<> --- !query 387 output - - - --- !query 388 -INSERT INTO num_exp_mul VALUES (9,4,'-194415646271340.1815956522980') --- !query 388 schema -struct<> --- !query 388 output - - - --- !query 389 -INSERT INTO num_exp_div VALUES (9,4,'-3.19596478892958416484') --- !query 389 schema -struct<> --- !query 389 output - - - --- !query 390 -INSERT INTO num_exp_add VALUES (9,5,'-24910407.006556420') --- !query 390 schema -struct<> --- !query 390 output - - - --- !query 391 -INSERT INTO num_exp_sub VALUES (9,5,'-24943201.083538420') --- !query 391 schema -struct<> --- !query 391 output - - - --- !query 392 -INSERT INTO num_exp_mul VALUES (9,5,'-408725765384.257043660243220') --- !query 392 schema -struct<> --- !query 392 output - - - --- !query 393 -INSERT INTO num_exp_div VALUES (9,5,'-1520.20159364322004505807') --- !query 393 schema -struct<> --- !query 393 output - - - 
--- !query 394 -INSERT INTO num_exp_add VALUES (9,6,'-24832902.467417160') --- !query 394 schema -struct<> --- !query 394 output - - - --- !query 395 -INSERT INTO num_exp_sub VALUES (9,6,'-25020705.622677680') --- !query 395 schema -struct<> --- !query 395 output - - - --- !query 396 -INSERT INTO num_exp_mul VALUES (9,6,'-2340666225110.29929521292692920') --- !query 396 schema -struct<> --- !query 396 output - - - --- !query 397 -INSERT INTO num_exp_div VALUES (9,6,'-265.45671195426965751280') --- !query 397 schema -struct<> --- !query 397 output - - - --- !query 398 -INSERT INTO num_exp_add VALUES (9,7,'-107955289.045047420') --- !query 398 schema -struct<> --- !query 398 output - - - --- !query 399 -INSERT INTO num_exp_sub VALUES (9,7,'58101680.954952580') --- !query 399 schema -struct<> --- !query 399 output - - - --- !query 400 -INSERT INTO num_exp_mul VALUES (9,7,'2069634775752159.035758700') --- !query 400 schema -struct<> --- !query 400 output - - - --- !query 401 -INSERT INTO num_exp_div VALUES (9,7,'.30021990699995814689') --- !query 401 schema -struct<> --- !query 401 output - - - --- !query 402 -INSERT INTO num_exp_add VALUES (9,8,'-24851923.045047420') --- !query 402 schema -struct<> --- !query 402 output - - - --- !query 403 -INSERT INTO num_exp_sub VALUES (9,8,'-25001685.045047420') --- !query 403 schema -struct<> --- !query 403 output - - - --- !query 404 -INSERT INTO num_exp_mul VALUES (9,8,'-1866544013697.195857020') --- !query 404 schema -struct<> --- !query 404 output - - - --- !query 405 -INSERT INTO num_exp_div VALUES (9,8,'-332.88556569820675471748') --- !query 405 schema -struct<> --- !query 405 output - - - --- !query 406 -INSERT INTO num_exp_add VALUES (9,9,'-49853608.090094840') --- !query 406 schema -struct<> --- !query 406 output - - - --- !query 407 -INSERT INTO num_exp_sub VALUES (9,9,'0') --- !query 407 schema -struct<> --- !query 407 output - - - --- !query 408 -INSERT INTO num_exp_mul VALUES 
(9,9,'621345559900192.420120630048656400') --- !query 408 schema -struct<> --- !query 408 output - - - --- !query 409 -INSERT INTO num_exp_div VALUES (9,9,'1.00000000000000000000') --- !query 409 schema -struct<> --- !query 409 output - - - --- !query 410 -INSERT INTO num_exp_sqrt VALUES (0,'0') --- !query 410 schema -struct<> --- !query 410 output - - - --- !query 411 -INSERT INTO num_exp_sqrt VALUES (1,'0') --- !query 411 schema -struct<> --- !query 411 output - - - --- !query 412 -INSERT INTO num_exp_sqrt VALUES (2,'5859.90547836712524903505') --- !query 412 schema -struct<> --- !query 412 output - - - --- !query 413 -INSERT INTO num_exp_sqrt VALUES (3,'2.07605394920266944396') --- !query 413 schema -struct<> --- !query 413 output - - - --- !query 414 -INSERT INTO num_exp_sqrt VALUES (4,'2792.75158435189147418923') --- !query 414 schema -struct<> --- !query 414 output - - - --- !query 415 -INSERT INTO num_exp_sqrt VALUES (5,'128.05092147657509145473') --- !query 415 schema -struct<> --- !query 415 output - - - --- !query 416 -INSERT INTO num_exp_sqrt VALUES (6,'306.43364311096782703406') --- !query 416 schema -struct<> --- !query 416 output - - - --- !query 417 -INSERT INTO num_exp_sqrt VALUES (7,'9111.99676251039939975230') --- !query 417 schema -struct<> --- !query 417 output - - - --- !query 418 -INSERT INTO num_exp_sqrt VALUES (8,'273.64392922189960397542') --- !query 418 schema -struct<> --- !query 418 output - - - --- !query 419 -INSERT INTO num_exp_sqrt VALUES (9,'4992.67503899937593364766') --- !query 419 schema -struct<> --- !query 419 output - - - --- !query 420 -INSERT INTO num_exp_ln VALUES (0,'NaN') --- !query 420 schema -struct<> --- !query 420 output - - - --- !query 421 -INSERT INTO num_exp_ln VALUES (1,'NaN') --- !query 421 schema -struct<> --- !query 421 output - - - --- !query 422 -INSERT INTO num_exp_ln VALUES (2,'17.35177750493897715514') --- !query 422 schema -struct<> --- !query 422 output - - - --- !query 423 -INSERT INTO num_exp_ln 
VALUES (3,'1.46093790411565641971') --- !query 423 schema -struct<> --- !query 423 output - - - --- !query 424 -INSERT INTO num_exp_ln VALUES (4,'15.86956523951936572464') --- !query 424 schema -struct<> --- !query 424 output - - - --- !query 425 -INSERT INTO num_exp_ln VALUES (5,'9.70485601768871834038') --- !query 425 schema -struct<> --- !query 425 output - - - --- !query 426 -INSERT INTO num_exp_ln VALUES (6,'11.45000246622944403127') --- !query 426 schema -struct<> --- !query 426 output - - - --- !query 427 -INSERT INTO num_exp_ln VALUES (7,'18.23469429965478772991') --- !query 427 schema -struct<> --- !query 427 output - - - --- !query 428 -INSERT INTO num_exp_ln VALUES (8,'11.22365546576315513668') --- !query 428 schema -struct<> --- !query 428 output - - - --- !query 429 -INSERT INTO num_exp_ln VALUES (9,'17.03145425013166006962') --- !query 429 schema -struct<> --- !query 429 output - - - --- !query 430 -INSERT INTO num_exp_log10 VALUES (0,'NaN') --- !query 430 schema -struct<> --- !query 430 output - - - --- !query 431 -INSERT INTO num_exp_log10 VALUES (1,'NaN') --- !query 431 schema -struct<> --- !query 431 output - - - --- !query 432 -INSERT INTO num_exp_log10 VALUES (2,'7.53578122160797276459') --- !query 432 schema -struct<> --- !query 432 output - - - --- !query 433 -INSERT INTO num_exp_log10 VALUES (3,'.63447727016073160075') --- !query 433 schema -struct<> --- !query 433 output - - - --- !query 434 -INSERT INTO num_exp_log10 VALUES (4,'6.89206461372691743345') --- !query 434 schema -struct<> --- !query 434 output - - - --- !query 435 -INSERT INTO num_exp_log10 VALUES (5,'4.21476541614777768626') --- !query 435 schema -struct<> --- !query 435 output - - - --- !query 436 -INSERT INTO num_exp_log10 VALUES (6,'4.97267288886207207671') --- !query 436 schema -struct<> --- !query 436 output - - - --- !query 437 -INSERT INTO num_exp_log10 VALUES (7,'7.91922711353275546914') --- !query 437 schema -struct<> --- !query 437 output - - - --- !query 438 -INSERT 
INTO num_exp_log10 VALUES (8,'4.87437163556421004138') --- !query 438 schema -struct<> --- !query 438 output - - - --- !query 439 -INSERT INTO num_exp_log10 VALUES (9,'7.39666659961986567059') --- !query 439 schema -struct<> --- !query 439 output - - - --- !query 440 -INSERT INTO num_exp_power_10_ln VALUES (0,'NaN') --- !query 440 schema -struct<> --- !query 440 output - - - --- !query 441 -INSERT INTO num_exp_power_10_ln VALUES (1,'NaN') --- !query 441 schema -struct<> --- !query 441 output - - - --- !query 442 -INSERT INTO num_exp_power_10_ln VALUES (2,'224790267919917955.13261618583642653184') --- !query 442 schema -struct<> --- !query 442 output - - - --- !query 443 -INSERT INTO num_exp_power_10_ln VALUES (3,'28.90266599445155957393') --- !query 443 schema -struct<> --- !query 443 output - - - --- !query 444 -INSERT INTO num_exp_power_10_ln VALUES (4,'7405685069594999.07733999469386277636') --- !query 444 schema -struct<> --- !query 444 output - - - --- !query 445 -INSERT INTO num_exp_power_10_ln VALUES (5,'5068226527.32127265408584640098') --- !query 445 schema -struct<> --- !query 445 output - - - --- !query 446 -INSERT INTO num_exp_power_10_ln VALUES (6,'281839893606.99372343357047819067') --- !query 446 schema -struct<> --- !query 446 output - - - --- !query 447 -INSERT INTO num_exp_power_10_ln VALUES (7,'1716699575118597095.42330819910640247627') --- !query 447 schema -struct<> --- !query 447 output - - - --- !query 448 -INSERT INTO num_exp_power_10_ln VALUES (8,'167361463828.07491320069016125952') --- !query 448 schema -struct<> --- !query 448 output - - - --- !query 449 -INSERT INTO num_exp_power_10_ln VALUES (9,'107511333880052007.04141124673540337457') --- !query 449 schema -struct<> --- !query 449 output - - - --- !query 450 -INSERT INTO num_data VALUES (0, '0') --- !query 450 schema -struct<> --- !query 450 output - - - --- !query 451 -INSERT INTO num_data VALUES (1, '0') --- !query 451 schema -struct<> --- !query 451 output - - - --- !query 452 
-INSERT INTO num_data VALUES (2, '-34338492.215397047') --- !query 452 schema -struct<> --- !query 452 output - - - --- !query 453 -INSERT INTO num_data VALUES (3, '4.31') --- !query 453 schema -struct<> --- !query 453 output - - - --- !query 454 -INSERT INTO num_data VALUES (4, '7799461.4119') --- !query 454 schema -struct<> --- !query 454 output - - - --- !query 455 -INSERT INTO num_data VALUES (5, '16397.038491') --- !query 455 schema -struct<> --- !query 455 output - - - --- !query 456 -INSERT INTO num_data VALUES (6, '93901.57763026') --- !query 456 schema -struct<> --- !query 456 output - - - --- !query 457 -INSERT INTO num_data VALUES (7, '-83028485') --- !query 457 schema -struct<> --- !query 457 output - - - --- !query 458 -INSERT INTO num_data VALUES (8, '74881') --- !query 458 schema -struct<> --- !query 458 output - - - --- !query 459 -INSERT INTO num_data VALUES (9, '-24926804.045047420') --- !query 459 schema -struct<> --- !query 459 output - - - --- !query 460 -SELECT * FROM num_data --- !query 460 schema -struct --- !query 460 output -0 0 -1 0 -2 -34338492.215397047 -3 4.31 -4 7799461.4119 -5 16397.038491 -6 93901.57763026 -7 -83028485 -8 74881 -9 -24926804.04504742 - - --- !query 461 -TRUNCATE TABLE num_result --- !query 461 schema -struct<> --- !query 461 output - - - --- !query 462 -INSERT INTO num_result SELECT t1.id, t2.id, t1.val + t2.val - FROM num_data t1, num_data t2 --- !query 462 schema -struct<> --- !query 462 output - - - --- !query 463 -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_add t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != t2.expected --- !query 463 schema -struct --- !query 463 output - - - --- !query 464 -TRUNCATE TABLE num_result --- !query 464 schema -struct<> --- !query 464 output - - - --- !query 465 -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val + t2.val, 10) - FROM num_data t1, num_data t2 --- !query 465 schema -struct<> --- !query 465 output - - - --- !query 
466 -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 10) as expected - FROM num_result t1, num_exp_add t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != round(t2.expected, 10) --- !query 466 schema -struct --- !query 466 output - - - --- !query 467 -TRUNCATE TABLE num_result --- !query 467 schema -struct<> --- !query 467 output - - - --- !query 468 -INSERT INTO num_result SELECT t1.id, t2.id, t1.val - t2.val - FROM num_data t1, num_data t2 --- !query 468 schema -struct<> --- !query 468 output - - - --- !query 469 -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_sub t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != t2.expected --- !query 469 schema -struct --- !query 469 output - - - --- !query 470 -TRUNCATE TABLE num_result --- !query 470 schema -struct<> --- !query 470 output - - - --- !query 471 -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val - t2.val, 40) - FROM num_data t1, num_data t2 --- !query 471 schema -struct<> --- !query 471 output - - - --- !query 472 -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 40) - FROM num_result t1, num_exp_sub t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != round(t2.expected, 40) --- !query 472 schema -struct --- !query 472 output - - - --- !query 473 -TRUNCATE TABLE num_result --- !query 473 schema -struct<> --- !query 473 output - - - --- !query 474 -INSERT INTO num_result SELECT t1.id, t2.id, t1.val, t2.val, t1.val * t2.val - FROM num_data t1, num_data t2 --- !query 474 schema -struct<> --- !query 474 output -org.apache.spark.sql.AnalysisException -`default`.`num_result` requires that the data to be inserted have the same number of columns as the target table: target table has 3 column(s) but the inserted data has 5 column(s), including 0 partition column(s) having constant value(s).; - - --- !query 475 -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_mul t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = 
t2.id2 - AND t1.result != t2.expected --- !query 475 schema -struct --- !query 475 output - - - --- !query 476 -TRUNCATE TABLE num_result --- !query 476 schema -struct<> --- !query 476 output - - - --- !query 477 -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val * t2.val, 30) - FROM num_data t1, num_data t2 --- !query 477 schema -struct<> --- !query 477 output - - - --- !query 478 -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 30) as expected - FROM num_result t1, num_exp_mul t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != round(t2.expected, 30) --- !query 478 schema -struct --- !query 478 output -2 2 1179132047626883.596862 1179132047626883.5968621359 -2 3 -147998901.448361 -147998901.4483612726 -2 4 -267821744976817.811114 -267821744976817.8111137107 -2 5 -563049578578.769243 -563049578578.7692425067 -2 6 -3224438592470.184498 -3224438592470.1844981193 -2 7 2851072985828710.485884 2851072985828710.485883795 -2 8 -2571300635581.146276 -2571300635581.146276407 -2 9 855948866655588.453742 855948866655588.4537415092 -3 2 -147998901.448361 -147998901.4483612726 -3 5 70671.235896 70671.23589621 -3 6 404715.799586 404715.7995864206 -3 9 -107434525.434154 -107434525.4341543802 -4 2 -267821744976817.811114 -267821744976817.8111137107 -4 4 60831598315717.141462 60831598315717.14146161 -4 5 127888068979.993505 127888068979.9935054429 -4 6 732381731243.745116 732381731243.7451157641 -4 9 -194415646271340.181596 -194415646271340.1815956523 -5 2 -563049578578.769243 -563049578578.7692425067 -5 3 70671.235896 70671.23589621 -5 4 127888068979.993505 127888068979.9935054429 -5 5 268862871.275336 268862871.2753355571 -5 6 1539707782.768998 1539707782.7689977863 -5 9 -408725765384.257044 -408725765384.2570436602 -6 2 -3224438592470.184498 -3224438592470.1844981193 -6 3 404715.799586 404715.7995864206 -6 4 732381731243.745116 732381731243.7451157641 -6 5 1539707782.768998 1539707782.7689977863 -6 6 8817506281.451745 8817506281.4517452373 -6 7 
-7796505729750.377956 -7796505729750.3779561 -6 8 7031444034.531499 7031444034.53149906 -6 9 -2340666225110.299295 -2340666225110.2992952129 -7 2 2851072985828710.485884 2851072985828710.485883795 -7 6 -7796505729750.377956 -7796505729750.3779561 -7 9 2069634775752159.035759 2069634775752159.0357587 -8 2 -2571300635581.146276 -2571300635581.146276407 -8 6 7031444034.531499 7031444034.53149906 -8 9 -1866544013697.195857 -1866544013697.19585702 -9 2 855948866655588.453742 855948866655588.4537415092 -9 3 -107434525.434154 -107434525.4341543802 -9 4 -194415646271340.181596 -194415646271340.1815956523 -9 5 -408725765384.257044 -408725765384.2570436602 -9 6 -2340666225110.299295 -2340666225110.2992952129 -9 7 2069634775752159.035759 2069634775752159.0357587 -9 8 -1866544013697.195857 -1866544013697.19585702 -9 9 621345559900192.420121 621345559900192.42012063 - - --- !query 479 -TRUNCATE TABLE num_result --- !query 479 schema -struct<> --- !query 479 output - - - --- !query 480 -INSERT INTO num_result SELECT t1.id, t2.id, t1.val / t2.val - FROM num_data t1, num_data t2 - WHERE t2.val != '0.0' --- !query 480 schema -struct<> --- !query 480 output - - - --- !query 481 -SELECT t1.id1, t1.id2, t1.result, t2.expected - FROM num_result t1, num_exp_div t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != t2.expected --- !query 481 schema -struct --- !query 481 output -2 3 -7967167.567378 -7967167.5673775051 -2 4 -4.402675 -4.4026748005 -2 5 -2094.188669 -2094.1886691456 -2 6 -365.685999 -365.6859989148 -2 7 0.413575 0.4135748378 -2 8 -458.574167 -458.5741672173 -2 9 1.377573 1.3775729995 -3 2 0 -0.0000001255 -3 4 0.000001 0.0000005526 -3 5 0.000263 0.0002628523 -3 6 0.000046 0.0000458991 -3 7 0 -0.0000000519 -3 8 0.000058 0.000057558 -3 9 0 -0.0000001729 -4 2 -0.227135 -0.22713465 -4 3 1809619.817146 1809619.8171461717 -4 5 475.66281 475.6628104631 -4 6 83.059961 83.0599613844 -4 7 -0.093937 -0.093937176 -4 8 104.158083 104.1580829837 -4 9 -0.312895 -0.3128945611 
-5 2 -0.000478 -0.0004775119 -5 3 3804.417283 3804.4172832947 -5 4 0.002102 0.0021023296 -5 6 0.174619 0.1746194143 -5 7 -0.000197 -0.0001974869 -5 8 0.218975 0.2189746196 -5 9 -0.000658 -0.0006578075 -6 2 -0.002735 -0.0027345865 -6 3 21786.908963 21786.9089629374 -6 4 0.012039 0.0120394951 -6 5 5.72674 5.7267400867 -6 7 -0.001131 -0.0011309562 -6 8 1.254011 1.2540107321 -6 9 -0.003767 -0.0037670925 -7 2 2.417942 2.4179420715 -7 3 -19264149.651972 -19264149.6519721578 -7 4 -10.645413 -10.6454126273 -7 5 -5063.626889 -5063.6268888173 -7 6 -884.207562 -884.2075617401 -7 8 -1108.805772 -1108.8057718246 -7 9 3.330892 3.330891712 -8 2 -0.002181 -0.0021806723 -8 3 17373.781903 17373.7819025522 -8 4 0.009601 0.0096007911 -8 5 4.566739 4.5667392951 -8 6 0.797441 0.7974413411 -8 7 -0.000902 -0.0009018712 -8 9 -0.003004 -0.0030040353 -9 2 0.725914 0.7259143438 -9 3 -5783481.216948 -5783481.2169483573 -9 4 -3.195965 -3.1959647889 -9 5 -1520.201594 -1520.2015936432 -9 6 -265.456712 -265.4567119543 -9 7 0.30022 0.300219907 -9 8 -332.885566 -332.8855656982 - - --- !query 482 -TRUNCATE TABLE num_result --- !query 482 schema -struct<> --- !query 482 output - - - --- !query 483 -INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 80) - FROM num_data t1, num_data t2 - WHERE t2.val != '0.0' --- !query 483 schema -struct<> --- !query 483 output - - - --- !query 484 -SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 80) as expected - FROM num_result t1, num_exp_div t2 - WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 - AND t1.result != round(t2.expected, 80) --- !query 484 schema -struct --- !query 484 output -2 3 -7967167.567378 -7967167.5673775051 -2 4 -4.402675 -4.4026748005 -2 5 -2094.188669 -2094.1886691456 -2 6 -365.685999 -365.6859989148 -2 7 0.413575 0.4135748378 -2 8 -458.574167 -458.5741672173 -2 9 1.377573 1.3775729995 -3 2 0 -0.0000001255 -3 4 0.000001 0.0000005526 -3 5 0.000263 0.0002628523 -3 6 0.000046 0.0000458991 -3 7 0 -0.0000000519 -3 8 0.000058 
0.000057558 -3 9 0 -0.0000001729 -4 2 -0.227135 -0.22713465 -4 3 1809619.817146 1809619.8171461717 -4 5 475.66281 475.6628104631 -4 6 83.059961 83.0599613844 -4 7 -0.093937 -0.093937176 -4 8 104.158083 104.1580829837 -4 9 -0.312895 -0.3128945611 -5 2 -0.000478 -0.0004775119 -5 3 3804.417283 3804.4172832947 -5 4 0.002102 0.0021023296 -5 6 0.174619 0.1746194143 -5 7 -0.000197 -0.0001974869 -5 8 0.218975 0.2189746196 -5 9 -0.000658 -0.0006578075 -6 2 -0.002735 -0.0027345865 -6 3 21786.908963 21786.9089629374 -6 4 0.012039 0.0120394951 -6 5 5.72674 5.7267400867 -6 7 -0.001131 -0.0011309562 -6 8 1.254011 1.2540107321 -6 9 -0.003767 -0.0037670925 -7 2 2.417942 2.4179420715 -7 3 -19264149.651972 -19264149.6519721578 -7 4 -10.645413 -10.6454126273 -7 5 -5063.626889 -5063.6268888173 -7 6 -884.207562 -884.2075617401 -7 8 -1108.805772 -1108.8057718246 -7 9 3.330892 3.330891712 -8 2 -0.002181 -0.0021806723 -8 3 17373.781903 17373.7819025522 -8 4 0.009601 0.0096007911 -8 5 4.566739 4.5667392951 -8 6 0.797441 0.7974413411 -8 7 -0.000902 -0.0009018712 -8 9 -0.003004 -0.0030040353 -9 2 0.725914 0.7259143438 -9 3 -5783481.216948 -5783481.2169483573 -9 4 -3.195965 -3.1959647889 -9 5 -1520.201594 -1520.2015936432 -9 6 -265.456712 -265.4567119543 -9 7 0.30022 0.300219907 -9 8 -332.885566 -332.8855656982 - - --- !query 485 -TRUNCATE TABLE num_result --- !query 485 schema -struct<> --- !query 485 output - - - --- !query 486 -INSERT INTO num_result SELECT id, 0, SQRT(ABS(val)) - FROM num_data --- !query 486 schema -struct<> --- !query 486 output - - - --- !query 487 -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_sqrt t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected --- !query 487 schema -struct --- !query 487 output - - - --- !query 488 -TRUNCATE TABLE num_result --- !query 488 schema -struct<> --- !query 488 output - - - --- !query 489 -INSERT INTO num_result SELECT id, 0, LN(ABS(val)) - FROM num_data - WHERE val != '0.0' --- !query 489 schema -struct<> --- 
!query 489 output - - - --- !query 490 -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_ln t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected --- !query 490 schema -struct --- !query 490 output - - - --- !query 491 -TRUNCATE TABLE num_result --- !query 491 schema -struct<> --- !query 491 output - - - --- !query 492 -INSERT INTO num_result SELECT id, 0, LOG(cast('10' as decimal(38, 18)), ABS(val)) - FROM num_data - WHERE val != '0.0' --- !query 492 schema -struct<> --- !query 492 output - - - --- !query 493 -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_log10 t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected --- !query 493 schema -struct --- !query 493 output - - - --- !query 494 -TRUNCATE TABLE num_result --- !query 494 schema -struct<> --- !query 494 output - - - --- !query 495 -INSERT INTO num_result SELECT id, 0, POWER(cast('10' as decimal(38, 18)), LN(ABS(round(val,200)))) - FROM num_data - WHERE val != '0.0' --- !query 495 schema -struct<> --- !query 495 output - - - --- !query 496 -SELECT t1.id1, t1.result, t2.expected - FROM num_result t1, num_exp_power_10_ln t2 - WHERE t1.id1 = t2.id - AND t1.result != t2.expected --- !query 496 schema -struct --- !query 496 output -2 224790267919917440 224790267919917955.1326161858 -4 7405685069595001 7405685069594999.0773399947 -5 5068226527.321263 5068226527.3212726541 -6 281839893606.99365 281839893606.9937234336 -7 1716699575118595580 1716699575118597095.4233081991 -8 167361463828.0749 167361463828.0749132007 -9 107511333880051872 107511333880052007.0414112467 - - --- !query 497 -SELECT AVG(val) FROM num_data --- !query 497 schema -struct --- !query 497 output --13430913.5922423207 - - --- !query 498 -CREATE TABLE fract_only (id int, val decimal(4,4)) USING parquet --- !query 498 schema -struct<> --- !query 498 output - - - --- !query 499 -INSERT INTO fract_only VALUES (1, '0.0') --- !query 499 schema -struct<> --- !query 499 output - - - --- !query 500 -INSERT 
INTO fract_only VALUES (2, '0.1') --- !query 500 schema -struct<> --- !query 500 output - - - --- !query 501 -INSERT INTO fract_only VALUES (4, '-0.9999') --- !query 501 schema -struct<> --- !query 501 output - - - --- !query 502 -INSERT INTO fract_only VALUES (5, '0.99994') --- !query 502 schema -struct<> --- !query 502 output - - - --- !query 503 -INSERT INTO fract_only VALUES (7, '0.00001') --- !query 503 schema -struct<> --- !query 503 output - - - --- !query 504 -INSERT INTO fract_only VALUES (8, '0.00017') --- !query 504 schema -struct<> --- !query 504 output - - - --- !query 505 -SELECT * FROM fract_only --- !query 505 schema -struct --- !query 505 output -1 0 -2 0.1 -4 -0.9999 -5 0.9999 -7 0 -8 0.0002 - - --- !query 506 -DROP TABLE fract_only --- !query 506 schema -struct<> --- !query 506 output - - - --- !query 507 -SELECT decimal(double('NaN')) --- !query 507 schema -struct --- !query 507 output -NULL - - --- !query 508 -SELECT decimal(double('Infinity')) --- !query 508 schema -struct --- !query 508 output -NULL - - --- !query 509 -SELECT decimal(double('-Infinity')) --- !query 509 schema -struct --- !query 509 output -NULL - - --- !query 510 -SELECT decimal(float('NaN')) --- !query 510 schema -struct --- !query 510 output -NULL - - --- !query 511 -SELECT decimal(float('Infinity')) --- !query 511 schema -struct --- !query 511 output -NULL - - --- !query 512 -SELECT decimal(float('-Infinity')) --- !query 512 schema -struct --- !query 512 output -NULL - - --- !query 513 -CREATE TABLE ceil_floor_round (a decimal(38, 18)) USING parquet --- !query 513 schema -struct<> --- !query 513 output - - - --- !query 514 -INSERT INTO ceil_floor_round VALUES ('-5.5') --- !query 514 schema -struct<> --- !query 514 output - - - --- !query 515 -INSERT INTO ceil_floor_round VALUES ('-5.499999') --- !query 515 schema -struct<> --- !query 515 output - - - --- !query 516 -INSERT INTO ceil_floor_round VALUES ('9.5') --- !query 516 schema -struct<> --- !query 516 output - - - --- 
!query 517 -INSERT INTO ceil_floor_round VALUES ('9.4999999') --- !query 517 schema -struct<> --- !query 517 output - - - --- !query 518 -INSERT INTO ceil_floor_round VALUES ('0.0') --- !query 518 schema -struct<> --- !query 518 output - - - --- !query 519 -INSERT INTO ceil_floor_round VALUES ('0.0000001') --- !query 519 schema -struct<> --- !query 519 output - - - --- !query 520 -INSERT INTO ceil_floor_round VALUES ('-0.000001') --- !query 520 schema -struct<> --- !query 520 output - - - --- !query 521 -SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round --- !query 521 schema -struct --- !query 521 output --0.000001 0 0 -1 0 --5.499999 -5 -5 -6 -5 --5.5 -5 -5 -6 -6 -0 0 0 0 0 -0.0000001 1 1 0 0 -9.4999999 10 10 9 9 -9.5 10 10 9 10 - - --- !query 522 -DROP TABLE ceil_floor_round --- !query 522 schema -struct<> --- !query 522 output - - - --- !query 523 -CREATE TABLE num_input_test (n1 decimal(38, 18)) USING parquet --- !query 523 schema -struct<> --- !query 523 output - - - --- !query 524 -INSERT INTO num_input_test VALUES (trim(' 123')) --- !query 524 schema -struct<> --- !query 524 output - - - --- !query 525 -INSERT INTO num_input_test VALUES (trim(' 3245874 ')) --- !query 525 schema -struct<> --- !query 525 output - - - --- !query 526 -INSERT INTO num_input_test VALUES (trim(' -93853')) --- !query 526 schema -struct<> --- !query 526 output - - - --- !query 527 -INSERT INTO num_input_test VALUES ('555.50') --- !query 527 schema -struct<> --- !query 527 output - - - --- !query 528 -INSERT INTO num_input_test VALUES ('-555.50') --- !query 528 schema -struct<> --- !query 528 output - - - --- !query 529 -SELECT * FROM num_input_test --- !query 529 schema -struct --- !query 529 output --555.5 --93853 -123 -3245874 -555.5 - - --- !query 530 -select cast(999999999999999999999 as decimal(38, 0))/1000000000000000000000 --- !query 530 schema -struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) / CAST(1000000000000000000000 AS 
DECIMAL(38,0))):decimal(38,6)> --- !query 530 output -1 - - --- !query 531 -select div(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000) --- !query 531 schema -struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(38,0)> --- !query 531 output -0 - - --- !query 532 -select mod(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000) --- !query 532 schema -struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> --- !query 532 output -999999999999999999999 - - --- !query 533 -select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) --- !query 533 schema -struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(38,0)> --- !query 533 output --9 - - --- !query 534 -select mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) --- !query 534 schema -struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> --- !query 534 output --999999999999999999999 - - --- !query 535 -select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000)*1000000000000000000000 + mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) --- !query 535 schema -struct<(CAST((CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0)) * CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0)) + CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0))):decimal(38,0)> --- !query 535 output --9999999999999999999999 - - --- !query 536 -select mod (70.0,70) --- !query 536 schema 
-struct<(CAST(70.0 AS DECIMAL(3,1)) % CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(3,1)> --- !query 536 output -0 - - --- !query 537 -select div (70.0,70) --- !query 537 schema -struct<(CAST(70.0 AS DECIMAL(3,1)) div CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(2,0)> --- !query 537 output -1 - - --- !query 538 -select 70.0 / 70 --- !query 538 schema -struct<(CAST(70.0 AS DECIMAL(3,1)) / CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(8,6)> --- !query 538 output -1 - - --- !query 539 -select 12345678901234567890 % 123 --- !query 539 schema -struct<(CAST(12345678901234567890 AS DECIMAL(20,0)) % CAST(CAST(123 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(3,0)> --- !query 539 output -78 - - --- !query 540 -select exp(0.0) --- !query 540 schema -struct --- !query 540 output -1.0 - - --- !query 541 -select exp(1.0) --- !query 541 schema -struct --- !query 541 output -2.7182818284590455 - - --- !query 542 -select exp(32.999) --- !query 542 schema -struct --- !query 542 output -2.1442904349215556E14 - - --- !query 543 -select exp(-32.999) --- !query 543 schema -struct --- !query 543 output -4.663547361468238E-15 - - --- !query 544 -select exp(123.456) --- !query 544 schema -struct --- !query 544 output -4.132944352778106E53 - - --- !query 545 -select exp(-123.456) --- !query 545 schema -struct --- !query 545 output -2.4195825412645934E-54 - - --- !query 546 -select exp(1234.5678) --- !query 546 schema -struct --- !query 546 output -Infinity - - --- !query 547 -select * from range(cast(0.0 as decimal(38, 18)), cast(4.0 as decimal(38, 18))) --- !query 547 schema -struct --- !query 547 output -0 -1 -2 -3 - - --- !query 548 -select * from range(cast(0.1 as decimal(38, 18)), cast(4.0 as decimal(38, 18)), cast(1.3 as decimal(38, 18))) --- !query 548 schema -struct --- !query 548 output -0 -1 -2 -3 - - --- !query 549 -select * from range(cast(4.0 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), cast(-2.2 as decimal(38, 18))) --- !query 549 schema 
-struct --- !query 549 output -0 -2 -4 - - --- !query 550 -select ln(1.2345678e-28) --- !query 550 schema -struct --- !query 550 output --64.26166165451762 - - --- !query 551 -select ln(0.0456789) --- !query 551 schema -struct --- !query 551 output --3.0861187944847437 - - --- !query 552 -select ln(0.99949452) --- !query 552 schema -struct --- !query 552 output --5.056077980832118E-4 - - --- !query 553 -select ln(1.00049687395) --- !query 553 schema -struct --- !query 553 output -4.967505490136803E-4 - - --- !query 554 -select ln(1234.567890123456789) --- !query 554 schema -struct --- !query 554 output -7.11847630129779 - - --- !query 555 -select ln(5.80397490724e5) --- !query 555 schema -struct --- !query 555 output -13.271468476626518 - - --- !query 556 -select ln(9.342536355e34) --- !query 556 schema -struct --- !query 556 output -80.52247093552418 - - --- !query 557 -select log(3.4634998359873254962349856073435545) --- !query 557 schema -struct --- !query 557 output -1.2422795911259166 - - --- !query 558 -select log(9.999999999999999999) --- !query 558 schema -struct --- !query 558 output -2.302585092994046 - - --- !query 559 -select log(10.00000000000000000) --- !query 559 schema -struct --- !query 559 output -2.302585092994046 - - --- !query 560 -select log(10.00000000000000001) --- !query 560 schema -struct --- !query 560 output -2.302585092994046 - - --- !query 561 -select log(590489.45235237) --- !query 561 schema -struct --- !query 561 output -13.288707052228641 - - --- !query 562 -select log(0.99923, 4.58934e34) --- !query 562 schema -struct --- !query 562 output --103611.55579543479 - - --- !query 563 -select log(1.000016, 8.452010e18) --- !query 563 schema -struct --- !query 563 output -2723830.287707013 - - --- !query 564 -SELECT SUM(decimal(9999)) FROM range(1, 100001) --- !query 564 schema -struct --- !query 564 output -999900000 - - --- !query 565 -SELECT SUM(decimal(-9999)) FROM range(1, 100001) --- !query 565 schema -struct --- !query 565 output 
--999900000 - - --- !query 566 -DROP TABLE num_data --- !query 566 schema -struct<> --- !query 566 output - - - --- !query 567 -DROP TABLE num_exp_add --- !query 567 schema -struct<> --- !query 567 output - - - --- !query 568 -DROP TABLE num_exp_sub --- !query 568 schema -struct<> --- !query 568 output - - - --- !query 569 -DROP TABLE num_exp_div --- !query 569 schema -struct<> --- !query 569 output - - - --- !query 570 -DROP TABLE num_exp_mul --- !query 570 schema -struct<> --- !query 570 output - - - --- !query 571 -DROP TABLE num_exp_sqrt --- !query 571 schema -struct<> --- !query 571 output - - - --- !query 572 -DROP TABLE num_exp_ln --- !query 572 schema -struct<> --- !query 572 output - - - --- !query 573 -DROP TABLE num_exp_log10 --- !query 573 schema -struct<> --- !query 573 output - - - --- !query 574 -DROP TABLE num_exp_power_10_ln --- !query 574 schema -struct<> --- !query 574 output - - - --- !query 575 -DROP TABLE num_result --- !query 575 schema -struct<> --- !query 575 output - - - --- !query 576 -DROP TABLE num_input_test --- !query 576 schema -struct<> --- !query 576 output - diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/strings.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/strings.sql.out deleted file mode 100644 index 1e4e6e5021de8..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/strings.sql.out +++ /dev/null @@ -1,750 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 92 - - --- !query 0 -SELECT 'first line' -' - next line' - ' - third line' - AS `Three lines to one` --- !query 0 schema -struct --- !query 0 output -first line - next line - third line - - --- !query 1 -SELECT 'first line' -' - next line' /* this comment is not allowed here */ -' - third line' - AS `Illegal comment within continuation` --- !query 1 schema -struct --- !query 1 output -first line - next line - third line - - --- !query 2 -SELECT binary('\\xDeAdBeEf') --- !query 2 schema -struct 
--- !query 2 output -\xDeAdBeEf - - --- !query 3 -SELECT binary('\\x De Ad Be Ef ') --- !query 3 schema -struct --- !query 3 output -\x De Ad Be Ef - - --- !query 4 -SELECT binary('\\xDe00BeEf') --- !query 4 schema -struct --- !query 4 output -\xDe00BeEf - - --- !query 5 -SELECT binary('DeAdBeEf') --- !query 5 schema -struct --- !query 5 output -DeAdBeEf - - --- !query 6 -SELECT binary('De\\000dBeEf') --- !query 6 schema -struct --- !query 6 output -De\000dBeEf - - --- !query 7 -SELECT binary('De\\123dBeEf') --- !query 7 schema -struct --- !query 7 output -De\123dBeEf - - --- !query 8 -SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS `bunch o blanks` --- !query 8 schema -struct --- !query 8 output -true - - --- !query 9 -SELECT TRIM(LEADING FROM ' bunch o blanks ') = 'bunch o blanks ' AS `bunch o blanks ` --- !query 9 schema -struct --- !query 9 output -true - - --- !query 10 -SELECT TRIM(TRAILING FROM ' bunch o blanks ') = ' bunch o blanks' AS ` bunch o blanks` --- !query 10 schema -struct< bunch o blanks:boolean> --- !query 10 output -true - - --- !query 11 -SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS `some Xs` --- !query 11 schema -struct --- !query 11 output -true - - --- !query 12 -SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS `34567890` --- !query 12 schema -struct<34567890:boolean> --- !query 12 output -true - - --- !query 13 -SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS `456` --- !query 13 schema -struct<456:boolean> --- !query 13 output -true - - --- !query 14 -SELECT POSITION('4' IN '1234567890') = '4' AS `4` --- !query 14 schema -struct<4:boolean> --- !query 14 output -true - - --- !query 15 -SELECT POSITION('5' IN '1234567890') = '5' AS `5` --- !query 15 schema -struct<5:boolean> --- !query 15 output -true - - --- !query 16 -SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS `abc45f` --- !query 16 schema -struct --- !query 16 output -abc45f - - --- !query 17 -SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) 
AS `yabadaba` --- !query 17 schema -struct --- !query 17 output -yabadaba - - --- !query 18 -SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS `yabadabadoo` --- !query 18 schema -struct --- !query 18 output -yabadabadoo - - --- !query 19 -SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS `bubba` --- !query 19 schema -struct --- !query 19 output -bubba - - --- !query 20 -SELECT 'hawkeye' LIKE 'h%' AS `true` --- !query 20 schema -struct --- !query 20 output -true - - --- !query 21 -SELECT 'hawkeye' NOT LIKE 'h%' AS `false` --- !query 21 schema -struct --- !query 21 output -false - - --- !query 22 -SELECT 'hawkeye' LIKE 'H%' AS `false` --- !query 22 schema -struct --- !query 22 output -false - - --- !query 23 -SELECT 'hawkeye' NOT LIKE 'H%' AS `true` --- !query 23 schema -struct --- !query 23 output -true - - --- !query 24 -SELECT 'hawkeye' LIKE 'indio%' AS `false` --- !query 24 schema -struct --- !query 24 output -false - - --- !query 25 -SELECT 'hawkeye' NOT LIKE 'indio%' AS `true` --- !query 25 schema -struct --- !query 25 output -true - - --- !query 26 -SELECT 'hawkeye' LIKE 'h%eye' AS `true` --- !query 26 schema -struct --- !query 26 output -true - - --- !query 27 -SELECT 'hawkeye' NOT LIKE 'h%eye' AS `false` --- !query 27 schema -struct --- !query 27 output -false - - --- !query 28 -SELECT 'indio' LIKE '_ndio' AS `true` --- !query 28 schema -struct --- !query 28 output -true - - --- !query 29 -SELECT 'indio' NOT LIKE '_ndio' AS `false` --- !query 29 schema -struct --- !query 29 output -false - - --- !query 30 -SELECT 'indio' LIKE 'in__o' AS `true` --- !query 30 schema -struct --- !query 30 output -true - - --- !query 31 -SELECT 'indio' NOT LIKE 'in__o' AS `false` --- !query 31 schema -struct --- !query 31 output -false - - --- !query 32 -SELECT 'indio' LIKE 'in_o' AS `false` --- !query 32 schema -struct --- !query 32 output -false - - --- !query 33 -SELECT 'indio' NOT LIKE 'in_o' AS `true` --- !query 33 schema -struct --- !query 33 output -true - - 
--- !query 34 -SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f --- !query 34 schema -struct --- !query 34 output -true true false - - --- !query 35 -SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f --- !query 35 schema -struct --- !query 35 output -true true false - - --- !query 36 -SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f --- !query 36 schema -struct --- !query 36 output -true true false - - --- !query 37 -SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f --- !query 37 schema -struct --- !query 37 output -true true false - - --- !query 38 -SELECT 'jack' LIKE '%____%' AS t --- !query 38 schema -struct --- !query 38 output -true - - --- !query 39 -SELECT 'unknown' || ' and unknown' AS `Concat unknown types` --- !query 39 schema -struct --- !query 39 output -unknown and unknown - - --- !query 40 -SELECT string('text') || ' and unknown' AS `Concat text to unknown type` --- !query 40 schema -struct --- !query 40 output -text and unknown - - --- !query 41 -CREATE TABLE toasttest(f1 string) USING parquet --- !query 41 schema -struct<> --- !query 41 output - - - --- !query 42 -insert into toasttest values(repeat('1234567890',10000)) --- !query 42 schema -struct<> --- !query 42 output - - - --- !query 43 -insert into toasttest values(repeat('1234567890',10000)) --- !query 43 schema -struct<> --- !query 43 output - - - --- !query 44 -insert into toasttest values(repeat('1234567890',10000)) --- !query 44 schema -struct<> --- !query 44 output - - - --- !query 45 -insert into toasttest values(repeat('1234567890',10000)) --- !query 45 schema -struct<> --- !query 45 output - - - --- !query 46 -SELECT substr(f1, 99995) from toasttest --- !query 46 schema -struct --- !query 46 output -567890 -567890 -567890 -567890 - - --- !query 47 -SELECT substr(f1, 99995, 10) from toasttest --- !query 47 schema -struct --- !query 47 output -567890 -567890 -567890 -567890 - - --- !query 48 
-SELECT length('abcdef') AS `length_6` --- !query 48 schema -struct --- !query 48 output -6 - - --- !query 49 -SELECT position('cd', 'abcdef') AS `pos_3` --- !query 49 schema -struct --- !query 49 output -3 - - --- !query 50 -SELECT position('xy', 'abcdef') AS `pos_0` --- !query 50 schema -struct --- !query 50 output -0 - - --- !query 51 -SELECT replace('abcdef', 'de', '45') AS `abc45f` --- !query 51 schema -struct --- !query 51 output -abc45f - - --- !query 52 -SELECT replace('yabadabadoo', 'ba', '123') AS `ya123da123doo` --- !query 52 schema -struct --- !query 52 output -ya123da123doo - - --- !query 53 -SELECT replace('yabadoo', 'bad', '') AS `yaoo` --- !query 53 schema -struct --- !query 53 output -yaoo - - --- !query 54 -select hex(256*256*256 - 1) AS `ffffff` --- !query 54 schema -struct --- !query 54 output -FFFFFF - - --- !query 55 -select hex(bigint(bigint(bigint(bigint(256)*256)*256)*256) - 1) AS `ffffffff` --- !query 55 schema -struct --- !query 55 output -FFFFFFFF - - --- !query 56 -select md5('') = 'd41d8cd98f00b204e9800998ecf8427e' AS `TRUE` --- !query 56 schema -struct --- !query 56 output -true - - --- !query 57 -select md5('a') = '0cc175b9c0f1b6a831c399e269772661' AS `TRUE` --- !query 57 schema -struct --- !query 57 output -true - - --- !query 58 -select md5('abc') = '900150983cd24fb0d6963f7d28e17f72' AS `TRUE` --- !query 58 schema -struct --- !query 58 output -true - - --- !query 59 -select md5('message digest') = 'f96b697d7cb7938d525a2f31aaf161d0' AS `TRUE` --- !query 59 schema -struct --- !query 59 output -true - - --- !query 60 -select md5('abcdefghijklmnopqrstuvwxyz') = 'c3fcd3d76192e4007dfb496cca67e13b' AS `TRUE` --- !query 60 schema -struct --- !query 60 output -true - - --- !query 61 -select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') = 'd174ab98d277d9f5a5611c2c9f419d9f' AS `TRUE` --- !query 61 schema -struct --- !query 61 output -true - - --- !query 62 -select 
md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') = '57edf4a22be3c955ac49da2e2107b67a' AS `TRUE` --- !query 62 schema -struct --- !query 62 output -true - - --- !query 63 -select md5(binary('')) = 'd41d8cd98f00b204e9800998ecf8427e' AS `TRUE` --- !query 63 schema -struct --- !query 63 output -true - - --- !query 64 -select md5(binary('a')) = '0cc175b9c0f1b6a831c399e269772661' AS `TRUE` --- !query 64 schema -struct --- !query 64 output -true - - --- !query 65 -select md5(binary('abc')) = '900150983cd24fb0d6963f7d28e17f72' AS `TRUE` --- !query 65 schema -struct --- !query 65 output -true - - --- !query 66 -select md5(binary('message digest')) = 'f96b697d7cb7938d525a2f31aaf161d0' AS `TRUE` --- !query 66 schema -struct --- !query 66 output -true - - --- !query 67 -select md5(binary('abcdefghijklmnopqrstuvwxyz')) = 'c3fcd3d76192e4007dfb496cca67e13b' AS `TRUE` --- !query 67 schema -struct --- !query 67 output -true - - --- !query 68 -select md5(binary('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS `TRUE` --- !query 68 schema -struct --- !query 68 output -true - - --- !query 69 -select md5(binary('12345678901234567890123456789012345678901234567890123456789012345678901234567890')) = '57edf4a22be3c955ac49da2e2107b67a' AS `TRUE` --- !query 69 schema -struct --- !query 69 output -true - - --- !query 70 -SELECT initcap('hi THOMAS') --- !query 70 schema -struct --- !query 70 output -Hi Thomas - - --- !query 71 -SELECT lpad('hi', 5, 'xy') --- !query 71 schema -struct --- !query 71 output -xyxhi - - --- !query 72 -SELECT lpad('hi', 5) --- !query 72 schema -struct --- !query 72 output - hi - - --- !query 73 -SELECT lpad('hi', -5, 'xy') --- !query 73 schema -struct --- !query 73 output - - - --- !query 74 -SELECT lpad('hello', 2) --- !query 74 schema -struct --- !query 74 output -he - - --- !query 75 -SELECT lpad('hi', 5, '') --- !query 75 schema -struct --- !query 75 output -hi - - --- 
!query 76 -SELECT rpad('hi', 5, 'xy') --- !query 76 schema -struct --- !query 76 output -hixyx - - --- !query 77 -SELECT rpad('hi', 5) --- !query 77 schema -struct --- !query 77 output -hi - - --- !query 78 -SELECT rpad('hi', -5, 'xy') --- !query 78 schema -struct --- !query 78 output - - - --- !query 79 -SELECT rpad('hello', 2) --- !query 79 schema -struct --- !query 79 output -he - - --- !query 80 -SELECT rpad('hi', 5, '') --- !query 80 schema -struct --- !query 80 output -hi - - --- !query 81 -SELECT ltrim('zzzytrim', 'xyz') --- !query 81 schema -struct --- !query 81 output -trim - - --- !query 82 -SELECT translate('', '14', 'ax') --- !query 82 schema -struct --- !query 82 output - - - --- !query 83 -SELECT translate('12345', '14', 'ax') --- !query 83 schema -struct --- !query 83 output -a23x5 - - --- !query 84 -SELECT ascii('x') --- !query 84 schema -struct --- !query 84 output -120 - - --- !query 85 -SELECT ascii('') --- !query 85 schema -struct --- !query 85 output -0 - - --- !query 86 -SELECT chr(65) --- !query 86 schema -struct --- !query 86 output -A - - --- !query 87 -SELECT chr(0) --- !query 87 schema -struct --- !query 87 output - - - --- !query 88 -SELECT repeat('Pg', 4) --- !query 88 schema -struct --- !query 88 output -PgPgPgPg - - --- !query 89 -SELECT repeat('Pg', -4) --- !query 89 schema -struct --- !query 89 output - - - --- !query 90 -SELECT trim(binary('\\000') from binary('\\000Tom\\000')) --- !query 90 schema -struct --- !query 90 output -Tom - - --- !query 91 -DROP TABLE toasttest --- !query 91 schema -struct<> --- !query 91 output - diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/pgSQL/timestamp.sql.out deleted file mode 100644 index 13a1d09b71b76..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/timestamp.sql.out +++ /dev/null @@ -1,138 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 15 - - --- !query 0 
-CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet --- !query 0 schema -struct<> --- !query 0 output - - - --- !query 1 -INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02') --- !query 1 schema -struct<> --- !query 1 output - - - --- !query 2 -INSERT INTO TIMESTAMP_TBL VALUES ('1997-01-02 03:04:05') --- !query 2 schema -struct<> --- !query 2 output - - - --- !query 3 -INSERT INTO TIMESTAMP_TBL VALUES ('1997-02-10 17:32:01-08') --- !query 3 schema -struct<> --- !query 3 output - - - --- !query 4 -INSERT INTO TIMESTAMP_TBL VALUES ('2001-09-22T18:19:20') --- !query 4 schema -struct<> --- !query 4 output - - - --- !query 5 -SELECT '' AS `64`, d1 FROM TIMESTAMP_TBL --- !query 5 schema -struct<64:string,d1:timestamp> --- !query 5 output - 1997-01-02 00:00:00 - 1997-01-02 03:04:05 - 1997-02-10 17:32:01 - 2001-09-22 18:19:20 - - --- !query 6 -SELECT '' AS `48`, d1 FROM TIMESTAMP_TBL - WHERE d1 > timestamp '1997-01-02' --- !query 6 schema -struct<48:string,d1:timestamp> --- !query 6 output - 1997-01-02 03:04:05 - 1997-02-10 17:32:01 - 2001-09-22 18:19:20 - - --- !query 7 -SELECT '' AS `15`, d1 FROM TIMESTAMP_TBL - WHERE d1 < timestamp '1997-01-02' --- !query 7 schema -struct<15:string,d1:timestamp> --- !query 7 output - - - --- !query 8 -SELECT '' AS one, d1 FROM TIMESTAMP_TBL - WHERE d1 = timestamp '1997-01-02' --- !query 8 schema -struct --- !query 8 output - 1997-01-02 00:00:00 - - --- !query 9 -SELECT '' AS `63`, d1 FROM TIMESTAMP_TBL - WHERE d1 != timestamp '1997-01-02' --- !query 9 schema -struct<63:string,d1:timestamp> --- !query 9 output - 1997-01-02 03:04:05 - 1997-02-10 17:32:01 - 2001-09-22 18:19:20 - - --- !query 10 -SELECT '' AS `16`, d1 FROM TIMESTAMP_TBL - WHERE d1 <= timestamp '1997-01-02' --- !query 10 schema -struct<16:string,d1:timestamp> --- !query 10 output - 1997-01-02 00:00:00 - - --- !query 11 -SELECT '' AS `49`, d1 FROM TIMESTAMP_TBL - WHERE d1 >= timestamp '1997-01-02' --- !query 11 schema -struct<49:string,d1:timestamp> --- !query 11 output - 
1997-01-02 00:00:00 - 1997-01-02 03:04:05 - 1997-02-10 17:32:01 - 2001-09-22 18:19:20 - - --- !query 12 -SELECT '' AS date_trunc_week, date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc --- !query 12 schema -struct --- !query 12 output - 2004-02-23 00:00:00 - - --- !query 13 -SELECT make_timestamp(2014,12,28,6,30,45.887) --- !query 13 schema -struct --- !query 13 output -2014-12-28 06:30:45.887 - - --- !query 14 -DROP TABLE TIMESTAMP_TBL --- !query 14 schema -struct<> --- !query 14 output - diff --git a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out index 9a8f783da4369..ac4e71e244bc0 100644 --- a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 32 --- !query 0 +-- !query create temporary view courseSales as select * from values ("dotNET", 2012, 10000), ("Java", 2012, 20000), @@ -10,35 +10,35 @@ create temporary view courseSales as select * from values ("dotNET", 2013, 48000), ("Java", 2013, 30000) as courseSales(course, year, earnings) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view years as select * from values (2012, 1), (2013, 2) as years(y, s) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view yearsWithComplexTypes as select * from values (2012, array(1, 1), map('1', 1), struct(1, 'a')), (2013, array(2, 2), map('2', 2), struct(2, 'b')) as yearsWithComplexTypes(y, a, m, s) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -46,27 +46,27 @@ PIVOT ( sum(earnings) FOR course IN ('dotNET', 'Java') ) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query 
output 2012 15000 20000 2013 48000 30000 --- !query 4 +-- !query SELECT * FROM courseSales PIVOT ( sum(earnings) FOR year IN (2012, 2013) ) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output Java 20000 30000 dotNET 15000 48000 --- !query 5 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -74,14 +74,14 @@ PIVOT ( sum(earnings), avg(earnings) FOR course IN ('dotNET', 'Java') ) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 2012 15000 7500.0 20000 20000.0 2013 48000 48000.0 30000 30000.0 --- !query 6 +-- !query SELECT * FROM ( SELECT course, earnings FROM courseSales ) @@ -89,13 +89,13 @@ PIVOT ( sum(earnings) FOR course IN ('dotNET', 'Java') ) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 63000 50000 --- !query 7 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -103,13 +103,13 @@ PIVOT ( sum(earnings), min(year) FOR course IN ('dotNET', 'Java') ) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 63000 2012 50000 2012 --- !query 8 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -119,16 +119,16 @@ PIVOT ( sum(earnings) FOR s IN (1, 2) ) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output Java 2012 20000 NULL Java 2013 NULL 30000 dotNET 2012 15000 NULL dotNET 2013 NULL 48000 --- !query 9 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -138,14 +138,14 @@ PIVOT ( sum(earnings), min(s) FOR course IN ('dotNET', 'Java') ) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 2012 15000 1 20000 1 2013 48000 2 30000 2 --- !query 10 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -155,14 +155,14 @@ PIVOT ( sum(earnings * s) FOR course IN ('dotNET', 'Java') ) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- 
!query output 2012 15000 20000 2013 96000 60000 --- !query 11 +-- !query SELECT 2012_s, 2013_s, 2012_a, 2013_a, c FROM ( SELECT year y, course c, earnings e FROM courseSales ) @@ -170,14 +170,14 @@ PIVOT ( sum(e) s, avg(e) a FOR y IN (2012, 2013) ) --- !query 11 schema +-- !query schema struct<2012_s:bigint,2013_s:bigint,2012_a:double,2013_a:double,c:string> --- !query 11 output +-- !query output 15000 48000 7500.0 48000.0 dotNET 20000 30000 20000.0 30000.0 Java --- !query 12 +-- !query SELECT firstYear_s, secondYear_s, firstYear_a, secondYear_a, c FROM ( SELECT year y, course c, earnings e FROM courseSales ) @@ -185,27 +185,27 @@ PIVOT ( sum(e) s, avg(e) a FOR y IN (2012 as firstYear, 2013 secondYear) ) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 15000 48000 7500.0 48000.0 dotNET 20000 30000 20000.0 30000.0 Java --- !query 13 +-- !query SELECT * FROM courseSales PIVOT ( abs(earnings) FOR year IN (2012, 2013) ) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function.; --- !query 14 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -213,14 +213,14 @@ PIVOT ( sum(earnings), year FOR course IN ('dotNET', 'Java') ) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function.; --- !query 15 +-- !query SELECT * FROM ( SELECT course, earnings FROM courseSales ) @@ -228,14 +228,14 @@ PIVOT ( sum(earnings) FOR year IN (2012, 2013) ) --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`year`' given input columns: [__auto_generated_subquery_name.course, 
__auto_generated_subquery_name.earnings]; line 4 pos 0 --- !query 16 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -243,14 +243,14 @@ PIVOT ( ceil(sum(earnings)), avg(earnings) + 1 as a1 FOR course IN ('dotNET', 'Java') ) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 2012 15000 7501.0 20000 20001.0 2013 48000 48001.0 30000 30001.0 --- !query 17 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -258,14 +258,14 @@ PIVOT ( sum(avg(earnings)) FOR course IN ('dotNET', 'Java') ) --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; --- !query 18 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -275,14 +275,14 @@ PIVOT ( sum(earnings) FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 15000 NULL 2 NULL 30000 --- !query 19 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -292,14 +292,14 @@ PIVOT ( sum(earnings) FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 2012 NULL 20000 2013 48000 NULL --- !query 20 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -309,40 +309,40 @@ PIVOT ( sum(earnings) FOR (course, year) IN ('dotNET', 'Java') ) --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output org.apache.spark.sql.AnalysisException Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct; --- !query 21 +-- !query SELECT * FROM courseSales PIVOT ( sum(earnings) FOR year IN (s, 2013) ) --- !query 21 schema +-- 
!query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`s`' given input columns: [coursesales.course, coursesales.earnings, coursesales.year]; line 4 pos 15 --- !query 22 +-- !query SELECT * FROM courseSales PIVOT ( sum(earnings) FOR year IN (course, 2013) ) --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException Literal expressions required for pivot values, found 'course#x'; --- !query 23 +-- !query SELECT * FROM ( SELECT course, year, a FROM courseSales @@ -352,14 +352,14 @@ PIVOT ( min(a) FOR course IN ('dotNET', 'Java') ) --- !query 23 schema +-- !query schema struct,Java:array> --- !query 23 output +-- !query output 2012 [1,1] [1,1] 2013 [2,2] [2,2] --- !query 24 +-- !query SELECT * FROM ( SELECT course, year, y, a FROM courseSales @@ -369,14 +369,14 @@ PIVOT ( max(a) FOR (y, course) IN ((2012, 'dotNET'), (2013, 'Java')) ) --- !query 24 schema +-- !query schema struct,[2013, Java]:array> --- !query 24 output +-- !query output 2012 [1,1] NULL 2013 NULL [2,2] --- !query 25 +-- !query SELECT * FROM ( SELECT earnings, year, a FROM courseSales @@ -386,14 +386,14 @@ PIVOT ( sum(earnings) FOR a IN (array(1, 1), array(2, 2)) ) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 2012 35000 NULL 2013 NULL 78000 --- !query 26 +-- !query SELECT * FROM ( SELECT course, earnings, year, a FROM courseSales @@ -403,14 +403,14 @@ PIVOT ( sum(earnings) FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 2012 15000 NULL 2013 NULL 30000 --- !query 27 +-- !query SELECT * FROM ( SELECT earnings, year, s FROM courseSales @@ -420,14 +420,14 @@ PIVOT ( sum(earnings) FOR s IN ((1, 'a'), (2, 'b')) ) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 2012 35000 NULL 2013 NULL 78000 --- 
!query 28 +-- !query SELECT * FROM ( SELECT course, earnings, year, s FROM courseSales @@ -437,14 +437,14 @@ PIVOT ( sum(earnings) FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 2012 15000 NULL 2013 NULL 30000 --- !query 29 +-- !query SELECT * FROM ( SELECT earnings, year, m FROM courseSales @@ -454,14 +454,14 @@ PIVOT ( sum(earnings) FOR m IN (map('1', 1), map('2', 2)) ) --- !query 29 schema +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.AnalysisException Invalid pivot column 'm#x'. Pivot columns must be comparable.; --- !query 30 +-- !query SELECT * FROM ( SELECT course, earnings, year, m FROM courseSales @@ -471,14 +471,14 @@ PIVOT ( sum(earnings) FOR (course, m) IN (('dotNET', map('1', 1)), ('Java', map('2', 2))) ) --- !query 30 schema +-- !query schema struct<> --- !query 30 output +-- !query output org.apache.spark.sql.AnalysisException Invalid pivot column 'named_struct(course, course#x, m, m#x)'. 
Pivot columns must be comparable.; --- !query 31 +-- !query SELECT * FROM ( SELECT course, earnings, "a" as a, "z" as z, "b" as b, "y" as y, "c" as c, "x" as x, "d" as d, "w" as w FROM courseSales @@ -487,7 +487,7 @@ PIVOT ( sum(Earnings) FOR Course IN ('dotNET', 'Java') ) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output a z b y c x d w 63000 50000 diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out similarity index 73% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part1.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out index 29bafb42f579e..5efb58c7fc1b0 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out @@ -2,330 +2,330 @@ -- Number of queries: 44 --- !query 0 +-- !query SELECT avg(four) AS avg_1 FROM onek --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1.5 --- !query 1 +-- !query SELECT avg(a) AS avg_32 FROM aggtest WHERE a < 100 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 32.666666666666664 --- !query 2 +-- !query select CAST(avg(b) AS Decimal(10,3)) AS avg_107_943 FROM aggtest --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 107.943 --- !query 3 +-- !query SELECT sum(four) AS sum_1500 FROM onek --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1500 --- !query 4 +-- !query SELECT sum(a) AS sum_198 FROM aggtest --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 198 --- !query 5 +-- !query SELECT sum(b) AS avg_431_773 FROM aggtest --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 431.77260909229517 --- 
!query 6 +-- !query SELECT max(four) AS max_3 FROM onek --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 3 --- !query 7 +-- !query SELECT max(a) AS max_100 FROM aggtest --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 100 --- !query 8 +-- !query SELECT max(aggtest.b) AS max_324_78 FROM aggtest --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 324.78 --- !query 9 +-- !query SELECT stddev_pop(b) FROM aggtest --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 131.10703231895047 --- !query 10 +-- !query SELECT stddev_samp(b) FROM aggtest --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 151.38936080399804 --- !query 11 +-- !query SELECT var_pop(b) FROM aggtest --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 17189.053923482323 --- !query 12 +-- !query SELECT var_samp(b) FROM aggtest --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 22918.738564643096 --- !query 13 +-- !query SELECT stddev_pop(CAST(b AS Decimal(38,0))) FROM aggtest --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 131.18117242958306 --- !query 14 +-- !query SELECT stddev_samp(CAST(b AS Decimal(38,0))) FROM aggtest --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 151.47497042966097 --- !query 15 +-- !query SELECT var_pop(CAST(b AS Decimal(38,0))) FROM aggtest --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 17208.5 --- !query 16 +-- !query SELECT var_samp(CAST(b AS Decimal(38,0))) FROM aggtest --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 22944.666666666668 --- !query 17 +-- !query SELECT var_pop(1.0), var_samp(2.0) --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 0.0 NaN --- !query 18 +-- 
!query SELECT stddev_pop(CAST(3.0 AS Decimal(38,0))), stddev_samp(CAST(4.0 AS Decimal(38,0))) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 0.0 NaN --- !query 19 +-- !query select sum(CAST(null AS int)) from range(1,4) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output NULL --- !query 20 +-- !query select sum(CAST(null AS long)) from range(1,4) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output NULL --- !query 21 +-- !query select sum(CAST(null AS Decimal(38,0))) from range(1,4) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL --- !query 22 +-- !query select sum(CAST(null AS DOUBLE)) from range(1,4) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output NULL --- !query 23 +-- !query select avg(CAST(null AS int)) from range(1,4) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output NULL --- !query 24 +-- !query select avg(CAST(null AS long)) from range(1,4) --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output NULL --- !query 25 +-- !query select avg(CAST(null AS Decimal(38,0))) from range(1,4) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output NULL --- !query 26 +-- !query select avg(CAST(null AS DOUBLE)) from range(1,4) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output NULL --- !query 27 +-- !query select sum(CAST('NaN' AS DOUBLE)) from range(1,4) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output NaN --- !query 28 +-- !query select avg(CAST('NaN' AS DOUBLE)) from range(1,4) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output NaN --- !query 29 +-- !query SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE)) FROM (VALUES (CAST('1' AS DOUBLE)), (CAST('infinity' AS DOUBLE))) v(x) --- !query 29 
schema +-- !query schema struct --- !query 29 output +-- !query output Infinity NaN --- !query 30 +-- !query SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE)) FROM (VALUES ('infinity'), ('1')) v(x) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output Infinity NaN --- !query 31 +-- !query SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE)) FROM (VALUES ('infinity'), ('infinity')) v(x) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output Infinity NaN --- !query 32 +-- !query SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE)) FROM (VALUES ('-infinity'), ('infinity')) v(x) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output NaN NaN --- !query 33 +-- !query SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE)) FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x) --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output 1.00000005E8 2.5 --- !query 34 +-- !query SELECT avg(CAST(x AS DOUBLE)), var_pop(CAST(x AS DOUBLE)) FROM (VALUES (7000000000005), (7000000000007)) v(x) --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 7.000000000006E12 1.0 --- !query 35 +-- !query SELECT covar_pop(b, a), covar_samp(b, a) FROM aggtest --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 653.6289553875104 871.5052738500139 --- !query 36 +-- !query SELECT corr(b, a) FROM aggtest --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 0.1396345165178734 --- !query 37 +-- !query SELECT count(four) AS cnt_1000 FROM onek --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 1000 --- !query 38 +-- !query SELECT count(DISTINCT four) AS cnt_4 FROM onek --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 4 --- !query 39 +-- !query select ten, count(*), sum(four) from onek group 
by ten order by ten --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 0 100 100 1 100 200 2 100 100 @@ -338,12 +338,12 @@ struct 9 100 200 --- !query 40 +-- !query select ten, count(four), sum(DISTINCT four) from onek group by ten order by ten --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output 0 100 2 1 100 4 2 100 2 @@ -356,13 +356,13 @@ struct 9 100 4 --- !query 41 +-- !query select ten, sum(distinct four) from onek a group by ten having exists (select 1 from onek b where sum(distinct a.four) = b.four) --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output 0 2 2 2 4 2 @@ -370,14 +370,14 @@ struct 8 2 --- !query 42 +-- !query select ten, sum(distinct four) from onek a group by ten having exists (select 1 from onek b where sum(distinct a.four + b.four) = b.four) --- !query 42 schema +-- !query schema struct<> --- !query 42 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
@@ -385,12 +385,12 @@ Expression in where clause: [(sum(DISTINCT CAST((outer() + b.`four`) AS BIGINT)) Invalid expressions: [sum(DISTINCT CAST((outer() + b.`four`) AS BIGINT))]; --- !query 43 +-- !query select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o --- !query 43 schema +-- !query schema struct<> --- !query 43 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`o.unique1`' given input columns: [i.even, i.fivethous, i.four, i.hundred, i.odd, i.string4, i.stringu1, i.stringu2, i.ten, i.tenthous, i.thousand, i.twenty, i.two, i.twothousand, i.unique1, i.unique2]; line 2 pos 63 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out new file mode 100644 index 0000000000000..6633bf5d114ed --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part2.sql.out @@ -0,0 +1,303 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 29 + + +-- !query +create temporary view int4_tbl as select * from values + (0), + (123456), + (-123456), + (2147483647), + (-2147483647) + as int4_tbl(f1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (3, 3, 3, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0 +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT + BIT_AND(cast(b1 as tinyint)) AS a1, + BIT_AND(cast(b2 as smallint)) AS b1, + BIT_AND(b3) AS c1, + BIT_AND(b4) AS d1, + BIT_OR(cast(b1 as tinyint)) AS e7, + BIT_OR(cast(b2 as 
smallint)) AS f7, + BIT_OR(b3) AS g7, + BIT_OR(b4) AS h3 +FROM bitwise_test +-- !query schema +struct +-- !query output +1 1 1 1 7 7 7 3 + + +-- !query +SELECT b1 , bit_and(b2), bit_or(b4) FROM bitwise_test GROUP BY b1 +-- !query schema +struct +-- !query output +1 1 1 +3 3 NULL +7 7 3 + + +-- !query +SELECT b1, bit_and(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7 +-- !query schema +struct +-- !query output +1 1 +3 3 + + +-- !query +SELECT b1, b2, bit_and(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test +-- !query schema +struct +-- !query output +1 1 1 +3 3 3 +7 7 7 + + +-- !query +SELECT b1, b2, bit_or(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test +-- !query schema +struct +-- !query output +1 1 1 +3 3 3 +7 7 7 + + +-- !query +SELECT + (NULL AND NULL) IS NULL AS `t`, + (TRUE AND NULL) IS NULL AS `t`, + (FALSE AND NULL) IS NULL AS `t`, + (NULL AND TRUE) IS NULL AS `t`, + (NULL AND FALSE) IS NULL AS `t`, + (TRUE AND TRUE) AS `t`, + NOT (TRUE AND FALSE) AS `t`, + NOT (FALSE AND TRUE) AS `t`, + NOT (FALSE AND FALSE) AS `t` +-- !query schema +struct +-- !query output +true true false true false true true true true + + +-- !query +SELECT + (NULL OR NULL) IS NULL AS `t`, + (TRUE OR NULL) IS NULL AS `t`, + (FALSE OR NULL) IS NULL AS `t`, + (NULL OR TRUE) IS NULL AS `t`, + (NULL OR FALSE) IS NULL AS `t`, + (TRUE OR TRUE) AS `t`, + (TRUE OR FALSE) AS `t`, + (FALSE OR TRUE) AS `t`, + NOT (FALSE OR FALSE) AS `t` +-- !query schema +struct +-- !query output +true false true false true true true true true + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) 
AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test +-- !query schema +struct +-- !query output +false true false NULL false true + + +-- !query +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test +-- !query schema +struct +-- !query output +false true false NULL false true + + +-- !query +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test +-- !query schema +struct +-- !query output +true true false NULL false true + + +-- !query +select min(unique1) from tenk1 +-- !query schema +struct +-- !query output +0 + + +-- !query +select max(unique1) from tenk1 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique1) from tenk1 where unique1 < 42 +-- !query schema +struct +-- !query output +41 + + +-- !query +select max(unique1) from tenk1 where unique1 > 42 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique1) from tenk1 where unique1 > 42000 +-- !query schema +struct +-- !query output +NULL + + +-- !query +select max(tenthous) from tenk1 where thousand = 33 +-- !query schema +struct +-- !query output +9033 + + +-- !query +select min(tenthous) from tenk1 where thousand = 33 +-- !query schema +struct +-- !query output +33 + + +-- !query +select distinct max(unique2) from tenk1 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique2) from tenk1 order by 1 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique2) from tenk1 order by max(unique2) +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique2) from tenk1 order by max(unique2)+1 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select t1.max_unique2, g from (select max(unique2) as max_unique2 FROM 
tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +-- !query schema +struct +-- !query output +9999 3 +9999 2 +9999 1 + + +-- !query +select max(100) from tenk1 +-- !query schema +struct +-- !query output +100 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out new file mode 100644 index 0000000000000..69f96b02782e3 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out @@ -0,0 +1,38 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 4 + + +-- !query +select max(min(unique1)) from tenk1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; + + +-- !query +select min(unique1) filter (where unique1 > 100) from tenk1 +-- !query schema +struct 100)):int> +-- !query output +101 + + +-- !query +select sum(1/ten) filter (where ten > 0) from tenk1 +-- !query schema +struct 0)):double> +-- !query output +2828.9682539682954 + + +-- !query +select (select count(*) + from (values (1)) t0(inner_c)) +from (values (2),(3)) t1(outer_c) +-- !query schema +struct +-- !query output +1 +1 diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out similarity index 100% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/aggregates_part4.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out similarity index 63% rename from 
sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out index 203806d43368a..0347e0dc7853b 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out @@ -2,475 +2,475 @@ -- Number of queries: 92 --- !query 0 +-- !query SELECT 1 AS one --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1 --- !query 1 +-- !query SELECT true AS true --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output true --- !query 2 +-- !query SELECT false AS `false` --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output false --- !query 3 +-- !query SELECT boolean('t') AS true --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output true --- !query 4 +-- !query SELECT boolean(' f ') AS `false` --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output false --- !query 5 +-- !query SELECT boolean('true') AS true --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output true --- !query 6 +-- !query SELECT boolean('test') AS error --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL --- !query 7 +-- !query SELECT boolean('false') AS `false` --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output false --- !query 8 +-- !query SELECT boolean('foo') AS error --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output NULL --- !query 9 +-- !query SELECT boolean('y') AS true --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output true --- !query 10 +-- !query SELECT boolean('yes') AS true --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output true --- !query 11 +-- !query SELECT boolean('yeah') AS error --- 
!query 11 schema +-- !query schema struct --- !query 11 output +-- !query output NULL --- !query 12 +-- !query SELECT boolean('n') AS `false` --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output false --- !query 13 +-- !query SELECT boolean('no') AS `false` --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output false --- !query 14 +-- !query SELECT boolean('nay') AS error --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output NULL --- !query 15 +-- !query SELECT boolean('on') AS true --- !query 15 schema +-- !query schema struct --- !query 15 output -true +-- !query output +NULL --- !query 16 +-- !query SELECT boolean('off') AS `false` --- !query 16 schema +-- !query schema struct --- !query 16 output -false +-- !query output +NULL --- !query 17 +-- !query SELECT boolean('of') AS `false` --- !query 17 schema +-- !query schema struct --- !query 17 output -false +-- !query output +NULL --- !query 18 +-- !query SELECT boolean('o') AS error --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output NULL --- !query 19 +-- !query SELECT boolean('on_') AS error --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output NULL --- !query 20 +-- !query SELECT boolean('off_') AS error --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output NULL --- !query 21 +-- !query SELECT boolean('1') AS true --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output true --- !query 22 +-- !query SELECT boolean('11') AS error --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output NULL --- !query 23 +-- !query SELECT boolean('0') AS `false` --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output false --- !query 24 +-- !query SELECT boolean('000') AS error --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query 
output NULL --- !query 25 +-- !query SELECT boolean('') AS error --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output NULL --- !query 26 +-- !query SELECT boolean('t') or boolean('f') AS true --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output true --- !query 27 +-- !query SELECT boolean('t') and boolean('f') AS `false` --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output false --- !query 28 +-- !query SELECT not boolean('f') AS true --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output true --- !query 29 +-- !query SELECT boolean('t') = boolean('f') AS `false` --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output false --- !query 30 +-- !query SELECT boolean('t') <> boolean('f') AS true --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output true --- !query 31 +-- !query SELECT boolean('t') > boolean('f') AS true --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output true --- !query 32 +-- !query SELECT boolean('t') >= boolean('f') AS true --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output true --- !query 33 +-- !query SELECT boolean('f') < boolean('t') AS true --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output true --- !query 34 +-- !query SELECT boolean('f') <= boolean('t') AS true --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output true --- !query 35 +-- !query SELECT boolean(string('TrUe')) AS true, boolean(string('fAlse')) AS `false` --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output true false --- !query 36 +-- !query SELECT boolean(string(' true ')) AS true, boolean(string(' FALSE')) AS `false` --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output true false --- !query 37 +-- 
!query SELECT string(boolean(true)) AS true, string(boolean(false)) AS `false` --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output true false --- !query 38 +-- !query SELECT boolean(string(' tru e ')) AS invalid --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output NULL --- !query 39 +-- !query SELECT boolean(string('')) AS invalid --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output NULL --- !query 40 +-- !query CREATE TABLE BOOLTBL1 (f1 boolean) USING parquet --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output --- !query 41 +-- !query INSERT INTO BOOLTBL1 VALUES (cast('t' as boolean)) --- !query 41 schema +-- !query schema struct<> --- !query 41 output +-- !query output --- !query 42 +-- !query INSERT INTO BOOLTBL1 VALUES (cast('True' as boolean)) --- !query 42 schema +-- !query schema struct<> --- !query 42 output +-- !query output --- !query 43 +-- !query INSERT INTO BOOLTBL1 VALUES (cast('true' as boolean)) --- !query 43 schema +-- !query schema struct<> --- !query 43 output +-- !query output --- !query 44 +-- !query SELECT '' AS t_3, BOOLTBL1.* FROM BOOLTBL1 --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output true true true --- !query 45 +-- !query SELECT '' AS t_3, BOOLTBL1.* FROM BOOLTBL1 WHERE f1 = boolean('true') --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output true true true --- !query 46 +-- !query SELECT '' AS t_3, BOOLTBL1.* FROM BOOLTBL1 WHERE f1 <> boolean('false') --- !query 46 schema +-- !query schema struct --- !query 46 output +-- !query output true true true --- !query 47 +-- !query SELECT '' AS zero, BOOLTBL1.* FROM BOOLTBL1 WHERE booleq(boolean('false'), f1) --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output --- !query 48 +-- !query INSERT INTO BOOLTBL1 VALUES (boolean('f')) --- !query 48 schema +-- !query schema 
struct<> --- !query 48 output +-- !query output --- !query 49 +-- !query SELECT '' AS f_1, BOOLTBL1.* FROM BOOLTBL1 WHERE f1 = boolean('false') --- !query 49 schema +-- !query schema struct --- !query 49 output +-- !query output false --- !query 50 +-- !query CREATE TABLE BOOLTBL2 (f1 boolean) USING parquet --- !query 50 schema +-- !query schema struct<> --- !query 50 output +-- !query output --- !query 51 +-- !query INSERT INTO BOOLTBL2 VALUES (boolean('f')) --- !query 51 schema +-- !query schema struct<> --- !query 51 output +-- !query output --- !query 52 +-- !query INSERT INTO BOOLTBL2 VALUES (boolean('false')) --- !query 52 schema +-- !query schema struct<> --- !query 52 output +-- !query output --- !query 53 +-- !query INSERT INTO BOOLTBL2 VALUES (boolean('False')) --- !query 53 schema +-- !query schema struct<> --- !query 53 output +-- !query output --- !query 54 +-- !query INSERT INTO BOOLTBL2 VALUES (boolean('FALSE')) --- !query 54 schema +-- !query schema struct<> --- !query 54 output +-- !query output --- !query 55 +-- !query INSERT INTO BOOLTBL2 VALUES (boolean('XXX')) --- !query 55 schema +-- !query schema struct<> --- !query 55 output +-- !query output --- !query 56 +-- !query SELECT '' AS f_4, BOOLTBL2.* FROM BOOLTBL2 --- !query 56 schema +-- !query schema struct --- !query 56 output +-- !query output NULL false false @@ -478,13 +478,13 @@ struct false --- !query 57 +-- !query SELECT '' AS tf_12, BOOLTBL1.*, BOOLTBL2.* FROM BOOLTBL1, BOOLTBL2 WHERE BOOLTBL2.f1 <> BOOLTBL1.f1 --- !query 57 schema +-- !query schema struct --- !query 57 output +-- !query output true false true false true false @@ -499,13 +499,13 @@ struct true false --- !query 58 +-- !query SELECT '' AS tf_12, BOOLTBL1.*, BOOLTBL2.* FROM BOOLTBL1, BOOLTBL2 WHERE boolne(BOOLTBL2.f1,BOOLTBL1.f1) --- !query 58 schema +-- !query schema struct --- !query 58 output +-- !query output true false true false true false @@ -520,27 +520,27 @@ struct true false --- !query 59 +-- !query SELECT '' AS 
ff_4, BOOLTBL1.*, BOOLTBL2.* FROM BOOLTBL1, BOOLTBL2 WHERE BOOLTBL2.f1 = BOOLTBL1.f1 and BOOLTBL1.f1 = boolean('false') --- !query 59 schema +-- !query schema struct --- !query 59 output +-- !query output false false false false false false false false --- !query 60 +-- !query SELECT '' AS tf_12_ff_4, BOOLTBL1.*, BOOLTBL2.* FROM BOOLTBL1, BOOLTBL2 WHERE BOOLTBL2.f1 = BOOLTBL1.f1 or BOOLTBL1.f1 = boolean('true') ORDER BY BOOLTBL1.f1, BOOLTBL2.f1 --- !query 60 schema +-- !query schema struct --- !query 60 output +-- !query output false false false false false false @@ -562,90 +562,90 @@ struct true false --- !query 61 +-- !query SELECT '' AS True, f1 FROM BOOLTBL1 WHERE f1 IS TRUE --- !query 61 schema +-- !query schema struct --- !query 61 output +-- !query output true true true --- !query 62 +-- !query SELECT '' AS `Not False`, f1 FROM BOOLTBL1 WHERE f1 IS NOT FALSE --- !query 62 schema +-- !query schema struct --- !query 62 output +-- !query output true true true --- !query 63 +-- !query SELECT '' AS `False`, f1 FROM BOOLTBL1 WHERE f1 IS FALSE --- !query 63 schema +-- !query schema struct --- !query 63 output +-- !query output false --- !query 64 +-- !query SELECT '' AS `Not True`, f1 FROM BOOLTBL1 WHERE f1 IS NOT TRUE --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output false --- !query 65 +-- !query SELECT '' AS `True`, f1 FROM BOOLTBL2 WHERE f1 IS TRUE --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output --- !query 66 +-- !query SELECT '' AS `Not False`, f1 FROM BOOLTBL2 WHERE f1 IS NOT FALSE --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output NULL --- !query 67 +-- !query SELECT '' AS `False`, f1 FROM BOOLTBL2 WHERE f1 IS FALSE --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output false false false false --- !query 68 +-- !query SELECT '' AS `Not True`, f1 FROM BOOLTBL2 WHERE f1 IS NOT TRUE --- !query 68 schema +-- !query schema struct 
--- !query 68 output +-- !query output NULL false false @@ -653,39 +653,39 @@ struct false --- !query 69 +-- !query CREATE TABLE BOOLTBL3 (d string, b boolean, o int) USING parquet --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output --- !query 70 +-- !query INSERT INTO BOOLTBL3 VALUES ('true', true, 1) --- !query 70 schema +-- !query schema struct<> --- !query 70 output +-- !query output --- !query 71 +-- !query INSERT INTO BOOLTBL3 VALUES ('false', false, 2) --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output --- !query 72 +-- !query INSERT INTO BOOLTBL3 VALUES ('null', null, 3) --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output --- !query 73 +-- !query SELECT d, b IS TRUE AS istrue, @@ -695,153 +695,153 @@ SELECT b IS UNKNOWN AS isunknown, b IS NOT UNKNOWN AS isnotunknown FROM booltbl3 ORDER BY o --- !query 73 schema +-- !query schema struct --- !query 73 output +-- !query output true true false false true false true false false true true false false true null false true false true true false --- !query 74 +-- !query CREATE TABLE booltbl4(isfalse boolean, istrue boolean, isnul boolean) USING parquet --- !query 74 schema +-- !query schema struct<> --- !query 74 output +-- !query output --- !query 75 +-- !query INSERT INTO booltbl4 VALUES (false, true, null) --- !query 75 schema +-- !query schema struct<> --- !query 75 output +-- !query output --- !query 76 +-- !query SELECT istrue AND isnul AND istrue FROM booltbl4 --- !query 76 schema +-- !query schema struct<((istrue AND isnul) AND istrue):boolean> --- !query 76 output +-- !query output NULL --- !query 77 +-- !query SELECT istrue AND istrue AND isnul FROM booltbl4 --- !query 77 schema +-- !query schema struct<((istrue AND istrue) AND isnul):boolean> --- !query 77 output +-- !query output NULL --- !query 78 +-- !query SELECT isnul AND istrue AND istrue FROM booltbl4 --- !query 78 schema +-- !query schema 
struct<((isnul AND istrue) AND istrue):boolean> --- !query 78 output +-- !query output NULL --- !query 79 +-- !query SELECT isfalse AND isnul AND istrue FROM booltbl4 --- !query 79 schema +-- !query schema struct<((isfalse AND isnul) AND istrue):boolean> --- !query 79 output +-- !query output false --- !query 80 +-- !query SELECT istrue AND isfalse AND isnul FROM booltbl4 --- !query 80 schema +-- !query schema struct<((istrue AND isfalse) AND isnul):boolean> --- !query 80 output +-- !query output false --- !query 81 +-- !query SELECT isnul AND istrue AND isfalse FROM booltbl4 --- !query 81 schema +-- !query schema struct<((isnul AND istrue) AND isfalse):boolean> --- !query 81 output +-- !query output false --- !query 82 +-- !query SELECT isfalse OR isnul OR isfalse FROM booltbl4 --- !query 82 schema +-- !query schema struct<((isfalse OR isnul) OR isfalse):boolean> --- !query 82 output +-- !query output NULL --- !query 83 +-- !query SELECT isfalse OR isfalse OR isnul FROM booltbl4 --- !query 83 schema +-- !query schema struct<((isfalse OR isfalse) OR isnul):boolean> --- !query 83 output +-- !query output NULL --- !query 84 +-- !query SELECT isnul OR isfalse OR isfalse FROM booltbl4 --- !query 84 schema +-- !query schema struct<((isnul OR isfalse) OR isfalse):boolean> --- !query 84 output +-- !query output NULL --- !query 85 +-- !query SELECT isfalse OR isnul OR istrue FROM booltbl4 --- !query 85 schema +-- !query schema struct<((isfalse OR isnul) OR istrue):boolean> --- !query 85 output +-- !query output true --- !query 86 +-- !query SELECT istrue OR isfalse OR isnul FROM booltbl4 --- !query 86 schema +-- !query schema struct<((istrue OR isfalse) OR isnul):boolean> --- !query 86 output +-- !query output true --- !query 87 +-- !query SELECT isnul OR istrue OR isfalse FROM booltbl4 --- !query 87 schema +-- !query schema struct<((isnul OR istrue) OR isfalse):boolean> --- !query 87 output +-- !query output true --- !query 88 +-- !query DROP TABLE BOOLTBL1 --- !query 88 
schema +-- !query schema struct<> --- !query 88 output +-- !query output --- !query 89 +-- !query DROP TABLE BOOLTBL2 --- !query 89 schema +-- !query schema struct<> --- !query 89 output +-- !query output --- !query 90 +-- !query DROP TABLE BOOLTBL3 --- !query 90 schema +-- !query schema struct<> --- !query 90 output +-- !query output --- !query 91 +-- !query DROP TABLE BOOLTBL4 --- !query 91 schema +-- !query schema struct<> --- !query 91 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/case.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out similarity index 68% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/case.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out index 348198b060238..1b002c3f48ae2 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out @@ -2,243 +2,243 @@ -- Number of queries: 35 --- !query 0 +-- !query CREATE TABLE CASE_TBL ( i integer, f double ) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TABLE CASE2_TBL ( i integer, j integer ) USING parquet --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query INSERT INTO CASE_TBL VALUES (1, 10.1) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO CASE_TBL VALUES (2, 20.2) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO CASE_TBL VALUES (3, -30.3) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query INSERT INTO CASE_TBL VALUES (4, NULL) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query INSERT INTO 
CASE2_TBL VALUES (1, -1) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO CASE2_TBL VALUES (2, -2) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO CASE2_TBL VALUES (3, -3) --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO CASE2_TBL VALUES (2, -4) --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO CASE2_TBL VALUES (1, NULL) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query INSERT INTO CASE2_TBL VALUES (NULL, -6) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query SELECT '3' AS `One`, CASE WHEN 1 < 2 THEN 3 END AS `Simple WHEN` --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 3 3 --- !query 13 +-- !query SELECT '' AS `One`, CASE WHEN 1 > 2 THEN 3 END AS `Simple default` --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output NULL --- !query 14 +-- !query SELECT '3' AS `One`, CASE WHEN 1 < 2 THEN 3 ELSE 4 END AS `Simple ELSE` --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 3 3 --- !query 15 +-- !query SELECT '4' AS `One`, CASE WHEN 1 > 2 THEN 3 ELSE 4 END AS `ELSE default` --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 4 4 --- !query 16 +-- !query SELECT '6' AS `One`, CASE WHEN 1 > 2 THEN 3 WHEN 4 < 5 THEN 6 ELSE 7 END AS `Two WHEN with default` --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 6 6 --- !query 17 +-- !query SELECT '7' AS `None`, CASE WHEN rand() < 0 THEN 1 END AS `NULL on no matches` --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 7 NULL --- 
!query 18 +-- !query SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END --- !query 18 schema -struct --- !query 18 output -1 +-- !query schema +struct +-- !query output +1.0 --- !query 19 +-- !query SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END --- !query 19 schema -struct --- !query 19 output -1 +-- !query schema +struct +-- !query output +1.0 --- !query 20 +-- !query SELECT CASE WHEN i > 100 THEN 1/0 ELSE 0 END FROM case_tbl --- !query 20 schema -struct 100) THEN (1 div 0) ELSE 0 END:int> --- !query 20 output -0 -0 -0 -0 +-- !query schema +struct 100) THEN (CAST(1 AS DOUBLE) / CAST(0 AS DOUBLE)) ELSE CAST(0 AS DOUBLE) END:double> +-- !query output +0.0 +0.0 +0.0 +0.0 --- !query 21 +-- !query SELECT CASE 'a' WHEN 'a' THEN 1 ELSE 2 END --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 1 --- !query 22 +-- !query SELECT '' AS `Five`, CASE WHEN i >= 3 THEN i END AS `>= 3 or Null` FROM CASE_TBL --- !query 22 schema +-- !query schema struct= 3 or Null:int> --- !query 22 output +-- !query output 3 4 NULL NULL --- !query 23 +-- !query SELECT '' AS `Five`, CASE WHEN i >= 3 THEN (i + i) ELSE i END AS `Simplest Math` FROM CASE_TBL --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 1 2 6 8 --- !query 24 +-- !query SELECT '' AS `Five`, i AS `Value`, CASE WHEN (i < 0) THEN 'small' WHEN (i = 0) THEN 'zero' @@ -247,16 +247,16 @@ SELECT '' AS `Five`, i AS `Value`, ELSE 'big' END AS `Category` FROM CASE_TBL --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 1 one 2 two 3 big 4 big --- !query 25 +-- !query SELECT '' AS `Five`, CASE WHEN ((i < 0) or (i < 0)) THEN 'small' WHEN ((i = 0) or (i = 0)) THEN 'zero' @@ -265,37 +265,37 @@ SELECT '' AS `Five`, ELSE 'big' END AS `Category` FROM CASE_TBL --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output big big one two --- !query 26 +-- !query SELECT * FROM CASE_TBL WHERE COALESCE(f,i) = 4 
--- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 4 NULL --- !query 27 +-- !query SELECT * FROM CASE_TBL WHERE NULLIF(f,i) = 2 --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output --- !query 28 +-- !query SELECT COALESCE(a.f, b.i, b.j) FROM CASE_TBL a, CASE2_TBL b --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output -30.3 -30.3 -30.3 @@ -322,24 +322,24 @@ struct 3.0 --- !query 29 +-- !query SELECT * FROM CASE_TBL a, CASE2_TBL b WHERE COALESCE(a.f, b.i, b.j) = 2 --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 4 NULL 2 -2 4 NULL 2 -4 --- !query 30 +-- !query SELECT '' AS Five, NULLIF(a.i,b.i) AS `NULLIF(a.i,b.i)`, NULLIF(b.i, 4) AS `NULLIF(b.i,4)` FROM CASE_TBL a, CASE2_TBL b --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1 2 1 2 1 3 @@ -366,18 +366,18 @@ struct NULL 3 --- !query 31 +-- !query SELECT '' AS `Two`, * FROM CASE_TBL a, CASE2_TBL b WHERE COALESCE(f,b.i) = 2 --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 4 NULL 2 -2 4 NULL 2 -4 --- !query 32 +-- !query SELECT CASE (CASE vol('bar') WHEN 'foo' THEN 'it was foo!' @@ -387,23 +387,23 @@ SELECT CASE WHEN 'it was foo!' THEN 'foo recognized' WHEN 'it was bar!' 
THEN 'bar recognized' ELSE 'unrecognized' END --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output bar recognized --- !query 33 +-- !query DROP TABLE CASE_TBL --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output --- !query 34 +-- !query DROP TABLE CASE2_TBL --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out new file mode 100644 index 0000000000000..637c5561bd940 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out @@ -0,0 +1,115 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 7 + + +-- !query +SELECT 'trailing' AS first +-- !query schema +struct +-- !query output +trailing + + +-- !query +SELECT /* embedded single line */ 'embedded' AS `second` +-- !query schema +struct +-- !query output +embedded + + +-- !query +SELECT /* both embedded and trailing single line */ 'both' AS third +-- !query schema +struct +-- !query output +both + + +-- !query +SELECT 'before multi-line' AS fourth +-- !query schema +struct +-- !query output +before multi-line + + +-- !query +/* This is an example of SQL which should not execute: + * select 'multi-line'; + */ +SELECT 'after multi-line' AS fifth +-- !query schema +struct +-- !query output +after multi-line + + +-- !query +/* +SELECT 'trailing' as x1; -- inside block comment +*/ + +/* This block comment surrounds a query which itself has a block comment... +SELECT /* embedded single line */ 'embedded' AS x2; +*/ + +SELECT -- continued after the following block comments... +/* Deeply nested comment. + This includes a single apostrophe to make sure we aren't decoding this part as a string. +SELECT 'deep nest' AS n1; +/* Second level of nesting... +SELECT 'deeper nest' as n2; +/* Third level of nesting... 
+SELECT 'deepest nest' as n3; +*/ +Hoo boy. Still two deep... +*/ +Now just one deep... +*/ +'deeply nested example' AS sixth +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input ''embedded'' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 6, pos 34) + +== SQL == +/* +SELECT 'trailing' as x1; -- inside block comment +*/ + +/* This block comment surrounds a query which itself has a block comment... +SELECT /* embedded single line */ 'embedded' AS x2; +----------------------------------^^^ +*/ + +SELECT -- continued after the following block comments... +/* Deeply nested comment. + This includes a single apostrophe to make sure we aren't decoding this part as a string. +SELECT 'deep nest' AS n1; +/* Second level of nesting... +SELECT 'deeper nest' as n2; +/* Third level of nesting... +SELECT 'deepest nest' as n3; +*/ +Hoo boy. Still two deep... +*/ +Now just one deep... 
+*/ +'deeply nested example' AS sixth + + +-- !query +/* and this is the end of the file */ +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input '' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 37) + +== SQL == +/* and this is the end of the file */ +-------------------------------------^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out new file mode 100644 index 0000000000000..436b33ce43980 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -0,0 +1,2047 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 195 + + +-- !query +CREATE TABLE emp ( + name string, + age int, + salary int, + manager string +) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW toyemp AS + SELECT name, age, /* location ,*/ 12*salary AS annualsal + FROM emp +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW toyemp +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE emp +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE view_base_table (key int /* PRIMARY KEY */, data varchar(20)) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW key_dependent_view AS + SELECT * FROM view_base_table GROUP BY key +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +expression 'default.view_base_table.`data`' is 
neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; + + +-- !query +CREATE VIEW key_dependent_view_no_cols AS + SELECT FROM view_base_table GROUP BY key HAVING length(data) > 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'FROM'(line 2, pos 10) + +== SQL == +CREATE VIEW key_dependent_view_no_cols AS + SELECT FROM view_base_table GROUP BY key HAVING length(data) > 0 +----------^^^ + + +-- !query +CREATE TABLE viewtest_tbl (a int, b int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO viewtest_tbl VALUES (5, 10), (10, 15), (15, 20), (20, 25) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW viewtest AS + SELECT * FROM viewtest_tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW viewtest AS + SELECT * FROM viewtest_tbl WHERE a > 10 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM viewtest +-- !query schema +struct +-- !query output +15 20 +20 25 + + +-- !query +CREATE OR REPLACE VIEW viewtest AS + SELECT a, b FROM viewtest_tbl WHERE a > 5 ORDER BY b DESC +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM viewtest +-- !query schema +struct +-- !query output +20 25 +15 20 +10 15 + + +-- !query +CREATE OR REPLACE VIEW viewtest AS + SELECT a FROM viewtest_tbl WHERE a <> 20 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW viewtest AS + SELECT 1, * FROM viewtest_tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW viewtest AS + SELECT a, decimal(b) FROM viewtest_tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW viewtest AS + SELECT a, b, 0 AS c FROM viewtest_tbl +-- !query schema +struct<> +-- 
!query output + + + +-- !query +DROP VIEW viewtest +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE viewtest_tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE SCHEMA temp_view_test +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE temp_view_test.base_table (a int, id int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE temp_view_test.base_table2 (a int, id int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +USE temp_view_test +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMPORARY VIEW temp_table AS SELECT * FROM VALUES + (1, 1) as temp_table(a, id) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW v1 AS SELECT * FROM base_table +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v1 +-- !query schema +struct +-- !query output +a int +id int + +# Detailed Table Information +Database temp_view_test +Table v1 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM base_table +View Original Text SELECT * FROM base_table +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [a, id] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW v1_temp AS SELECT * FROM temp_table +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `v1_temp` by referencing a temporary view temp_table. 
Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE TEMP VIEW v2_temp AS SELECT * FROM base_table +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v2_temp +-- !query schema +struct +-- !query output +a int +id int + + +-- !query +CREATE VIEW temp_view_test.v2 AS SELECT * FROM base_table +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED temp_view_test.v2 +-- !query schema +struct +-- !query output +a int +id int + +# Detailed Table Information +Database temp_view_test +Table v2 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM base_table +View Original Text SELECT * FROM base_table +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [a, id] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW temp_view_test.v3_temp AS SELECT * FROM temp_table +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `temp_view_test`.`v3_temp` by referencing a temporary view temp_table. 
Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW v3 AS + SELECT t1.a AS t1_a, t2.a AS t2_a + FROM base_table t1, base_table2 t2 + WHERE t1.id = t2.id +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v3 +-- !query schema +struct +-- !query output +t1_a int +t2_a int + +# Detailed Table Information +Database temp_view_test +Table v3 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT t1.a AS t1_a, t2.a AS t2_a + FROM base_table t1, base_table2 t2 + WHERE t1.id = t2.id +View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a + FROM base_table t1, base_table2 t2 + WHERE t1.id = t2.id +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [t1_a, t2_a] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW v4_temp AS + SELECT t1.a AS t1_a, t2.a AS t2_a + FROM base_table t1, temp_table t2 + WHERE t1.id = t2.id +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `v4_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW v5_temp AS + SELECT t1.a AS t1_a, t2.a AS t2_a, t3.a AS t3_a + FROM base_table t1, base_table2 t2, temp_table t3 + WHERE t1.id = t2.id and t2.id = t3.id +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `v5_temp` by referencing a temporary view temp_table. 
Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW v4 AS SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v4 +-- !query schema +struct +-- !query output +a int +id int + +# Detailed Table Information +Database temp_view_test +Table v4 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) +View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [a, id] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW v5 AS SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v5 +-- !query schema +struct +-- !query output +id int +a int + +# Detailed Table Information +Database temp_view_test +Table v5 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 +View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [id, a] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW v6 AS SELECT * FROM base_table WHERE 
EXISTS (SELECT 1 FROM base_table2) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v6 +-- !query schema +struct +-- !query output +a int +id int + +# Detailed Table Information +Database temp_view_test +Table v6 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) +View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [a, id] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW v7 AS SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v7 +-- !query schema +struct +-- !query output +a int +id int + +# Detailed Table Information +Database temp_view_test +Table v7 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) +View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [a, id] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW v8 AS SELECT * FROM base_table WHERE EXISTS (SELECT 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED v8 +-- !query schema 
+struct +-- !query output +a int +id int + +# Detailed Table Information +Database temp_view_test +Table v8 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) +View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) +View Catalog and Namespace spark_catalog.temp_view_test +View Query Output Columns [a, id] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] + + +-- !query +CREATE VIEW v6_temp AS SELECT * FROM base_table WHERE id IN (SELECT id FROM temp_table) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `v6_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW v7_temp AS SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM temp_table) t2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `v7_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW v8_temp AS SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM temp_table) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `v8_temp` by referencing a temporary view temp_table. 
Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW v9_temp AS SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM temp_table) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `v9_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW v10_temp AS SELECT * FROM v7_temp +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: v7_temp; line 1 pos 38 + + +-- !query +CREATE VIEW v11_temp AS SELECT t1.id, t2.a FROM base_table t1, v10_temp t2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: v10_temp; line 1 pos 63 + + +-- !query +CREATE VIEW v12_temp AS SELECT true FROM v11_temp +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: v11_temp; line 1 pos 41 + + +-- !query +CREATE SCHEMA testviewschm2 +-- !query schema +struct<> +-- !query output + + + +-- !query +USE testviewschm2 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t1 (num int, name string) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t2 (num2 int, value string) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMP VIEW tt AS SELECT * FROM VALUES + (1, 'a') AS tt(num2, value) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW nontemp1 AS SELECT * FROM t1 CROSS JOIN t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED nontemp1 +-- !query schema +struct +-- !query output +num int +name string +num2 int +value string + +# Detailed Table Information +Database testviewschm2 +Table nontemp1 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not 
included in comparison] +Type VIEW +View Text SELECT * FROM t1 CROSS JOIN t2 +View Original Text SELECT * FROM t1 CROSS JOIN t2 +View Catalog and Namespace spark_catalog.testviewschm2 +View Query Output Columns [num, name, num2, value] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] + + +-- !query +CREATE VIEW temporal1 AS SELECT * FROM t1 CROSS JOIN tt +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `temporal1` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW nontemp2 AS SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED nontemp2 +-- !query schema +struct +-- !query output +num int +name string +num2 int +value string + +# Detailed Table Information +Database testviewschm2 +Table nontemp2 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 +View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 +View Catalog and Namespace spark_catalog.testviewschm2 +View Query Output Columns [num, name, num2, value] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] + + +-- !query +CREATE VIEW temporal2 AS SELECT * FROM t1 INNER JOIN tt ON t1.num = tt.num2 +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `temporal2` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW nontemp3 AS SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED nontemp3 +-- !query schema +struct +-- !query output +num int +name string +num2 int +value string + +# Detailed Table Information +Database testviewschm2 +Table nontemp3 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 +View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 +View Catalog and Namespace spark_catalog.testviewschm2 +View Query Output Columns [num, name, num2, value] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] + + +-- !query +CREATE VIEW temporal3 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num = tt.num2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `temporal3` by referencing a temporary view tt. 
Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW nontemp4 AS SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED nontemp4 +-- !query schema +struct +-- !query output +num int +name string +num2 int +value string + +# Detailed Table Information +Database testviewschm2 +Table nontemp4 +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' +View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' +View Catalog and Namespace spark_catalog.testviewschm2 +View Query Output Columns [num, name, num2, value] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] + + +-- !query +CREATE VIEW temporal4 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num = tt.num2 AND tt.value = 'xxx' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `temporal4` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE VIEW temporal5 AS SELECT * FROM t1 WHERE num IN (SELECT num FROM t1 WHERE EXISTS (SELECT 1 FROM tt)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Not allowed to create a permanent view `temporal5` by referencing a temporary view tt. 
Please create a temp view instead by CREATE TEMP VIEW; + + +-- !query +CREATE TABLE tbl1 ( a int, b int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE tbl2 (c int, d int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE tbl3 (e int, f int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE tbl4 (g int, h int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE tmptbl (i int, j int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO tmptbl VALUES (1, 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW pubview AS SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED pubview +-- !query schema +struct +-- !query output +a int +b int + +# Detailed Table Information +Database testviewschm2 +Table pubview +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +View Original Text SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +View Catalog and Namespace spark_catalog.testviewschm2 +View Query Output Columns [a, b] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] + + +-- 
!query +CREATE VIEW mytempview AS SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE EXTENDED mytempview +-- !query schema +struct +-- !query output +a int +b int + +# Detailed Table Information +Database testviewschm2 +Table mytempview +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) +View Original Text SELECT * FROM tbl1 WHERE tbl1.a +BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) +View Catalog and Namespace spark_catalog.testviewschm2 +View Query Output Columns [a, b] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] + + +-- !query +CREATE VIEW tt1 AS + SELECT * FROM ( + VALUES + ('abc', '0123456789', 42, 'abcd'), + ('0123456789', 'abc', 42.12, 'abc') + ) vv(a,b,c,d) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM tt1 +-- !query schema +struct +-- !query output +0123456789 abc 42.12 abc +abc 0123456789 42.00 abcd + + +-- !query +SELECT string(a) FROM tt1 +-- !query schema +struct +-- !query output +0123456789 +abc + + +-- !query +DROP VIEW tt1 +-- !query schema +struct<> +-- 
!query output + + + +-- !query +CREATE TABLE tt1 (f1 int, f2 int, f3 string) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE tx1 (x1 int, x2 int, x3 string) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE temp_view_test.tt1 (y1 int, f2 int, f3 string) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW aliased_view_1 AS + select * from tt1 + where exists (select 1 from tx1 where tt1.f1 = tx1.x1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW aliased_view_2 AS + select * from tt1 a1 + where exists (select 1 from tx1 where a1.f1 = tx1.x1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW aliased_view_3 AS + select * from tt1 + where exists (select 1 from tx1 a2 where tt1.f1 = a2.x1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW aliased_view_4 AS + select * from temp_view_test.tt1 + where exists (select 1 from tt1 where temp_view_test.tt1.y1 = tt1.f1) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE aliased_view_1 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_2 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_3 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_4 +-- !query schema +struct +-- !query output +y1 int +f2 int +f3 string + + +-- !query +ALTER TABLE tx1 RENAME TO a1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE aliased_view_1 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_2 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_3 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 
string + + +-- !query +DESC TABLE aliased_view_4 +-- !query schema +struct +-- !query output +y1 int +f2 int +f3 string + + +-- !query +ALTER TABLE tt1 RENAME TO a2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE aliased_view_1 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_2 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_3 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_4 +-- !query schema +struct +-- !query output +y1 int +f2 int +f3 string + + +-- !query +ALTER TABLE a1 RENAME TO tt1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE aliased_view_1 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_2 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_3 +-- !query schema +struct +-- !query output +f1 int +f2 int +f3 string + + +-- !query +DESC TABLE aliased_view_4 +-- !query schema +struct +-- !query output +y1 int +f2 int +f3 string + + +-- !query +ALTER TABLE a2 RENAME TO tx1 +-- !query schema +struct<> +-- !query output + + + +-- !query +create view view_of_joins as +select * from + (select * from (tbl1 cross join tbl2) same) ss, + (tbl3 cross join tbl4) same +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt2 (a int, b int, c int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt3 (ax bigint, b short, c decimal) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt4 (ay int, b int, q int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create view v1 as select * from tt2 natural join tt3 +-- !query schema +struct<> +-- !query output + + + +-- !query +create view 
v1a as select * from (tt2 natural join tt3) j +-- !query schema +struct<> +-- !query output + + + +-- !query +create view v2 as select * from tt2 join tt3 using (b,c) join tt4 using (b) +-- !query schema +struct<> +-- !query output + + + +-- !query +create view v2a as select * from (tt2 join tt3 using (b,c) join tt4 using (b)) j +-- !query schema +struct<> +-- !query output + + + +-- !query +create view v3 as select * from tt2 join tt3 using (b,c) full join tt4 using (b) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE v1 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint + + +-- !query +DESC TABLE v1a +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint + + +-- !query +DESC TABLE v2 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +DESC TABLE v2a +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +DESC TABLE v3 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +alter table tt2 add column d int +-- !query schema +struct<> +-- !query output + + + +-- !query +alter table tt2 add column e int +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE v1 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint + + +-- !query +DESC TABLE v1a +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint + + +-- !query +DESC TABLE v2 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +DESC TABLE v2a +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +DESC TABLE v3 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +drop table tt3 +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt3 (ax bigint, b 
short, d decimal) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +alter table tt3 add column c int +-- !query schema +struct<> +-- !query output + + + +-- !query +alter table tt3 add column e int +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE v1 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint + + +-- !query +DESC TABLE v1a +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint + + +-- !query +DESC TABLE v2 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +DESC TABLE v2a +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +DESC TABLE v3 +-- !query schema +struct +-- !query output +b int +c int +a int +ax bigint +ay int +q int + + +-- !query +create table tt5 (a int, b int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt6 (c int, d int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create view vv1 as select * from (tt5 cross join tt6) j(aa,bb,cc,dd) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv1 +-- !query schema +struct +-- !query output +aa int +bb int +cc int +dd int + + +-- !query +alter table tt5 add column c int +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv1 +-- !query schema +struct +-- !query output +aa int +bb int +cc int +dd int + + +-- !query +alter table tt5 add column cc int +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv1 +-- !query schema +struct +-- !query output +aa int +bb int +cc int +dd int + + +-- !query +create table tt7 (x int, /* xx int, */ y int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt8 (x int, z int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create view vv2 as +select * 
from (values(1,2,3,4,5)) v(a,b,c,d,e) +union all +select * from tt7 full join tt8 using (x), tt8 tt8x +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv2 +-- !query schema +struct +-- !query output +a int +b int +c int +d int +e int + + +-- !query +create view vv3 as +select * from (values(1,2,3,4,5,6)) v(a,b,c,x,e,f) +union all +select * from + tt7 full join tt8 using (x), + tt7 tt7x full join tt8 tt8x using (x) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv3 +-- !query schema +struct +-- !query output +a int +b int +c int +x int +e int +f int + + +-- !query +create view vv4 as +select * from (values(1,2,3,4,5,6,7)) v(a,b,c,x,e,f,g) +union all +select * from + tt7 full join tt8 using (x), + tt7 tt7x full join tt8 tt8x using (x) full join tt8 tt8y using (x) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv4 +-- !query schema +struct +-- !query output +a int +b int +c int +x int +e int +f int +g int + + +-- !query +alter table tt7 add column zz int +-- !query schema +struct<> +-- !query output + + + +-- !query +alter table tt7 add column z int +-- !query schema +struct<> +-- !query output + + + +-- !query +alter table tt8 add column z2 int +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv2 +-- !query schema +struct +-- !query output +a int +b int +c int +d int +e int + + +-- !query +DESC TABLE vv3 +-- !query schema +struct +-- !query output +a int +b int +c int +x int +e int +f int + + +-- !query +DESC TABLE vv4 +-- !query schema +struct +-- !query output +a int +b int +c int +x int +e int +f int +g int + + +-- !query +create table tt7a (x date, /* xx int, */ y int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt8a (x timestamp, z int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create view vv2a as +select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e) +union all +select * 
from tt7a left join tt8a using (x), tt8a tt8ax +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv4 +-- !query schema +struct +-- !query output +a int +b int +c int +x int +e int +f int +g int + + +-- !query +DESC TABLE vv2a +-- !query schema +struct +-- !query output +a timestamp +b int +c int +d timestamp +e int + + +-- !query +create table tt9 (x int, xx int, y int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt10 (x int, z int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create view vv5 as select x,y,z from tt9 join tt10 using(x) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv5 +-- !query schema +struct +-- !query output +x int +y int +z int + + +-- !query +DESC TABLE vv5 +-- !query schema +struct +-- !query output +x int +y int +z int + + +-- !query +create table tt11 (x int, y int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt12 (x int, z int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create table tt13 (z int, q int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create view vv6 as select x,y,z,q from + (tt11 join tt12 using(x)) join tt13 using(z) +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv6 +-- !query schema +struct +-- !query output +x int +y int +z int +q int + + +-- !query +alter table tt11 add column z int +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE vv6 +-- !query schema +struct +-- !query output +x int +y int +z int +q int + + +-- !query +CREATE TABLE int8_tbl (q1 int, q2 int) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +create view tt18v as + select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy + union all + select * from int8_tbl 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE tt18v +-- !query schema +struct +-- !query output +q1 int +q2 int + + +-- !query +create view tt21v as +select * from tt5 natural inner join tt6 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE tt21v +-- !query schema +struct +-- !query output +c int +a int +b int +cc int +d int + + +-- !query +create view tt22v as +select * from tt5 natural left join tt6 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE tt22v +-- !query schema +struct +-- !query output +c int +a int +b int +cc int +d int + + +-- !query +create view tt23v (col_a, col_b) as +select q1 as other_name1, q2 as other_name2 from int8_tbl +union +select 42, 43 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESC TABLE tt23v +-- !query schema +struct +-- !query output +col_a int +col_b int + + +-- !query +DROP SCHEMA temp_view_test CASCADE +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP SCHEMA testviewschm2 CASCADE +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW temp_table +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW tt +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out new file mode 100755 index 0000000000000..ed27317121623 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -0,0 +1,923 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 98 + + +-- !query +CREATE TABLE DATE_TBL (f1 date) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1957-04-09')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES 
(date('1957-06-13')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1996-02-28')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1996-02-29')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1996-03-01')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1996-03-02')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1997-02-28')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1997-03-01')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('1997-03-02')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('2000-04-01')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('2000-04-02')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('2000-04-03')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('2038-04-08')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('2039-04-09')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO DATE_TBL VALUES (date('2040-04-10')) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT f1 AS `Fifteen` FROM DATE_TBL +-- !query schema +struct +-- !query output +1957-04-09 +1957-06-13 +1996-02-28 +1996-02-29 +1996-03-01 +1996-03-02 +1997-02-28 +1997-03-01 +1997-03-02 +2000-04-01 +2000-04-02 +2000-04-03 +2038-04-08 +2039-04-09 +2040-04-10 + + +-- !query +SELECT f1 AS `Nine` FROM DATE_TBL WHERE f1 < '2000-01-01' +-- !query schema +struct +-- !query output +1957-04-09 +1957-06-13 +1996-02-28 +1996-02-29 +1996-03-01 
+1996-03-02 +1997-02-28 +1997-03-01 +1997-03-02 + + +-- !query +SELECT f1 AS `Three` FROM DATE_TBL + WHERE f1 BETWEEN '2000-01-01' AND '2001-01-01' +-- !query schema +struct +-- !query output +2000-04-01 +2000-04-02 +2000-04-03 + + +-- !query +SELECT date '1999-01-08' +-- !query schema +struct +-- !query output +1999-01-08 + + +-- !query +SELECT date '1999-01-18' +-- !query schema +struct +-- !query output +1999-01-18 + + +-- !query +SELECT date '1999 Jan 08' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7) + +== SQL == +SELECT date '1999 Jan 08' +-------^^^ + + +-- !query +SELECT date '1999 08 Jan' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7) + +== SQL == +SELECT date '1999 08 Jan' +-------^^^ + + +-- !query +SELECT date '1999-01-08' +-- !query schema +struct +-- !query output +1999-01-08 + + +-- !query +SELECT date '1999-08-01' +-- !query schema +struct +-- !query output +1999-08-01 + + +-- !query +SELECT date '1999 01 08' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 01 08(line 1, pos 7) + +== SQL == +SELECT date '1999 01 08' +-------^^^ + + +-- !query +SELECT date '1999 08 01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 08 01(line 1, pos 7) + +== SQL == +SELECT date '1999 08 01' +-------^^^ + + +-- !query +SELECT date '1999-01-08' +-- !query schema +struct +-- !query output +1999-01-08 + + +-- !query +SELECT date '1999 Jan 08' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7) + +== SQL == +SELECT date '1999 Jan 08' +-------^^^ + + +-- !query +SELECT date 
'1999 08 Jan' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7) + +== SQL == +SELECT date '1999 08 Jan' +-------^^^ + + +-- !query +SELECT date '1999-01-08' +-- !query schema +struct +-- !query output +1999-01-08 + + +-- !query +SELECT date '1999-08-01' +-- !query schema +struct +-- !query output +1999-08-01 + + +-- !query +SELECT date '1999 01 08' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 01 08(line 1, pos 7) + +== SQL == +SELECT date '1999 01 08' +-------^^^ + + +-- !query +SELECT date '1999 08 01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 08 01(line 1, pos 7) + +== SQL == +SELECT date '1999 08 01' +-------^^^ + + +-- !query +SELECT date '1999-01-08' +-- !query schema +struct +-- !query output +1999-01-08 + + +-- !query +SELECT date '1999-01-18' +-- !query schema +struct +-- !query output +1999-01-18 + + +-- !query +SELECT date '1999 Jan 08' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 Jan 08(line 1, pos 7) + +== SQL == +SELECT date '1999 Jan 08' +-------^^^ + + +-- !query +SELECT date '1999 08 Jan' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 08 Jan(line 1, pos 7) + +== SQL == +SELECT date '1999 08 Jan' +-------^^^ + + +-- !query +SELECT date '1999-01-08' +-- !query schema +struct +-- !query output +1999-01-08 + + +-- !query +SELECT date '1999-08-01' +-- !query schema +struct +-- !query output +1999-08-01 + + +-- !query +SELECT date '1999 01 08' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 01 08(line 1, 
pos 7) + +== SQL == +SELECT date '1999 01 08' +-------^^^ + + +-- !query +SELECT date '1999 08 01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 1999 08 01(line 1, pos 7) + +== SQL == +SELECT date '1999 08 01' +-------^^^ + + +-- !query +SELECT date '4714-11-24 BC' +-- !query schema +struct +-- !query output +4714-11-24 + + +-- !query +SELECT date '4714-11-23 BC' +-- !query schema +struct +-- !query output +4714-11-23 + + +-- !query +SELECT date '5874897-12-31' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 5874897-12-31(line 1, pos 7) + +== SQL == +SELECT date '5874897-12-31' +-------^^^ + + +-- !query +SELECT date '5874898-01-01' +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot parse the DATE value: 5874898-01-01(line 1, pos 7) + +== SQL == +SELECT date '5874898-01-01' +-------^^^ + + +-- !query +SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL +-- !query schema +struct +-- !query output +-2 years -10 months +-2 years -10 months -1 days +-2 years -9 months -30 days +-3 years -10 months +-3 years -10 months -1 days +-3 years -10 months -2 days +-3 years -9 months -30 days +-42 years -6 months -18 days +-42 years -8 months -22 days +3 months +3 months 1 days +3 months 2 days +38 years 3 months 7 days +39 years 3 months 8 days +40 years 3 months 9 days + + +-- !query +SELECT f1 - date 'epoch' AS `Days From Epoch` FROM DATE_TBL +-- !query schema +struct +-- !query output +-12 years -6 months -18 days +-12 years -8 months -22 days +26 years 1 months 27 days +26 years 1 months 28 days +26 years 2 months +26 years 2 months 1 days +27 years 1 months 27 days +27 years 2 months +27 years 2 months 1 days +30 years 3 months +30 years 3 months 1 days +30 years 3 months 2 days +68 years 3 months 7 days +69 years 3 months 8 days +70 years 3 
months 9 days + + +-- !query +SELECT date 'yesterday' - date 'today' AS `One day` +-- !query schema +struct +-- !query output +-1 days + + +-- !query +SELECT date 'today' - date 'tomorrow' AS `One day` +-- !query schema +struct +-- !query output +-1 days + + +-- !query +SELECT date 'yesterday' - date 'tomorrow' AS `Two days` +-- !query schema +struct +-- !query output +-2 days + + +-- !query +SELECT date 'tomorrow' - date 'today' AS `One day` +-- !query schema +struct +-- !query output +1 days + + +-- !query +SELECT date 'today' - date 'yesterday' AS `One day` +-- !query schema +struct +-- !query output +1 days + + +-- !query +SELECT date 'tomorrow' - date 'yesterday' AS `Two days` +-- !query schema +struct +-- !query output +2 days + + +-- !query +SELECT EXTRACT(EPOCH FROM DATE '1970-01-01') +-- !query schema +struct +-- !query output +0.000000 + + +-- !query +SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01') +-- !query schema +struct +-- !query output +0.000000 + + +-- !query +SELECT EXTRACT(CENTURY FROM TO_DATE('0101-12-31 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-2 + + +-- !query +SELECT EXTRACT(CENTURY FROM TO_DATE('0100-12-31 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-1 + + +-- !query +SELECT EXTRACT(CENTURY FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-1 + + +-- !query +SELECT EXTRACT(CENTURY FROM DATE '0001-01-01') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT EXTRACT(CENTURY FROM DATE '0001-01-01 AD') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT EXTRACT(CENTURY FROM DATE '1900-12-31') +-- !query schema +struct +-- !query output +19 + + +-- !query +SELECT EXTRACT(CENTURY FROM DATE '1901-01-01') +-- !query schema +struct +-- !query output +20 + + +-- !query +SELECT EXTRACT(CENTURY FROM DATE '2000-12-31') +-- !query schema +struct +-- !query output +20 + + +-- !query +SELECT EXTRACT(CENTURY FROM DATE '2001-01-01') 
+-- !query schema +struct +-- !query output +21 + + +-- !query +SELECT EXTRACT(CENTURY FROM CURRENT_DATE)>=21 AS True +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT EXTRACT(MILLENNIUM FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-1 + + +-- !query +SELECT EXTRACT(MILLENNIUM FROM DATE '0001-01-01 AD') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT EXTRACT(MILLENNIUM FROM DATE '1000-12-31') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT EXTRACT(MILLENNIUM FROM DATE '1001-01-01') +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT EXTRACT(MILLENNIUM FROM DATE '2000-12-31') +-- !query schema +struct +-- !query output +2 + + +-- !query +SELECT EXTRACT(MILLENNIUM FROM DATE '2001-01-01') +-- !query schema +struct +-- !query output +3 + + +-- !query +SELECT EXTRACT(MILLENNIUM FROM CURRENT_DATE) +-- !query schema +struct +-- !query output +3 + + +-- !query +SELECT EXTRACT(DECADE FROM DATE '1994-12-25') +-- !query schema +struct +-- !query output +199 + + +-- !query +SELECT EXTRACT(DECADE FROM DATE '0010-01-01') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT EXTRACT(DECADE FROM DATE '0009-12-31') +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT EXTRACT(DECADE FROM TO_DATE('0001-01-01 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT EXTRACT(DECADE FROM TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-1 + + +-- !query +SELECT EXTRACT(DECADE FROM TO_DATE('0011-01-01 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-1 + + +-- !query +SELECT EXTRACT(DECADE FROM TO_DATE('0012-12-31 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-2 + + +-- !query +SELECT EXTRACT(CENTURY FROM NOW())>=21 AS True +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT EXTRACT(CENTURY FROM 
TIMESTAMP '1970-03-20 04:30:00.00000') +-- !query schema +struct +-- !query output +20 + + +-- !query +SELECT DATE_TRUNC('MILLENNIUM', TIMESTAMP '1970-03-20 04:30:00.00000') +-- !query schema +struct +-- !query output +1001-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('MILLENNIUM', DATE '1970-03-20') +-- !query schema +struct +-- !query output +1001-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('CENTURY', TIMESTAMP '1970-03-20 04:30:00.00000') +-- !query schema +struct +-- !query output +1901-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('CENTURY', DATE '1970-03-20') +-- !query schema +struct +-- !query output +1901-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('CENTURY', DATE '2004-08-10') +-- !query schema +struct +-- !query output +2001-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('CENTURY', DATE '0002-02-04') +-- !query schema +struct +-- !query output +0001-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('CENTURY', TO_DATE('0055-08-10 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-0099-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('DECADE', DATE '1993-12-25') +-- !query schema +struct +-- !query output +1990-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('DECADE', DATE '0004-12-25') +-- !query schema +struct +-- !query output +0000-01-01 00:00:00 + + +-- !query +SELECT DATE_TRUNC('DECADE', TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) +-- !query schema +struct +-- !query output +-0010-01-01 00:00:00 + + +-- !query +select make_date(2013, 7, 15) +-- !query schema +struct +-- !query output +2013-07-15 + + +-- !query +select make_date(-44, 3, 15) +-- !query schema +struct +-- !query output +-0044-03-15 + + +-- !query +select make_date(2013, 2, 30) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select make_date(2013, 13, 1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select make_date(2013, 11, -1) +-- !query schema +struct +-- !query output +NULL + + +-- !query +DROP TABLE DATE_TBL 
+-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out similarity index 58% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 6e47cff91a7d5..fe8375c5eab8f 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -2,155 +2,159 @@ -- Number of queries: 43 --- !query 0 +-- !query CREATE TABLE FLOAT4_TBL (f1 float) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 -INSERT INTO FLOAT4_TBL VALUES (' 0.0') --- !query 1 schema +-- !query +INSERT INTO FLOAT4_TBL VALUES (float(' 0.0')) +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 -INSERT INTO FLOAT4_TBL VALUES ('1004.30 ') --- !query 2 schema +-- !query +INSERT INTO FLOAT4_TBL VALUES (float('1004.30 ')) +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 -INSERT INTO FLOAT4_TBL VALUES (' -34.84 ') --- !query 3 schema +-- !query +INSERT INTO FLOAT4_TBL VALUES (float(' -34.84 ')) +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 -INSERT INTO FLOAT4_TBL VALUES ('1.2345678901234e+20') --- !query 4 schema +-- !query +INSERT INTO FLOAT4_TBL VALUES (float('1.2345678901234e+20')) +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 -INSERT INTO FLOAT4_TBL VALUES ('1.2345678901234e-20') --- !query 5 schema +-- !query +INSERT INTO FLOAT4_TBL VALUES (float('1.2345678901234e-20')) +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT float('NaN') --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NaN --- !query 7 +-- !query SELECT 
float('nan') --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NaN --- !query 8 +-- !query SELECT float(' NAN ') --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output NaN --- !query 9 +-- !query SELECT float('infinity') --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output Infinity --- !query 10 +-- !query SELECT float(' -INFINiTY ') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output -Infinity --- !query 11 +-- !query SELECT float('N A N') --- !query 11 schema -struct --- !query 11 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: N A N --- !query 12 +-- !query SELECT float('NaN x') --- !query 12 schema -struct --- !query 12 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: NaN x --- !query 13 +-- !query SELECT float(' INFINITY x') --- !query 13 schema -struct --- !query 13 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: INFINITY x --- !query 14 +-- !query SELECT float('Infinity') + 100.0 --- !query 14 schema +-- !query schema struct<(CAST(CAST(Infinity AS FLOAT) AS DOUBLE) + CAST(100.0 AS DOUBLE)):double> --- !query 14 output +-- !query output Infinity --- !query 15 +-- !query SELECT float('Infinity') / float('Infinity') --- !query 15 schema +-- !query schema struct<(CAST(CAST(Infinity AS FLOAT) AS DOUBLE) / CAST(CAST(Infinity AS FLOAT) AS DOUBLE)):double> --- !query 15 output +-- !query output NaN --- !query 16 +-- !query SELECT float('nan') / float('nan') --- !query 16 schema +-- !query schema struct<(CAST(CAST(nan AS FLOAT) AS DOUBLE) / CAST(CAST(nan AS FLOAT) AS DOUBLE)):double> --- !query 16 output +-- !query output NaN --- !query 17 +-- !query SELECT float(decimal('nan')) --- !query 17 schema 
-struct --- !query 17 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: nan --- !query 18 +-- !query SELECT '' AS five, * FROM FLOAT4_TBL --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output -34.84 0.0 1.2345679E-20 @@ -158,116 +162,116 @@ struct 1004.3 --- !query 19 +-- !query SELECT '' AS four, f.* FROM FLOAT4_TBL f WHERE f.f1 <> '1004.3' --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output -34.84 0.0 1.2345679E-20 1.2345679E20 --- !query 20 +-- !query SELECT '' AS one, f.* FROM FLOAT4_TBL f WHERE f.f1 = '1004.3' --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 1004.3 --- !query 21 +-- !query SELECT '' AS three, f.* FROM FLOAT4_TBL f WHERE '1004.3' > f.f1 --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output -34.84 0.0 1.2345679E-20 --- !query 22 +-- !query SELECT '' AS three, f.* FROM FLOAT4_TBL f WHERE f.f1 < '1004.3' --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output -34.84 0.0 1.2345679E-20 --- !query 23 +-- !query SELECT '' AS four, f.* FROM FLOAT4_TBL f WHERE '1004.3' >= f.f1 --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output -34.84 0.0 1.2345679E-20 1004.3 --- !query 24 +-- !query SELECT '' AS four, f.* FROM FLOAT4_TBL f WHERE f.f1 <= '1004.3' --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output -34.84 0.0 1.2345679E-20 1004.3 --- !query 25 +-- !query SELECT '' AS three, f.f1, f.f1 * '-10' AS x FROM FLOAT4_TBL f WHERE f.f1 > '0.0' --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1.2345679E-20 -1.2345678720289608E-19 1.2345679E20 -1.2345678955701443E21 1004.3 -10042.999877929688 --- !query 26 +-- !query SELECT '' AS three, f.f1, f.f1 + '-10' AS x FROM FLOAT4_TBL f WHERE f.f1 > '0.0' --- !query 26 schema +-- !query 
schema struct --- !query 26 output +-- !query output 1.2345679E-20 -10.0 1.2345679E20 1.2345678955701443E20 1004.3 994.2999877929688 --- !query 27 +-- !query SELECT '' AS three, f.f1, f.f1 / '-10' AS x FROM FLOAT4_TBL f WHERE f.f1 > '0.0' --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 1.2345679E-20 -1.2345678720289608E-21 1.2345679E20 -1.2345678955701443E19 1004.3 -100.42999877929688 --- !query 28 +-- !query SELECT '' AS three, f.f1, f.f1 - '-10' AS x FROM FLOAT4_TBL f WHERE f.f1 > '0.0' --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 1.2345679E-20 10.0 1.2345679E20 1.2345678955701443E20 1004.3 1014.2999877929688 --- !query 29 +-- !query SELECT '' AS five, * FROM FLOAT4_TBL --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output -34.84 0.0 1.2345679E-20 @@ -275,105 +279,108 @@ struct 1004.3 --- !query 30 +-- !query SELECT smallint(float('32767.4')) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 32767 --- !query 31 +-- !query SELECT smallint(float('32767.6')) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 32767 --- !query 32 +-- !query SELECT smallint(float('-32768.4')) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output -32768 --- !query 33 +-- !query SELECT smallint(float('-32768.6')) --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output -32768 --- !query 34 +-- !query SELECT int(float('2147483520')) --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 2147483520 --- !query 35 +-- !query SELECT int(float('2147483647')) --- !query 35 schema -struct --- !query 35 output -2147483647 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting 2.14748365E9 to int causes overflow --- !query 36 +-- !query SELECT int(float('-2147483648.5')) --- !query 36 schema +-- !query 
schema struct --- !query 36 output +-- !query output -2147483648 --- !query 37 +-- !query SELECT int(float('-2147483900')) --- !query 37 schema -struct --- !query 37 output --2147483648 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting -2.1474839E9 to int causes overflow --- !query 38 +-- !query SELECT bigint(float('9223369837831520256')) --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 9223369837831520256 --- !query 39 +-- !query SELECT bigint(float('9223372036854775807')) --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 9223372036854775807 --- !query 40 +-- !query SELECT bigint(float('-9223372036854775808.5')) --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output -9223372036854775808 --- !query 41 +-- !query SELECT bigint(float('-9223380000000000000')) --- !query 41 schema -struct --- !query 41 output --9223372036854775808 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting -9.22338E18 to int causes overflow --- !query 42 +-- !query DROP TABLE FLOAT4_TBL --- !query 42 schema +-- !query schema struct<> --- !query 42 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out similarity index 61% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index b4ea3c1ad1cab..4cdb6958a230a 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -2,187 +2,191 @@ -- Number of queries: 95 --- !query 0 +-- !query CREATE TABLE FLOAT8_TBL(f1 double) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 -INSERT INTO FLOAT8_TBL 
VALUES (' 0.0 ') --- !query 1 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double(' 0.0 ')) +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 -INSERT INTO FLOAT8_TBL VALUES ('1004.30 ') --- !query 2 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('1004.30 ')) +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 -INSERT INTO FLOAT8_TBL VALUES (' -34.84') --- !query 3 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double(' -34.84')) +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 -INSERT INTO FLOAT8_TBL VALUES ('1.2345678901234e+200') --- !query 4 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('1.2345678901234e+200')) +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 -INSERT INTO FLOAT8_TBL VALUES ('1.2345678901234e-200') --- !query 5 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('1.2345678901234e-200')) +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT double('10e400') --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output Infinity --- !query 7 +-- !query SELECT double('-10e400') --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output -Infinity --- !query 8 +-- !query SELECT double('10e-400') --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 0.0 --- !query 9 +-- !query SELECT double('-10e-400') --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output -0.0 --- !query 10 +-- !query SELECT double('NaN') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NaN --- !query 11 +-- !query SELECT double('nan') --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output NaN --- !query 12 +-- !query SELECT double(' NAN ') --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output NaN --- 
!query 13 +-- !query SELECT double('infinity') --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output Infinity --- !query 14 +-- !query SELECT double(' -INFINiTY ') --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output -Infinity --- !query 15 +-- !query SELECT double('N A N') --- !query 15 schema -struct --- !query 15 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: N A N --- !query 16 +-- !query SELECT double('NaN x') --- !query 16 schema -struct --- !query 16 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: NaN x --- !query 17 +-- !query SELECT double(' INFINITY x') --- !query 17 schema -struct --- !query 17 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: INFINITY x --- !query 18 +-- !query SELECT double('Infinity') + 100.0 --- !query 18 schema +-- !query schema struct<(CAST(Infinity AS DOUBLE) + CAST(100.0 AS DOUBLE)):double> --- !query 18 output +-- !query output Infinity --- !query 19 +-- !query SELECT double('Infinity') / double('Infinity') --- !query 19 schema +-- !query schema struct<(CAST(Infinity AS DOUBLE) / CAST(Infinity AS DOUBLE)):double> --- !query 19 output +-- !query output NaN --- !query 20 +-- !query SELECT double('NaN') / double('NaN') --- !query 20 schema +-- !query schema struct<(CAST(NaN AS DOUBLE) / CAST(NaN AS DOUBLE)):double> --- !query 20 output +-- !query output NaN --- !query 21 +-- !query SELECT double(decimal('nan')) --- !query 21 schema -struct --- !query 21 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: nan --- !query 22 +-- !query SELECT '' AS five, * FROM FLOAT8_TBL --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query 
output -34.84 0.0 1.2345678901234E-200 @@ -190,121 +194,121 @@ struct 1004.3 --- !query 23 +-- !query SELECT '' AS four, f.* FROM FLOAT8_TBL f WHERE f.f1 <> '1004.3' --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output -34.84 0.0 1.2345678901234E-200 1.2345678901234E200 --- !query 24 +-- !query SELECT '' AS one, f.* FROM FLOAT8_TBL f WHERE f.f1 = '1004.3' --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 1004.3 --- !query 25 +-- !query SELECT '' AS three, f.* FROM FLOAT8_TBL f WHERE '1004.3' > f.f1 --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output -34.84 0.0 1.2345678901234E-200 --- !query 26 +-- !query SELECT '' AS three, f.* FROM FLOAT8_TBL f WHERE f.f1 < '1004.3' --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output -34.84 0.0 1.2345678901234E-200 --- !query 27 +-- !query SELECT '' AS four, f.* FROM FLOAT8_TBL f WHERE '1004.3' >= f.f1 --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output -34.84 0.0 1.2345678901234E-200 1004.3 --- !query 28 +-- !query SELECT '' AS four, f.* FROM FLOAT8_TBL f WHERE f.f1 <= '1004.3' --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output -34.84 0.0 1.2345678901234E-200 1004.3 --- !query 29 +-- !query SELECT '' AS three, f.f1, f.f1 * '-10' AS x FROM FLOAT8_TBL f WHERE f.f1 > '0.0' --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 1.2345678901234E-200 -1.2345678901234E-199 1.2345678901234E200 -1.2345678901234E201 1004.3 -10043.0 --- !query 30 +-- !query SELECT '' AS three, f.f1, f.f1 + '-10' AS x FROM FLOAT8_TBL f WHERE f.f1 > '0.0' --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1.2345678901234E-200 -10.0 1.2345678901234E200 1.2345678901234E200 1004.3 994.3 --- !query 31 +-- !query SELECT '' AS three, f.f1, f.f1 / '-10' AS x FROM FLOAT8_TBL f WHERE f.f1 > '0.0' --- !query 
31 schema +-- !query schema struct --- !query 31 output +-- !query output 1.2345678901234E-200 -1.2345678901234E-201 1.2345678901234E200 -1.2345678901234E199 1004.3 -100.42999999999999 --- !query 32 +-- !query SELECT '' AS three, f.f1, f.f1 - '-10' AS x FROM FLOAT8_TBL f WHERE f.f1 > '0.0' --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 1.2345678901234E-200 10.0 1.2345678901234E200 1.2345678901234E200 1004.3 1014.3 --- !query 33 +-- !query SELECT '' AS five, f.f1, round(f.f1) AS round_f1 FROM FLOAT8_TBL f --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output -34.84 -35.0 0.0 0.0 1.2345678901234E-200 0.0 @@ -312,11 +316,11 @@ struct 1004.3 1004.0 --- !query 34 +-- !query select ceil(f1) as ceil_f1 from float8_tbl f --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output -34 0 1 @@ -324,11 +328,11 @@ struct 9223372036854775807 --- !query 35 +-- !query select ceiling(f1) as ceiling_f1 from float8_tbl f --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output -34 0 1 @@ -336,11 +340,11 @@ struct 9223372036854775807 --- !query 36 +-- !query select floor(f1) as floor_f1 from float8_tbl f --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output -35 0 0 @@ -348,11 +352,11 @@ struct 9223372036854775807 --- !query 37 +-- !query select sign(f1) as sign_f1 from float8_tbl f --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output -1.0 0.0 1.0 @@ -360,87 +364,87 @@ struct 1.0 --- !query 38 +-- !query SELECT sqrt(double('64')) AS eight --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 8.0 --- !query 39 +-- !query SELECT power(double('144'), double('0.5')) --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 12.0 --- !query 40 +-- !query SELECT power(double('NaN'), double('0.5')) --- !query 40 schema +-- !query schema struct --- !query 
40 output +-- !query output NaN --- !query 41 +-- !query SELECT power(double('144'), double('NaN')) --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output NaN --- !query 42 +-- !query SELECT power(double('NaN'), double('NaN')) --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output NaN --- !query 43 +-- !query SELECT power(double('-1'), double('NaN')) --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output NaN --- !query 44 +-- !query SELECT power(double('1'), double('NaN')) --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output NaN --- !query 45 +-- !query SELECT power(double('NaN'), double('0')) --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output 1.0 --- !query 46 +-- !query SELECT '' AS three, f.f1, exp(ln(f.f1)) AS exp_ln_f1 FROM FLOAT8_TBL f WHERE f.f1 > '0.0' --- !query 46 schema +-- !query schema struct --- !query 46 output +-- !query output 1.2345678901234E-200 1.2345678901233948E-200 1.2345678901234E200 1.234567890123379E200 1004.3 1004.3000000000004 --- !query 47 +-- !query SELECT '' AS five, * FROM FLOAT8_TBL --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output -34.84 0.0 1.2345678901234E-200 @@ -448,22 +452,22 @@ struct 1004.3 --- !query 48 +-- !query CREATE TEMPORARY VIEW UPDATED_FLOAT8_TBL as SELECT CASE WHEN FLOAT8_TBL.f1 > '0.0' THEN FLOAT8_TBL.f1 * '-1' ELSE FLOAT8_TBL.f1 END AS f1 FROM FLOAT8_TBL --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output --- !query 49 +-- !query SELECT '' AS bad, f.f1 * '1e200' from UPDATED_FLOAT8_TBL f --- !query 49 schema +-- !query schema struct --- !query 49 output +-- !query output -1.0042999999999999E203 -1.2345678901234 -3.484E201 @@ -471,11 +475,11 @@ struct 0.0 --- !query 50 +-- !query SELECT '' AS five, * FROM UPDATED_FLOAT8_TBL --- !query 50 schema +-- !query schema struct --- !query 50 output 
+-- !query output -1.2345678901234E-200 -1.2345678901234E200 -1004.3 @@ -483,251 +487,251 @@ struct 0.0 --- !query 51 +-- !query SELECT sinh(double('1')) --- !query 51 schema +-- !query schema struct --- !query 51 output +-- !query output 1.1752011936438014 --- !query 52 +-- !query SELECT cosh(double('1')) --- !query 52 schema +-- !query schema struct --- !query 52 output +-- !query output 1.543080634815244 --- !query 53 +-- !query SELECT tanh(double('1')) --- !query 53 schema +-- !query schema struct --- !query 53 output +-- !query output 0.7615941559557649 --- !query 54 +-- !query SELECT asinh(double('1')) --- !query 54 schema +-- !query schema struct --- !query 54 output +-- !query output 0.8813735870195429 --- !query 55 +-- !query SELECT acosh(double('2')) --- !query 55 schema +-- !query schema struct --- !query 55 output +-- !query output 1.3169578969248166 --- !query 56 +-- !query SELECT atanh(double('0.5')) --- !query 56 schema +-- !query schema struct --- !query 56 output +-- !query output 0.5493061443340548 --- !query 57 +-- !query SELECT sinh(double('Infinity')) --- !query 57 schema +-- !query schema struct --- !query 57 output +-- !query output Infinity --- !query 58 +-- !query SELECT sinh(double('-Infinity')) --- !query 58 schema +-- !query schema struct --- !query 58 output +-- !query output -Infinity --- !query 59 +-- !query SELECT sinh(double('NaN')) --- !query 59 schema +-- !query schema struct --- !query 59 output +-- !query output NaN --- !query 60 +-- !query SELECT cosh(double('Infinity')) --- !query 60 schema +-- !query schema struct --- !query 60 output +-- !query output Infinity --- !query 61 +-- !query SELECT cosh(double('-Infinity')) --- !query 61 schema +-- !query schema struct --- !query 61 output +-- !query output Infinity --- !query 62 +-- !query SELECT cosh(double('NaN')) --- !query 62 schema +-- !query schema struct --- !query 62 output +-- !query output NaN --- !query 63 +-- !query SELECT tanh(double('Infinity')) --- !query 63 schema 
+-- !query schema struct --- !query 63 output +-- !query output 1.0 --- !query 64 +-- !query SELECT tanh(double('-Infinity')) --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output -1.0 --- !query 65 +-- !query SELECT tanh(double('NaN')) --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output NaN --- !query 66 +-- !query SELECT asinh(double('Infinity')) --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output Infinity --- !query 67 +-- !query SELECT asinh(double('-Infinity')) --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output -Infinity --- !query 68 +-- !query SELECT asinh(double('NaN')) --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output NaN --- !query 69 +-- !query SELECT acosh(double('Infinity')) --- !query 69 schema +-- !query schema struct --- !query 69 output +-- !query output Infinity --- !query 70 +-- !query SELECT acosh(double('-Infinity')) --- !query 70 schema +-- !query schema struct --- !query 70 output +-- !query output NaN --- !query 71 +-- !query SELECT acosh(double('NaN')) --- !query 71 schema +-- !query schema struct --- !query 71 output +-- !query output NaN --- !query 72 +-- !query SELECT atanh(double('Infinity')) --- !query 72 schema +-- !query schema struct --- !query 72 output +-- !query output NaN --- !query 73 +-- !query SELECT atanh(double('-Infinity')) --- !query 73 schema +-- !query schema struct --- !query 73 output +-- !query output NaN --- !query 74 +-- !query SELECT atanh(double('NaN')) --- !query 74 schema +-- !query schema struct --- !query 74 output +-- !query output NaN --- !query 75 +-- !query TRUNCATE TABLE FLOAT8_TBL --- !query 75 schema +-- !query schema struct<> --- !query 75 output +-- !query output --- !query 76 -INSERT INTO FLOAT8_TBL VALUES ('0.0') --- !query 76 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('0.0')) +-- !query schema struct<> --- !query 76 
output +-- !query output --- !query 77 -INSERT INTO FLOAT8_TBL VALUES ('-34.84') --- !query 77 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('-34.84')) +-- !query schema struct<> --- !query 77 output +-- !query output --- !query 78 -INSERT INTO FLOAT8_TBL VALUES ('-1004.30') --- !query 78 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('-1004.30')) +-- !query schema struct<> --- !query 78 output +-- !query output --- !query 79 -INSERT INTO FLOAT8_TBL VALUES ('-1.2345678901234e+200') --- !query 79 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('-1.2345678901234e+200')) +-- !query schema struct<> --- !query 79 output +-- !query output --- !query 80 -INSERT INTO FLOAT8_TBL VALUES ('-1.2345678901234e-200') --- !query 80 schema +-- !query +INSERT INTO FLOAT8_TBL VALUES (double('-1.2345678901234e-200')) +-- !query schema struct<> --- !query 80 output +-- !query output --- !query 81 +-- !query SELECT '' AS five, * FROM FLOAT8_TBL --- !query 81 schema +-- !query schema struct --- !query 81 output +-- !query output -1.2345678901234E-200 -1.2345678901234E200 -1004.3 @@ -735,105 +739,106 @@ struct 0.0 --- !query 82 +-- !query SELECT smallint(double('32767.4')) --- !query 82 schema +-- !query schema struct --- !query 82 output +-- !query output 32767 --- !query 83 +-- !query SELECT smallint(double('32767.6')) --- !query 83 schema +-- !query schema struct --- !query 83 output +-- !query output 32767 --- !query 84 +-- !query SELECT smallint(double('-32768.4')) --- !query 84 schema +-- !query schema struct --- !query 84 output +-- !query output -32768 --- !query 85 +-- !query SELECT smallint(double('-32768.6')) --- !query 85 schema +-- !query schema struct --- !query 85 output +-- !query output -32768 --- !query 86 +-- !query SELECT int(double('2147483647.4')) --- !query 86 schema +-- !query schema struct --- !query 86 output +-- !query output 2147483647 --- !query 87 +-- !query SELECT int(double('2147483647.6')) --- !query 87 schema +-- !query 
schema struct --- !query 87 output +-- !query output 2147483647 --- !query 88 +-- !query SELECT int(double('-2147483648.4')) --- !query 88 schema +-- !query schema struct --- !query 88 output +-- !query output -2147483648 --- !query 89 +-- !query SELECT int(double('-2147483648.6')) --- !query 89 schema +-- !query schema struct --- !query 89 output +-- !query output -2147483648 --- !query 90 +-- !query SELECT bigint(double('9223372036854773760')) --- !query 90 schema +-- !query schema struct --- !query 90 output +-- !query output 9223372036854773760 --- !query 91 +-- !query SELECT bigint(double('9223372036854775807')) --- !query 91 schema +-- !query schema struct --- !query 91 output +-- !query output 9223372036854775807 --- !query 92 +-- !query SELECT bigint(double('-9223372036854775808.5')) --- !query 92 schema +-- !query schema struct --- !query 92 output +-- !query output -9223372036854775808 --- !query 93 +-- !query SELECT bigint(double('-9223372036854780000')) --- !query 93 schema -struct --- !query 93 output --9223372036854775808 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting -9.22337203685478E18 to long causes overflow --- !query 94 +-- !query DROP TABLE FLOAT8_TBL --- !query 94 schema +-- !query schema struct<> --- !query 94 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/groupingsets.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/groupingsets.sql.out new file mode 100644 index 0000000000000..24fd9dcbfc826 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/groupingsets.sql.out @@ -0,0 +1,715 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 54 + + +-- !query +create temp view gstest1(a,b,v) + as values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14), + (2,3,15), + (3,3,16),(3,4,17), + (4,1,18),(4,1,19) +-- !query schema +struct<> +-- !query output + + + +-- !query +create table gstest2 (a integer, b 
integer, c integer, d integer, + e integer, f integer, g integer, h integer) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into gstest2 values + (1, 1, 1, 1, 1, 1, 1, 1), + (1, 1, 1, 1, 1, 1, 1, 2), + (1, 1, 1, 1, 1, 1, 2, 2), + (1, 1, 1, 1, 1, 2, 2, 2), + (1, 1, 1, 1, 2, 2, 2, 2), + (1, 1, 1, 2, 2, 2, 2, 2), + (1, 1, 2, 2, 2, 2, 2, 2), + (1, 2, 2, 2, 2, 2, 2, 2), + (2, 2, 2, 2, 2, 2, 2, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +create table gstest3 (a integer, b integer, c integer, d integer) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into gstest3 values + (1, 1, 1, 1), + (2, 2, 2, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +create table gstest4(id integer, v integer, + unhashable_col /* bit(4) */ byte, unsortable_col /* xid */ integer) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into gstest4 +values (1,1,tinyint('0'),1), (2,2,tinyint('1'),1), + (3,4,tinyint('2'),2), (4,8,tinyint('3'),2), + (5,16,tinyint('0'),2), (6,32,tinyint('1'),2), + (7,64,tinyint('2'),1), (8,128,tinyint('3'),1) +-- !query schema +struct<> +-- !query output + + + +-- !query +create table gstest_empty (a integer, b integer, v integer) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) +-- !query schema +struct +-- !query output +1 1 0 0 21 2 11 +1 2 0 0 25 2 13 +1 3 0 0 14 1 14 +1 NULL 0 1 60 5 14 +2 3 0 0 15 1 15 +2 NULL 0 1 15 1 15 +3 3 0 0 16 1 16 +3 4 0 0 17 1 17 +3 NULL 0 1 33 2 17 +4 1 0 0 37 2 19 +4 NULL 0 1 37 2 19 +NULL NULL 1 1 145 10 19 + + +-- !query +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by a,b +-- !query schema +struct +-- !query output +NULL NULL 1 1 145 10 19 +1 NULL 0 1 60 5 14 +1 1 0 0 21 2 11 +1 2 0 0 25 2 13 +1 3 0 
0 14 1 14 +2 NULL 0 1 15 1 15 +2 3 0 0 15 1 15 +3 NULL 0 1 33 2 17 +3 3 0 0 16 1 16 +3 4 0 0 17 1 17 +4 NULL 0 1 37 2 19 +4 1 0 0 37 2 19 + + +-- !query +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by b desc, a +-- !query schema +struct +-- !query output +3 4 0 0 17 1 17 +1 3 0 0 14 1 14 +2 3 0 0 15 1 15 +3 3 0 0 16 1 16 +1 2 0 0 25 2 13 +1 1 0 0 21 2 11 +4 1 0 0 37 2 19 +NULL NULL 1 1 145 10 19 +1 NULL 0 1 60 5 14 +2 NULL 0 1 15 1 15 +3 NULL 0 1 33 2 17 +4 NULL 0 1 37 2 19 + + +-- !query +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by rollup (a,b) order by coalesce(a,0)+coalesce(b,0) +-- !query schema +struct +-- !query output +NULL NULL 1 1 145 10 19 +1 NULL 0 1 60 5 14 +1 1 0 0 21 2 11 +2 NULL 0 1 15 1 15 +1 2 0 0 25 2 13 +3 NULL 0 1 33 2 17 +1 3 0 0 14 1 14 +4 NULL 0 1 37 2 19 +4 1 0 0 37 2 19 +2 3 0 0 15 1 15 +3 3 0 0 16 1 16 +3 4 0 0 17 1 17 + + +-- !query +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by rollup (a,b) order by rsum, a, b +-- !query schema +struct +-- !query output +NULL NULL 12 12 +1 NULL 10 22 +1 1 8 30 +1 2 2 32 +2 NULL 2 34 +2 2 2 36 + + +-- !query +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a) +-- !query schema +struct +-- !query output + + + +-- !query +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()) +-- !query schema +struct +-- !query output + + + +-- !query +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()) +-- !query schema +struct +-- !query output + + + +-- !query +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()) +-- !query schema +struct +-- !query output + + + +-- !query +select t1.a, t2.b, sum(t1.v), count(*) from gstest_empty t1, gstest_empty t2 + group by grouping sets ((t1.a,t2.b),()) +-- !query schema +struct +-- !query output + + + +-- !query +select 
t1.a, t2.b, grouping(t1.a), grouping(t2.b), sum(t1.v), max(t2.a) + from gstest1 t1, gstest2 t2 + group by grouping sets ((t1.a, t2.b), ()) +-- !query schema +struct +-- !query output +1 1 0 0 420 1 +1 2 0 0 120 2 +2 1 0 0 105 1 +2 2 0 0 30 2 +3 1 0 0 231 1 +3 2 0 0 66 2 +4 1 0 0 259 1 +4 2 0 0 74 2 +NULL NULL 1 1 1305 2 + + +-- !query +select t1.a, t2.b, grouping(t1.a), grouping(t2.b), sum(t1.v), max(t2.a) + from gstest1 t1 join gstest2 t2 on (t1.a=t2.a) + group by grouping sets ((t1.a, t2.b), ()) +-- !query schema +struct +-- !query output +1 1 0 0 420 1 +1 2 0 0 60 1 +2 2 0 0 15 2 +NULL NULL 1 1 495 2 + + +-- !query +select a, b, grouping(a), grouping(b), sum(t1.v), max(t2.c) + from gstest1 t1 join gstest2 t2 using (a,b) + group by grouping sets ((a, b), ()) +-- !query schema +struct +-- !query output +1 1 0 0 147 2 +1 2 0 0 25 2 +NULL NULL 1 1 172 2 + + +-- !query +select four, x + from (select four, ten, 'foo' as x from tenk1) as t + group by grouping sets (four, x) + having x = 'foo' +-- !query schema +struct +-- !query output +NULL foo + + +-- !query +select four, x || 'x' + from (select four, ten, 'foo' as x from tenk1) as t + group by grouping sets (four, x) + order by four +-- !query schema +struct +-- !query output +NULL foox +0 NULL +1 NULL +2 NULL +3 NULL + + +-- !query +select (x+y)*1, sum(z) + from (select 1 as x, 2 as y, 3 as z) s + group by grouping sets (x+y, x) +-- !query schema +struct<((x + y) * 1):int,sum(z):bigint> +-- !query output +3 3 +NULL 3 + + +-- !query +CREATE TEMP VIEW int8_tbl AS SELECT * FROM VALUES + (123L, 456L), + (123L, 4567890123456789L), + (4567890123456789L, 123L), + (4567890123456789L, 4567890123456789L), + (4567890123456789L, -4567890123456789L) as int8_tbl(q1, q2) +-- !query schema +struct<> +-- !query output + + + +-- !query +select x, not x as not_x, q2 from + (select *, q1 = 1 as x from int8_tbl i1) as t + group by grouping sets(x, q2) + order by x, q2 +-- !query schema +struct +-- !query output +NULL NULL 
-4567890123456789 +NULL NULL 123 +NULL NULL 456 +NULL NULL 4567890123456789 +false true NULL + + +-- !query +DROP VIEW int8_tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +select ten, sum(distinct four) from onek a +group by grouping sets((ten,four),(ten)) +having exists (select 1 from onek b where sum(distinct a.four) = b.four) +-- !query schema +struct +-- !query output +0 0 +0 2 +0 2 +1 1 +1 3 +2 0 +2 2 +2 2 +3 1 +3 3 +4 0 +4 2 +4 2 +5 1 +5 3 +6 0 +6 2 +6 2 +7 1 +7 3 +8 0 +8 2 +8 2 +9 1 +9 3 + + +-- !query +select a,count(*) from gstest2 group by rollup(a) order by a +-- !query schema +struct +-- !query output +NULL 9 +1 8 +2 1 + + +-- !query +select a,count(*) from gstest2 group by rollup(a) having a is distinct from 1 order by a +-- !query schema +struct +-- !query output +NULL 9 +2 1 + + +-- !query +select ten, grouping(ten) from onek +group by grouping sets(ten) having grouping(ten) >= 0 +order by 2,1 +-- !query schema +struct +-- !query output +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 + + +-- !query +select ten, grouping(ten) from onek +group by grouping sets(ten, four) having grouping(ten) > 0 +order by 2,1 +-- !query schema +struct +-- !query output +NULL 1 +NULL 1 +NULL 1 +NULL 1 + + +-- !query +select ten, grouping(ten) from onek +group by rollup(ten) having grouping(ten) > 0 +order by 2,1 +-- !query schema +struct +-- !query output +NULL 1 + + +-- !query +select ten, grouping(ten) from onek +group by cube(ten) having grouping(ten) > 0 +order by 2,1 +-- !query schema +struct +-- !query output +NULL 1 + + +-- !query +select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col) +-- !query schema +struct +-- !query output +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +8 + + +-- !query +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,4,1,2 /* 3,1,2 */ +-- !query schema +struct +-- !query output +1 NULL 0 1 60 5 14 +2 NULL 0 1 15 1 15 +3 NULL 0 1 33 2 
17 +4 NULL 0 1 37 2 19 +NULL 1 1 0 58 4 19 +NULL 2 1 0 25 2 13 +NULL 3 1 0 45 3 16 +NULL 4 1 0 17 1 17 + + +-- !query +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,4,1,2 /* 3,1,2 */ +-- !query schema +struct +-- !query output +1 1 0 0 21 2 11 +1 2 0 0 25 2 13 +1 3 0 0 14 1 14 +2 3 0 0 15 1 15 +3 3 0 0 16 1 16 +3 4 0 0 17 1 17 +4 1 0 0 37 2 19 +1 NULL 0 1 60 5 14 +2 NULL 0 1 15 1 15 +3 NULL 0 1 33 2 17 +4 NULL 0 1 37 2 19 +NULL 1 1 0 58 4 19 +NULL 2 1 0 25 2 13 +NULL 3 1 0 45 3 16 +NULL 4 1 0 17 1 17 +NULL NULL 1 1 145 10 19 + + +-- !query +select unsortable_col, count(*) + from gstest4 group by grouping sets ((unsortable_col),(unsortable_col)) + order by string(unsortable_col) +-- !query schema +struct +-- !query output +1 4 +1 4 +2 4 +2 4 + + +-- !query +select unhashable_col, unsortable_col, + grouping(unhashable_col), grouping(unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3, 4, 6 /* 3, 5 */ +-- !query schema +struct +-- !query output +0 NULL 0 1 2 17 +1 NULL 0 1 2 34 +2 NULL 0 1 2 68 +3 NULL 0 1 2 136 +NULL 2 1 0 4 60 +NULL 1 1 0 4 195 + + +-- !query +select unhashable_col, unsortable_col, + grouping(unhashable_col), grouping(unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3, 4, 6 /* 3,5 */ +-- !query schema +struct +-- !query output +0 NULL 0 1 1 1 +1 NULL 0 1 1 2 +2 NULL 0 1 1 4 +3 NULL 0 1 1 8 +0 NULL 0 1 1 16 +1 NULL 0 1 1 32 +2 NULL 0 1 1 64 +3 NULL 0 1 1 128 +NULL 1 1 0 1 1 +NULL 1 1 0 1 2 +NULL 2 1 0 1 4 +NULL 2 1 0 1 8 +NULL 2 1 0 1 16 +NULL 2 1 0 1 32 +NULL 1 1 0 1 64 +NULL 1 1 0 1 128 + + +-- !query +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a) +-- !query schema +struct +-- !query output + + + +-- !query +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()) +-- !query schema 
+struct +-- !query output + + + +-- !query +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()) +-- !query schema +struct +-- !query output + + + +-- !query +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()) +-- !query schema +struct +-- !query output + + + +-- !query +select a, b, grouping(a), grouping(b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,4,7 /* 3,6 */ +-- !query schema +struct +-- !query output +1 1 0 0 21 2 11 +1 2 0 0 25 2 13 +1 3 0 0 14 1 14 +2 3 0 0 15 1 15 +3 3 0 0 16 1 16 +3 4 0 0 17 1 17 +4 1 0 0 37 2 19 +NULL NULL 1 1 21 2 11 +NULL NULL 1 1 21 2 11 +NULL NULL 1 1 25 2 13 +NULL NULL 1 1 25 2 13 +NULL NULL 1 1 14 1 14 +NULL NULL 1 1 14 1 14 +NULL NULL 1 1 15 1 15 +NULL NULL 1 1 15 1 15 +NULL NULL 1 1 16 1 16 +NULL NULL 1 1 16 1 16 +NULL NULL 1 1 17 1 17 +NULL NULL 1 1 17 1 17 +NULL NULL 1 1 37 2 19 +NULL NULL 1 1 37 2 19 + + +-- !query +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b +-- !query schema +struct +-- !query output +NULL NULL 12 12 +NULL 1 8 20 +NULL 2 4 24 +1 NULL 10 34 +1 1 8 42 +1 2 2 44 +2 NULL 2 46 +2 2 2 48 + + +-- !query +SELECT a, b, count(*), max(a), max(b) FROM gstest3 GROUP BY GROUPING SETS(a, b,()) ORDER BY a, b +-- !query schema +struct +-- !query output +NULL NULL 2 2 2 +NULL 1 1 1 1 +NULL 2 1 2 2 +1 NULL 1 1 1 +2 NULL 1 2 2 + + +-- !query +select v||'a', case grouping(v||'a') when 1 then 1 else 0 end, count(*) + from values (1, 'a'), (1, 'b') u(i,v) + group by rollup(i, v||'a') order by 1,3 +-- !query schema +struct +-- !query output +NULL 1 2 +NULL 1 2 +aa 0 1 +ba 0 1 + + +-- !query +select v||'a', case when grouping(v||'a') = 1 then 1 else 0 end, count(*) + from values (1, 'a'), (1, 'b') u(i,v) + group by rollup(i, v||'a') order by 1,3 +-- !query schema +struct +-- !query output +NULL 1 2 +NULL 1 2 +aa 0 1 +ba 0 1 + + +-- !query 
+DROP VIEW gstest1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE gstest2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE gstest3 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE gstest4 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE gstest_empty +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/insert.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/insert.sql.out new file mode 100644 index 0000000000000..1046d0ec86bbd --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/insert.sql.out @@ -0,0 +1,81 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 9 + + +-- !query +create table inserttest (col1 int, col2 int /* NOT NULL */, col3 string /* default 'testing' */) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into inserttest values (NULL, 3, 'testing') +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into inserttest values (NULL, 5, 'testing') +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into inserttest values (NULL, 5, 'test') +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into inserttest values (NULL, 7, 'testing') +-- !query schema +struct<> +-- !query output + + + +-- !query +select * from inserttest +-- !query schema +struct +-- !query output +NULL 3 testing +NULL 5 test +NULL 5 testing +NULL 7 testing + + +-- !query +insert into inserttest values(30, 50, repeat('x', 10000)) +-- !query schema +struct<> +-- !query output + + + +-- !query +select col1, col2, char_length(col3) from inserttest +-- !query schema +struct +-- !query output +30 50 10000 +NULL 3 7 +NULL 5 4 +NULL 5 7 +NULL 7 7 + + +-- !query +drop table inserttest +-- !query schema +struct<> +-- !query output + diff --git 
a/sql/core/src/test/resources/sql-tests/results/pgSQL/int2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int2.sql.out old mode 100644 new mode 100755 similarity index 64% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/int2.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/int2.sql.out index 569d137891dd3..02e373f2d2b2b --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/int2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int2.sql.out @@ -2,59 +2,59 @@ -- Number of queries: 35 --- !query 0 +-- !query CREATE TABLE INT2_TBL(f1 smallint) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 -INSERT INTO INT2_TBL VALUES (trim('0 ')) --- !query 1 schema +-- !query +INSERT INTO INT2_TBL VALUES (smallint(trim('0 '))) +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 -INSERT INTO INT2_TBL VALUES (trim(' 1234 ')) --- !query 2 schema +-- !query +INSERT INTO INT2_TBL VALUES (smallint(trim(' 1234 '))) +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 -INSERT INTO INT2_TBL VALUES (trim(' -1234')) --- !query 3 schema +-- !query +INSERT INTO INT2_TBL VALUES (smallint(trim(' -1234'))) +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 -INSERT INTO INT2_TBL VALUES ('32767') --- !query 4 schema +-- !query +INSERT INTO INT2_TBL VALUES (smallint('32767')) +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 -INSERT INTO INT2_TBL VALUES ('-32767') --- !query 5 schema +-- !query +INSERT INTO INT2_TBL VALUES (smallint('-32767')) +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT '' AS five, * FROM INT2_TBL --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output -1234 -32767 0 @@ -62,154 +62,154 @@ struct 32767 --- !query 7 +-- !query SELECT '' AS four, i.* FROM 
INT2_TBL i WHERE i.f1 <> smallint('0') --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output -1234 -32767 1234 32767 --- !query 8 +-- !query SELECT '' AS four, i.* FROM INT2_TBL i WHERE i.f1 <> int('0') --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output -1234 -32767 1234 32767 --- !query 9 +-- !query SELECT '' AS one, i.* FROM INT2_TBL i WHERE i.f1 = smallint('0') --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 0 --- !query 10 +-- !query SELECT '' AS one, i.* FROM INT2_TBL i WHERE i.f1 = int('0') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 0 --- !query 11 +-- !query SELECT '' AS two, i.* FROM INT2_TBL i WHERE i.f1 < smallint('0') --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output -1234 -32767 --- !query 12 +-- !query SELECT '' AS two, i.* FROM INT2_TBL i WHERE i.f1 < int('0') --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output -1234 -32767 --- !query 13 +-- !query SELECT '' AS three, i.* FROM INT2_TBL i WHERE i.f1 <= smallint('0') --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output -1234 -32767 0 --- !query 14 +-- !query SELECT '' AS three, i.* FROM INT2_TBL i WHERE i.f1 <= int('0') --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output -1234 -32767 0 --- !query 15 +-- !query SELECT '' AS two, i.* FROM INT2_TBL i WHERE i.f1 > smallint('0') --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1234 32767 --- !query 16 +-- !query SELECT '' AS two, i.* FROM INT2_TBL i WHERE i.f1 > int('0') --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 1234 32767 --- !query 17 +-- !query SELECT '' AS three, i.* FROM INT2_TBL i WHERE i.f1 >= smallint('0') --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 0 1234 32767 --- 
!query 18 +-- !query SELECT '' AS three, i.* FROM INT2_TBL i WHERE i.f1 >= int('0') --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 0 1234 32767 --- !query 19 +-- !query SELECT '' AS one, i.* FROM INT2_TBL i WHERE (i.f1 % smallint('2')) = smallint('1') --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 32767 --- !query 20 +-- !query SELECT '' AS three, i.* FROM INT2_TBL i WHERE (i.f1 % int('2')) = smallint('0') --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output -1234 0 1234 --- !query 21 +-- !query SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT2_TBL i WHERE abs(f1) < 16384 --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output -1234 -2468 0 0 1234 2468 --- !query 22 +-- !query SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT2_TBL i --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output -1234 -2468 -32767 -65534 0 0 @@ -217,23 +217,23 @@ struct 32767 65534 --- !query 23 +-- !query SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT2_TBL i WHERE f1 < 32766 --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output -1234 -1232 -32767 -32765 0 2 1234 1236 --- !query 24 +-- !query SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT2_TBL i --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output -1234 -1232 -32767 -32765 0 2 @@ -241,23 +241,23 @@ struct 32767 32769 --- !query 25 +-- !query SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT2_TBL i WHERE f1 > -32767 --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output -1234 -1236 0 -2 1234 1232 32767 32765 --- !query 26 +-- !query SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT2_TBL i --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output -1234 -1236 -32767 -32769 0 -2 @@ -265,55 +265,55 @@ struct 
32767 32765 --- !query 27 +-- !query SELECT '' AS five, i.f1, i.f1 / smallint('2') AS x FROM INT2_TBL i --- !query 27 schema -struct --- !query 27 output - -1234 -617 - -32767 -16383 - 0 0 - 1234 617 - 32767 16383 +-- !query schema +struct +-- !query output + -1234 -617.0 + -32767 -16383.5 + 0 0.0 + 1234 617.0 + 32767 16383.5 --- !query 28 +-- !query SELECT '' AS five, i.f1, i.f1 / int('2') AS x FROM INT2_TBL i --- !query 28 schema -struct --- !query 28 output - -1234 -617 - -32767 -16383 - 0 0 - 1234 617 - 32767 16383 +-- !query schema +struct +-- !query output + -1234 -617.0 + -32767 -16383.5 + 0 0.0 + 1234 617.0 + 32767 16383.5 --- !query 29 +-- !query SELECT string(shiftleft(smallint(-1), 15)) --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output -32768 --- !query 30 +-- !query SELECT string(smallint(shiftleft(smallint(-1), 15))+1) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output -32767 --- !query 31 +-- !query SELECT smallint(-32768) % smallint(-1) --- !query 31 schema +-- !query schema struct<(CAST(-32768 AS SMALLINT) % CAST(-1 AS SMALLINT)):smallint> --- !query 31 output +-- !query output 0 --- !query 32 +-- !query SELECT x, smallint(x) AS int2_value FROM (VALUES float(-2.5), float(-1.5), @@ -322,9 +322,9 @@ FROM (VALUES float(-2.5), float(0.5), float(1.5), float(2.5)) t(x) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output -0.5 0 -1.5 -1 -2.5 -2 @@ -334,7 +334,7 @@ struct 2.5 2 --- !query 33 +-- !query SELECT x, smallint(x) AS int2_value FROM (VALUES cast(-2.5 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), @@ -343,21 +343,21 @@ FROM (VALUES cast(-2.5 as decimal(38, 18)), cast(0.5 as decimal(38, 18)), cast(1.5 as decimal(38, 18)), cast(2.5 as decimal(38, 18))) t(x) --- !query 33 schema +-- !query schema struct --- !query 33 output --0.5 0 --1.5 -1 --2.5 -2 -0 0 -0.5 0 -1.5 1 -2.5 2 +-- !query output +-0.500000000000000000 0 +-1.500000000000000000 -1 
+-2.500000000000000000 -2 +0.000000000000000000 0 +0.500000000000000000 0 +1.500000000000000000 1 +2.500000000000000000 2 --- !query 34 +-- !query DROP TABLE INT2_TBL --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/int4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out old mode 100644 new mode 100755 similarity index 55% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/int4.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out index 879b3c626ec1b..3d80c5d595d53 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/int4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out @@ -1,68 +1,60 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 54 +-- Number of queries: 53 --- !query 0 +-- !query CREATE TABLE INT4_TBL(f1 int) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 -INSERT INTO INT4_TBL VALUES (trim(' 0 ')) --- !query 1 schema +-- !query +INSERT INTO INT4_TBL VALUES (int(trim(' 0 '))) +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 -INSERT INTO INT4_TBL VALUES (trim('123456 ')) --- !query 2 schema +-- !query +INSERT INTO INT4_TBL VALUES (int(trim('123456 '))) +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 -INSERT INTO INT4_TBL VALUES (trim(' -123456')) --- !query 3 schema +-- !query +INSERT INTO INT4_TBL VALUES (int(trim(' -123456'))) +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 -INSERT INTO INT4_TBL VALUES ('2147483647') --- !query 4 schema +-- !query +INSERT INTO INT4_TBL VALUES (int('2147483647')) +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 -INSERT INTO INT4_TBL VALUES ('-2147483647') --- !query 5 schema +-- !query +INSERT INTO INT4_TBL 
VALUES (int('-2147483647')) +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 -set spark.sql.arithmeticOperations.failOnOverFlow=false --- !query 6 schema -struct --- !query 6 output -spark.sql.arithmeticOperations.failOnOverFlow false - - --- !query 7 +-- !query SELECT '' AS five, * FROM INT4_TBL --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output -123456 -2147483647 0 @@ -70,425 +62,407 @@ struct 2147483647 --- !query 8 +-- !query SELECT '' AS four, i.* FROM INT4_TBL i WHERE i.f1 <> smallint('0') --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output -123456 -2147483647 123456 2147483647 --- !query 9 +-- !query SELECT '' AS four, i.* FROM INT4_TBL i WHERE i.f1 <> int('0') --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output -123456 -2147483647 123456 2147483647 --- !query 10 +-- !query SELECT '' AS one, i.* FROM INT4_TBL i WHERE i.f1 = smallint('0') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 0 --- !query 11 +-- !query SELECT '' AS one, i.* FROM INT4_TBL i WHERE i.f1 = int('0') --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 0 --- !query 12 +-- !query SELECT '' AS two, i.* FROM INT4_TBL i WHERE i.f1 < smallint('0') --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output -123456 -2147483647 --- !query 13 +-- !query SELECT '' AS two, i.* FROM INT4_TBL i WHERE i.f1 < int('0') --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output -123456 -2147483647 --- !query 14 +-- !query SELECT '' AS three, i.* FROM INT4_TBL i WHERE i.f1 <= smallint('0') --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output -123456 -2147483647 0 --- !query 15 +-- !query SELECT '' AS three, i.* FROM INT4_TBL i WHERE i.f1 <= int('0') --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 
-123456 -2147483647 0 --- !query 16 +-- !query SELECT '' AS two, i.* FROM INT4_TBL i WHERE i.f1 > smallint('0') --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 123456 2147483647 --- !query 17 +-- !query SELECT '' AS two, i.* FROM INT4_TBL i WHERE i.f1 > int('0') --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 123456 2147483647 --- !query 18 +-- !query SELECT '' AS three, i.* FROM INT4_TBL i WHERE i.f1 >= smallint('0') --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 0 123456 2147483647 --- !query 19 +-- !query SELECT '' AS three, i.* FROM INT4_TBL i WHERE i.f1 >= int('0') --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 0 123456 2147483647 --- !query 20 +-- !query SELECT '' AS one, i.* FROM INT4_TBL i WHERE (i.f1 % smallint('2')) = smallint('1') --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 2147483647 --- !query 21 +-- !query SELECT '' AS three, i.* FROM INT4_TBL i WHERE (i.f1 % int('2')) = smallint('0') --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output -123456 0 123456 --- !query 22 +-- !query SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i --- !query 22 schema -struct --- !query 22 output - -123456 -246912 - -2147483647 2 - 0 0 - 123456 246912 - 2147483647 -2 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow --- !query 23 +-- !query SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i WHERE abs(f1) < 1073741824 --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output -123456 -246912 0 0 123456 246912 --- !query 24 +-- !query SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT4_TBL i --- !query 24 schema -struct --- !query 24 output - -123456 -246912 - -2147483647 2 - 0 0 - 123456 246912 - 2147483647 -2 +-- !query schema +struct<> +-- !query 
output +java.lang.ArithmeticException +integer overflow --- !query 25 +-- !query SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT4_TBL i WHERE abs(f1) < 1073741824 --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output -123456 -246912 0 0 123456 246912 --- !query 26 +-- !query SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT4_TBL i --- !query 26 schema -struct --- !query 26 output - -123456 -123454 - -2147483647 -2147483645 - 0 2 - 123456 123458 - 2147483647 -2147483647 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow --- !query 27 +-- !query SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT4_TBL i WHERE f1 < 2147483646 --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output -123456 -123454 -2147483647 -2147483645 0 2 123456 123458 --- !query 28 +-- !query SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT4_TBL i --- !query 28 schema -struct --- !query 28 output - -123456 -123454 - -2147483647 -2147483645 - 0 2 - 123456 123458 - 2147483647 -2147483647 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow --- !query 29 +-- !query SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT4_TBL i WHERE f1 < 2147483646 --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output -123456 -123454 -2147483647 -2147483645 0 2 123456 123458 --- !query 30 +-- !query SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT4_TBL i --- !query 30 schema -struct --- !query 30 output - -123456 -123458 - -2147483647 2147483647 - 0 -2 - 123456 123454 - 2147483647 2147483645 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow --- !query 31 +-- !query SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT4_TBL i WHERE f1 > -2147483647 --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output -123456 -123458 0 -2 
123456 123454 2147483647 2147483645 --- !query 32 +-- !query SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT4_TBL i --- !query 32 schema -struct --- !query 32 output - -123456 -123458 - -2147483647 2147483647 - 0 -2 - 123456 123454 - 2147483647 2147483645 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +integer overflow --- !query 33 +-- !query SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT4_TBL i WHERE f1 > -2147483647 --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output -123456 -123458 0 -2 123456 123454 2147483647 2147483645 --- !query 34 +-- !query SELECT '' AS five, i.f1, i.f1 / smallint('2') AS x FROM INT4_TBL i --- !query 34 schema -struct --- !query 34 output - -123456 -61728 - -2147483647 -1073741823 - 0 0 - 123456 61728 - 2147483647 1073741823 +-- !query schema +struct +-- !query output + -123456 -61728.0 + -2147483647 -1.0737418235E9 + 0 0.0 + 123456 61728.0 + 2147483647 1.0737418235E9 --- !query 35 +-- !query SELECT '' AS five, i.f1, i.f1 / int('2') AS x FROM INT4_TBL i --- !query 35 schema -struct --- !query 35 output - -123456 -61728 - -2147483647 -1073741823 - 0 0 - 123456 61728 - 2147483647 1073741823 +-- !query schema +struct +-- !query output + -123456 -61728.0 + -2147483647 -1.0737418235E9 + 0 0.0 + 123456 61728.0 + 2147483647 1.0737418235E9 --- !query 36 +-- !query SELECT -2+3 AS one --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 1 --- !query 37 +-- !query SELECT 4-2 AS two --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 2 --- !query 38 +-- !query SELECT 2- -1 AS three --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 3 --- !query 39 +-- !query SELECT 2 - -2 AS four --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 4 --- !query 40 +-- !query SELECT smallint('2') * smallint('2') = smallint('16') / smallint('4') AS true --- !query 
40 schema +-- !query schema struct --- !query 40 output +-- !query output true --- !query 41 +-- !query SELECT int('2') * smallint('2') = smallint('16') / int('4') AS true --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output true --- !query 42 +-- !query SELECT smallint('2') * int('2') = int('16') / smallint('4') AS true --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output true --- !query 43 +-- !query SELECT int('1000') < int('999') AS `false` --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output false --- !query 44 +-- !query SELECT 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 AS ten --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output 10 --- !query 45 +-- !query SELECT 2 + 2 / 2 AS three --- !query 45 schema -struct --- !query 45 output -3 +-- !query schema +struct +-- !query output +3.0 --- !query 46 +-- !query SELECT (2 + 2) / 2 AS two --- !query 46 schema -struct --- !query 46 output -2 +-- !query schema +struct +-- !query output +2.0 --- !query 47 +-- !query SELECT string(shiftleft(int(-1), 31)) --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output -2147483648 --- !query 48 +-- !query SELECT string(int(shiftleft(int(-1), 31))+1) --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output -2147483647 --- !query 49 +-- !query SELECT int(-2147483648) % int(-1) --- !query 49 schema +-- !query schema struct<(CAST(-2147483648 AS INT) % CAST(-1 AS INT)):int> --- !query 49 output +-- !query output 0 --- !query 50 +-- !query SELECT int(-2147483648) % smallint(-1) --- !query 50 schema +-- !query schema struct<(CAST(-2147483648 AS INT) % CAST(CAST(-1 AS SMALLINT) AS INT)):int> --- !query 50 output +-- !query output 0 --- !query 51 +-- !query SELECT x, int(x) AS int4_value FROM (VALUES double(-2.5), double(-1.5), @@ -497,9 +471,9 @@ FROM (VALUES double(-2.5), double(0.5), double(1.5), double(2.5)) 
t(x) --- !query 51 schema +-- !query schema struct --- !query 51 output +-- !query output -0.5 0 -1.5 -1 -2.5 -2 @@ -509,7 +483,7 @@ struct 2.5 2 --- !query 52 +-- !query SELECT x, int(x) AS int4_value FROM (VALUES cast(-2.5 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), @@ -518,21 +492,21 @@ FROM (VALUES cast(-2.5 as decimal(38, 18)), cast(0.5 as decimal(38, 18)), cast(1.5 as decimal(38, 18)), cast(2.5 as decimal(38, 18))) t(x) --- !query 52 schema +-- !query schema struct --- !query 52 output --0.5 0 --1.5 -1 --2.5 -2 -0 0 -0.5 0 -1.5 1 -2.5 2 +-- !query output +-0.500000000000000000 0 +-1.500000000000000000 -1 +-2.500000000000000000 -2 +0.000000000000000000 0 +0.500000000000000000 0 +1.500000000000000000 1 +2.500000000000000000 2 --- !query 53 +-- !query DROP TABLE INT4_TBL --- !query 53 schema +-- !query schema struct<> --- !query 53 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out old mode 100644 new mode 100755 similarity index 59% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index fc9f1474eb26c..18b0c821ae70f --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -2,59 +2,59 @@ -- Number of queries: 85 --- !query 0 +-- !query CREATE TABLE INT8_TBL(q1 bigint, q2 bigint) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 -INSERT INTO INT8_TBL VALUES(trim(' 123 '),trim(' 456')) --- !query 1 schema +-- !query +INSERT INTO INT8_TBL VALUES(bigint(trim(' 123 ')),bigint(trim(' 456'))) +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 -INSERT INTO INT8_TBL VALUES(trim('123 '),'4567890123456789') --- !query 2 schema +-- !query +INSERT INTO INT8_TBL 
VALUES(bigint(trim('123 ')),bigint('4567890123456789')) +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 -INSERT INTO INT8_TBL VALUES('4567890123456789','123') --- !query 3 schema +-- !query +INSERT INTO INT8_TBL VALUES(bigint('4567890123456789'),bigint('123')) +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 -INSERT INTO INT8_TBL VALUES(+4567890123456789,'4567890123456789') --- !query 4 schema +-- !query +INSERT INTO INT8_TBL VALUES(+4567890123456789,bigint('4567890123456789')) +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 -INSERT INTO INT8_TBL VALUES('+4567890123456789','-4567890123456789') --- !query 5 schema +-- !query +INSERT INTO INT8_TBL VALUES(bigint('+4567890123456789'),bigint('-4567890123456789')) +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT * FROM INT8_TBL --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 123 456 123 4567890123456789 4567890123456789 -4567890123456789 @@ -62,48 +62,48 @@ struct 4567890123456789 4567890123456789 --- !query 7 +-- !query SELECT * FROM INT8_TBL WHERE q2 = 4567890123456789 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 123 4567890123456789 4567890123456789 4567890123456789 --- !query 8 +-- !query SELECT * FROM INT8_TBL WHERE q2 <> 4567890123456789 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 123 456 4567890123456789 -4567890123456789 4567890123456789 123 --- !query 9 +-- !query SELECT * FROM INT8_TBL WHERE q2 < 4567890123456789 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 123 456 4567890123456789 -4567890123456789 4567890123456789 123 --- !query 10 +-- !query SELECT * FROM INT8_TBL WHERE q2 > 4567890123456789 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT * FROM INT8_TBL WHERE 
q2 <= 4567890123456789 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 123 456 123 4567890123456789 4567890123456789 -4567890123456789 @@ -111,114 +111,114 @@ struct 4567890123456789 4567890123456789 --- !query 12 +-- !query SELECT * FROM INT8_TBL WHERE q2 >= 4567890123456789 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 123 4567890123456789 4567890123456789 4567890123456789 --- !query 13 +-- !query SELECT * FROM INT8_TBL WHERE q2 = 456 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 123 456 --- !query 14 +-- !query SELECT * FROM INT8_TBL WHERE q2 <> 456 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 123 4567890123456789 4567890123456789 -4567890123456789 4567890123456789 123 4567890123456789 4567890123456789 --- !query 15 +-- !query SELECT * FROM INT8_TBL WHERE q2 < 456 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 4567890123456789 -4567890123456789 4567890123456789 123 --- !query 16 +-- !query SELECT * FROM INT8_TBL WHERE q2 > 456 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 123 4567890123456789 4567890123456789 4567890123456789 --- !query 17 +-- !query SELECT * FROM INT8_TBL WHERE q2 <= 456 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 123 456 4567890123456789 -4567890123456789 4567890123456789 123 --- !query 18 +-- !query SELECT * FROM INT8_TBL WHERE q2 >= 456 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 123 456 123 4567890123456789 4567890123456789 4567890123456789 --- !query 19 +-- !query SELECT * FROM INT8_TBL WHERE 123 = q1 --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 123 456 123 4567890123456789 --- !query 20 +-- !query SELECT * FROM INT8_TBL WHERE 123 <> q1 --- !query 20 schema +-- !query schema struct --- !query 
20 output +-- !query output 4567890123456789 -4567890123456789 4567890123456789 123 4567890123456789 4567890123456789 --- !query 21 +-- !query SELECT * FROM INT8_TBL WHERE 123 < q1 --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 4567890123456789 -4567890123456789 4567890123456789 123 4567890123456789 4567890123456789 --- !query 22 +-- !query SELECT * FROM INT8_TBL WHERE 123 > q1 --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output --- !query 23 +-- !query SELECT * FROM INT8_TBL WHERE 123 <= q1 --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 123 456 123 4567890123456789 4567890123456789 -4567890123456789 @@ -226,114 +226,114 @@ struct 4567890123456789 4567890123456789 --- !query 24 +-- !query SELECT * FROM INT8_TBL WHERE 123 >= q1 --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 123 456 123 4567890123456789 --- !query 25 +-- !query SELECT * FROM INT8_TBL WHERE q2 = smallint('456') --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 123 456 --- !query 26 +-- !query SELECT * FROM INT8_TBL WHERE q2 <> smallint('456') --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 123 4567890123456789 4567890123456789 -4567890123456789 4567890123456789 123 4567890123456789 4567890123456789 --- !query 27 +-- !query SELECT * FROM INT8_TBL WHERE q2 < smallint('456') --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 4567890123456789 -4567890123456789 4567890123456789 123 --- !query 28 +-- !query SELECT * FROM INT8_TBL WHERE q2 > smallint('456') --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 123 4567890123456789 4567890123456789 4567890123456789 --- !query 29 +-- !query SELECT * FROM INT8_TBL WHERE q2 <= smallint('456') --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 123 456 
4567890123456789 -4567890123456789 4567890123456789 123 --- !query 30 +-- !query SELECT * FROM INT8_TBL WHERE q2 >= smallint('456') --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 123 456 123 4567890123456789 4567890123456789 4567890123456789 --- !query 31 +-- !query SELECT * FROM INT8_TBL WHERE smallint('123') = q1 --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 123 456 123 4567890123456789 --- !query 32 +-- !query SELECT * FROM INT8_TBL WHERE smallint('123') <> q1 --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 4567890123456789 -4567890123456789 4567890123456789 123 4567890123456789 4567890123456789 --- !query 33 +-- !query SELECT * FROM INT8_TBL WHERE smallint('123') < q1 --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output 4567890123456789 -4567890123456789 4567890123456789 123 4567890123456789 4567890123456789 --- !query 34 +-- !query SELECT * FROM INT8_TBL WHERE smallint('123') > q1 --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output --- !query 35 +-- !query SELECT * FROM INT8_TBL WHERE smallint('123') <= q1 --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 123 456 123 4567890123456789 4567890123456789 -4567890123456789 @@ -341,20 +341,20 @@ struct 4567890123456789 4567890123456789 --- !query 36 +-- !query SELECT * FROM INT8_TBL WHERE smallint('123') >= q1 --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 123 456 123 4567890123456789 --- !query 37 +-- !query SELECT '' AS five, q1 AS plus, -q1 AS `minus` FROM INT8_TBL --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 123 -123 123 -123 4567890123456789 -4567890123456789 @@ -362,11 +362,11 @@ struct 4567890123456789 -4567890123456789 --- !query 38 +-- !query SELECT '' AS five, q1, q2, q1 + q2 AS plus FROM INT8_TBL --- !query 38 schema 
+-- !query schema struct --- !query 38 output +-- !query output 123 456 579 123 4567890123456789 4567890123456912 4567890123456789 -4567890123456789 0 @@ -374,11 +374,11 @@ struct 4567890123456789 4567890123456789 9135780246913578 --- !query 39 +-- !query SELECT '' AS five, q1, q2, q1 - q2 AS `minus` FROM INT8_TBL --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 123 456 -333 123 4567890123456789 -4567890123456666 4567890123456789 -4567890123456789 9135780246913578 @@ -386,46 +386,43 @@ struct 4567890123456789 4567890123456789 0 --- !query 40 +-- !query SELECT '' AS three, q1, q2, q1 * q2 AS multiply FROM INT8_TBL --- !query 40 schema -struct --- !query 40 output - 123 456 56088 - 123 4567890123456789 561850485185185047 - 4567890123456789 -4567890123456789 -4868582358072306617 - 4567890123456789 123 561850485185185047 - 4567890123456789 4567890123456789 4868582358072306617 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow --- !query 41 +-- !query SELECT '' AS three, q1, q2, q1 * q2 AS multiply FROM INT8_TBL WHERE q1 < 1000 or (q2 > 0 and q2 < 1000) --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output 123 456 56088 123 4567890123456789 561850485185185047 4567890123456789 123 561850485185185047 --- !query 42 +-- !query SELECT '' AS five, q1, q2, q1 / q2 AS divide, q1 % q2 AS mod FROM INT8_TBL --- !query 42 schema -struct --- !query 42 output - 123 456 0 123 - 123 4567890123456789 0 123 - 4567890123456789 -4567890123456789 -1 0 - 4567890123456789 123 37137318076884 57 - 4567890123456789 4567890123456789 1 0 +-- !query schema +struct +-- !query output + 123 456 0.26973684210526316 123 + 123 4567890123456789 2.6927092525360204E-14 123 + 4567890123456789 -4567890123456789 -1.0 0 + 4567890123456789 123 3.713731807688446E13 57 + 4567890123456789 4567890123456789 1.0 0 --- !query 43 +-- !query SELECT '' AS five, q1, double(q1) FROM INT8_TBL --- !query 43 schema +-- 
!query schema struct --- !query 43 output +-- !query output 123 123.0 123 123.0 4567890123456789 4.567890123456789E15 @@ -433,11 +430,11 @@ struct 4567890123456789 4.567890123456789E15 --- !query 44 +-- !query SELECT '' AS five, q2, double(q2) FROM INT8_TBL --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output -4567890123456789 -4.567890123456789E15 123 123.0 456 456.0 @@ -445,11 +442,11 @@ struct 4567890123456789 4.567890123456789E15 --- !query 45 +-- !query SELECT 37 + q1 AS plus4 FROM INT8_TBL --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output 160 160 4567890123456826 @@ -457,11 +454,11 @@ struct 4567890123456826 --- !query 46 +-- !query SELECT 37 - q1 AS minus4 FROM INT8_TBL --- !query 46 schema +-- !query schema struct --- !query 46 output +-- !query output -4567890123456752 -4567890123456752 -4567890123456752 @@ -469,11 +466,11 @@ struct -86 --- !query 47 +-- !query SELECT '' AS five, 2 * q1 AS `twice int4` FROM INT8_TBL --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output 246 246 9135780246913578 @@ -481,11 +478,11 @@ struct 9135780246913578 --- !query 48 +-- !query SELECT '' AS five, q1 * 2 AS `twice int4` FROM INT8_TBL --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output 246 246 9135780246913578 @@ -493,59 +490,59 @@ struct 9135780246913578 --- !query 49 +-- !query SELECT q1 + int(42) AS `8plus4`, q1 - int(42) AS `8minus4`, q1 * int(42) AS `8mul4`, q1 / int(42) AS `8div4` FROM INT8_TBL --- !query 49 schema -struct<8plus4:bigint,8minus4:bigint,8mul4:bigint,8div4:bigint> --- !query 49 output -165 81 5166 2 -165 81 5166 2 -4567890123456831 4567890123456747 191851385185185138 108759288653733 -4567890123456831 4567890123456747 191851385185185138 108759288653733 -4567890123456831 4567890123456747 191851385185185138 108759288653733 +-- !query schema +struct<8plus4:bigint,8minus4:bigint,8mul4:bigint,8div4:double> +-- !query output +165 
81 5166 2.9285714285714284 +165 81 5166 2.9285714285714284 +4567890123456831 4567890123456747 191851385185185138 1.0875928865373308E14 +4567890123456831 4567890123456747 191851385185185138 1.0875928865373308E14 +4567890123456831 4567890123456747 191851385185185138 1.0875928865373308E14 --- !query 50 +-- !query SELECT int(246) + q1 AS `4plus8`, int(246) - q1 AS `4minus8`, int(246) * q1 AS `4mul8`, int(246) / q1 AS `4div8` FROM INT8_TBL --- !query 50 schema -struct<4plus8:bigint,4minus8:bigint,4mul8:bigint,4div8:bigint> --- !query 50 output -369 123 30258 2 -369 123 30258 2 -4567890123457035 -4567890123456543 1123700970370370094 0 -4567890123457035 -4567890123456543 1123700970370370094 0 -4567890123457035 -4567890123456543 1123700970370370094 0 +-- !query schema +struct<4plus8:bigint,4minus8:bigint,4mul8:bigint,4div8:double> +-- !query output +369 123 30258 2.0 +369 123 30258 2.0 +4567890123457035 -4567890123456543 1123700970370370094 5.385418505072041E-14 +4567890123457035 -4567890123456543 1123700970370370094 5.385418505072041E-14 +4567890123457035 -4567890123456543 1123700970370370094 5.385418505072041E-14 --- !query 51 +-- !query SELECT q1 + smallint(42) AS `8plus2`, q1 - smallint(42) AS `8minus2`, q1 * smallint(42) AS `8mul2`, q1 / smallint(42) AS `8div2` FROM INT8_TBL --- !query 51 schema -struct<8plus2:bigint,8minus2:bigint,8mul2:bigint,8div2:bigint> --- !query 51 output -165 81 5166 2 -165 81 5166 2 -4567890123456831 4567890123456747 191851385185185138 108759288653733 -4567890123456831 4567890123456747 191851385185185138 108759288653733 -4567890123456831 4567890123456747 191851385185185138 108759288653733 +-- !query schema +struct<8plus2:bigint,8minus2:bigint,8mul2:bigint,8div2:double> +-- !query output +165 81 5166 2.9285714285714284 +165 81 5166 2.9285714285714284 +4567890123456831 4567890123456747 191851385185185138 1.0875928865373308E14 +4567890123456831 4567890123456747 191851385185185138 1.0875928865373308E14 +4567890123456831 4567890123456747 
191851385185185138 1.0875928865373308E14 --- !query 52 +-- !query SELECT smallint(246) + q1 AS `2plus8`, smallint(246) - q1 AS `2minus8`, smallint(246) * q1 AS `2mul8`, smallint(246) / q1 AS `2div8` FROM INT8_TBL --- !query 52 schema -struct<2plus8:bigint,2minus8:bigint,2mul8:bigint,2div8:bigint> --- !query 52 output -369 123 30258 2 -369 123 30258 2 -4567890123457035 -4567890123456543 1123700970370370094 0 -4567890123457035 -4567890123456543 1123700970370370094 0 -4567890123457035 -4567890123456543 1123700970370370094 0 +-- !query schema +struct<2plus8:bigint,2minus8:bigint,2mul8:bigint,2div8:double> +-- !query output +369 123 30258 2.0 +369 123 30258 2.0 +4567890123457035 -4567890123456543 1123700970370370094 5.385418505072041E-14 +4567890123457035 -4567890123456543 1123700970370370094 5.385418505072041E-14 +4567890123457035 -4567890123456543 1123700970370370094 5.385418505072041E-14 --- !query 53 +-- !query SELECT q2, abs(q2) FROM INT8_TBL --- !query 53 schema +-- !query schema struct --- !query 53 output +-- !query output -4567890123456789 4567890123456789 123 123 456 456 @@ -553,97 +550,93 @@ struct 4567890123456789 4567890123456789 --- !query 54 +-- !query SELECT min(q1), min(q2) FROM INT8_TBL --- !query 54 schema +-- !query schema struct --- !query 54 output +-- !query output 123 -4567890123456789 --- !query 55 +-- !query SELECT max(q1), max(q2) FROM INT8_TBL --- !query 55 schema +-- !query schema struct --- !query 55 output +-- !query output 4567890123456789 4567890123456789 --- !query 56 +-- !query select bigint('9223372036854775800') / bigint('0') --- !query 56 schema -struct<(CAST(9223372036854775800 AS BIGINT) div CAST(0 AS BIGINT)):bigint> --- !query 56 output +-- !query schema +struct<(CAST(CAST(9223372036854775800 AS BIGINT) AS DOUBLE) / CAST(CAST(0 AS BIGINT) AS DOUBLE)):double> +-- !query output NULL --- !query 57 +-- !query select bigint('-9223372036854775808') / smallint('0') --- !query 57 schema -struct<(CAST(-9223372036854775808 AS BIGINT) div 
CAST(CAST(0 AS SMALLINT) AS BIGINT)):bigint> --- !query 57 output +-- !query schema +struct<(CAST(CAST(-9223372036854775808 AS BIGINT) AS DOUBLE) / CAST(CAST(0 AS SMALLINT) AS DOUBLE)):double> +-- !query output NULL --- !query 58 +-- !query select smallint('100') / bigint('0') --- !query 58 schema -struct<(CAST(CAST(100 AS SMALLINT) AS BIGINT) div CAST(0 AS BIGINT)):bigint> --- !query 58 output +-- !query schema +struct<(CAST(CAST(100 AS SMALLINT) AS DOUBLE) / CAST(CAST(0 AS BIGINT) AS DOUBLE)):double> +-- !query output NULL --- !query 59 +-- !query SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 = 456 --- !query 59 schema +-- !query schema struct --- !query 59 output +-- !query output 123 --- !query 60 +-- !query SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456 --- !query 60 schema -struct --- !query 60 output --869367531 --869367531 --869367531 -123 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting 4567890123456789 to int causes overflow --- !query 61 +-- !query SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 = 456 --- !query 61 schema +-- !query schema struct --- !query 61 output +-- !query output 123 --- !query 62 +-- !query SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456 --- !query 62 schema -struct --- !query 62 output --32491 --32491 --32491 -123 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting 4567890123456789 to short causes overflow --- !query 63 +-- !query SELECT CAST(smallint('42') AS bigint), CAST(smallint('-37') AS bigint) --- !query 63 schema +-- !query schema struct --- !query 63 output +-- !query output 42 -37 --- !query 64 +-- !query SELECT CAST(q1 AS float), CAST(q2 AS double) FROM INT8_TBL --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output 123.0 4.567890123456789E15 123.0 456.0 4.5678899E15 -4.567890123456789E15 @@ -651,27 +644,28 @@ struct 4.5678899E15 4.567890123456789E15 --- !query 65 +-- !query SELECT 
CAST(float('36854775807.0') AS bigint) --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output 36854775808 --- !query 66 +-- !query SELECT CAST(double('922337203685477580700.0') AS bigint) --- !query 66 schema -struct --- !query 66 output -9223372036854775807 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting 9.223372036854776E20 to long causes overflow --- !query 67 +-- !query SELECT q1, q2, q1 & q2 AS `and`, q1 | q2 AS `or`, ~q1 AS `not` FROM INT8_TBL --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output 123 456 72 507 -124 123 4567890123456789 17 4567890123456895 -124 4567890123456789 -4567890123456789 1 -1 -4567890123456790 @@ -679,11 +673,11 @@ struct 4567890123456789 4567890123456789 4567890123456789 4567890123456789 -4567890123456790 --- !query 68 +-- !query SELECT * FROM range(bigint('+4567890123456789'), bigint('+4567890123456799')) --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output 4567890123456789 4567890123456790 4567890123456791 @@ -696,20 +690,20 @@ struct 4567890123456798 --- !query 69 +-- !query SELECT * FROM range(bigint('+4567890123456789'), bigint('+4567890123456799'), 0) --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output java.lang.IllegalArgumentException requirement failed: step (0) cannot be 0 --- !query 70 +-- !query SELECT * FROM range(bigint('+4567890123456789'), bigint('+4567890123456799'), 2) --- !query 70 schema +-- !query schema struct --- !query 70 output +-- !query output 4567890123456789 4567890123456791 4567890123456793 @@ -717,95 +711,99 @@ struct 4567890123456797 --- !query 71 +-- !query SELECT string(shiftleft(bigint(-1), 63)) --- !query 71 schema +-- !query schema struct --- !query 71 output +-- !query output -9223372036854775808 --- !query 72 +-- !query SELECT string(int(shiftleft(bigint(-1), 63))+1) --- !query 72 schema -struct --- !query 72 output -1 +-- !query 
schema +struct<> +-- !query output +java.lang.ArithmeticException +Casting -9223372036854775808 to int causes overflow --- !query 73 +-- !query SELECT bigint((-9223372036854775808)) * bigint((-1)) --- !query 73 schema -struct<(CAST(-9223372036854775808 AS BIGINT) * CAST(-1 AS BIGINT)):bigint> --- !query 73 output --9223372036854775808 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow --- !query 74 +-- !query SELECT bigint((-9223372036854775808)) / bigint((-1)) --- !query 74 schema -struct<(CAST(-9223372036854775808 AS BIGINT) div CAST(-1 AS BIGINT)):bigint> --- !query 74 output --9223372036854775808 +-- !query schema +struct<(CAST(CAST(-9223372036854775808 AS BIGINT) AS DOUBLE) / CAST(CAST(-1 AS BIGINT) AS DOUBLE)):double> +-- !query output +9.223372036854776E18 --- !query 75 +-- !query SELECT bigint((-9223372036854775808)) % bigint((-1)) --- !query 75 schema +-- !query schema struct<(CAST(-9223372036854775808 AS BIGINT) % CAST(-1 AS BIGINT)):bigint> --- !query 75 output +-- !query output 0 --- !query 76 +-- !query SELECT bigint((-9223372036854775808)) * int((-1)) --- !query 76 schema -struct<(CAST(-9223372036854775808 AS BIGINT) * CAST(CAST(-1 AS INT) AS BIGINT)):bigint> --- !query 76 output --9223372036854775808 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow --- !query 77 +-- !query SELECT bigint((-9223372036854775808)) / int((-1)) --- !query 77 schema -struct<(CAST(-9223372036854775808 AS BIGINT) div CAST(CAST(-1 AS INT) AS BIGINT)):bigint> --- !query 77 output --9223372036854775808 +-- !query schema +struct<(CAST(CAST(-9223372036854775808 AS BIGINT) AS DOUBLE) / CAST(CAST(-1 AS INT) AS DOUBLE)):double> +-- !query output +9.223372036854776E18 --- !query 78 +-- !query SELECT bigint((-9223372036854775808)) % int((-1)) --- !query 78 schema +-- !query schema struct<(CAST(-9223372036854775808 AS BIGINT) % CAST(CAST(-1 AS INT) AS BIGINT)):bigint> --- !query 78 output +-- !query 
output 0 --- !query 79 +-- !query SELECT bigint((-9223372036854775808)) * smallint((-1)) --- !query 79 schema -struct<(CAST(-9223372036854775808 AS BIGINT) * CAST(CAST(-1 AS SMALLINT) AS BIGINT)):bigint> --- !query 79 output --9223372036854775808 +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow --- !query 80 +-- !query SELECT bigint((-9223372036854775808)) / smallint((-1)) --- !query 80 schema -struct<(CAST(-9223372036854775808 AS BIGINT) div CAST(CAST(-1 AS SMALLINT) AS BIGINT)):bigint> --- !query 80 output --9223372036854775808 +-- !query schema +struct<(CAST(CAST(-9223372036854775808 AS BIGINT) AS DOUBLE) / CAST(CAST(-1 AS SMALLINT) AS DOUBLE)):double> +-- !query output +9.223372036854776E18 --- !query 81 +-- !query SELECT bigint((-9223372036854775808)) % smallint((-1)) --- !query 81 schema +-- !query schema struct<(CAST(-9223372036854775808 AS BIGINT) % CAST(CAST(-1 AS SMALLINT) AS BIGINT)):bigint> --- !query 81 output +-- !query output 0 --- !query 82 +-- !query SELECT x, bigint(x) AS int8_value FROM (VALUES (double(-2.5)), (double(-1.5)), @@ -814,9 +812,9 @@ FROM (VALUES (double(-2.5)), (double(0.5)), (double(1.5)), (double(2.5))) t(x) --- !query 82 schema +-- !query schema struct --- !query 82 output +-- !query output -0.5 0 -1.5 -1 -2.5 -2 @@ -826,7 +824,7 @@ struct 2.5 2 --- !query 83 +-- !query SELECT x, bigint(x) AS int8_value FROM (VALUES cast(-2.5 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), @@ -835,21 +833,21 @@ FROM (VALUES cast(-2.5 as decimal(38, 18)), cast(0.5 as decimal(38, 18)), cast(1.5 as decimal(38, 18)), cast(2.5 as decimal(38, 18))) t(x) --- !query 83 schema +-- !query schema struct --- !query 83 output --0.5 0 --1.5 -1 --2.5 -2 -0 0 -0.5 0 -1.5 1 -2.5 2 +-- !query output +-0.500000000000000000 0 +-1.500000000000000000 -1 +-2.500000000000000000 -2 +0.000000000000000000 0 +0.500000000000000000 0 +1.500000000000000000 1 +2.500000000000000000 2 --- !query 84 +-- !query DROP TABLE INT8_TBL --- 
!query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/interval.sql.out new file mode 100644 index 0000000000000..62d47410aab65 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/interval.sql.out @@ -0,0 +1,254 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 24 + + +-- !query +SELECT interval '999' second +-- !query schema +struct +-- !query output +16 minutes 39 seconds + + +-- !query +SELECT interval '999' minute +-- !query schema +struct +-- !query output +16 hours 39 minutes + + +-- !query +SELECT interval '999' hour +-- !query schema +struct +-- !query output +999 hours + + +-- !query +SELECT interval '999' day +-- !query schema +struct +-- !query output +999 days + + +-- !query +SELECT interval '999' month +-- !query schema +struct +-- !query output +83 years 3 months + + +-- !query +SELECT interval '1' year +-- !query schema +struct +-- !query output +1 years + + +-- !query +SELECT interval '2' month +-- !query schema +struct +-- !query output +2 months + + +-- !query +SELECT interval '3' day +-- !query schema +struct +-- !query output +3 days + + +-- !query +SELECT interval '4' hour +-- !query schema +struct +-- !query output +4 hours + + +-- !query +SELECT interval '5' minute +-- !query schema +struct +-- !query output +5 minutes + + +-- !query +SELECT interval '6' second +-- !query schema +struct +-- !query output +6 seconds + + +-- !query +SELECT interval '1-2' year to month +-- !query schema +struct +-- !query output +1 years 2 months + + +-- !query +SELECT interval '1 2:03' day to hour +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2})$': 1 2:03, set 
spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03' day to hour +----------------^^^ + + +-- !query +SELECT interval '1 2:03:04' day to hour +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2})$': 1 2:03:04, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03:04' day to hour +----------------^^^ + + +-- !query +SELECT interval '1 2:03' day to minute +-- !query schema +struct +-- !query output +1 days 2 hours 3 minutes + + +-- !query +SELECT interval '1 2:03:04' day to minute +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2}):(?\d{1,2})$': 1 2:03:04, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03:04' day to minute +----------------^^^ + + +-- !query +SELECT interval '1 2:03' day to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d+) (?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 1 2:03, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03' day to second +----------------^^^ + + +-- !query +SELECT interval '1 2:03:04' day to second +-- !query schema +struct +-- !query output +1 days 2 hours 3 minutes 4 seconds + + +-- !query +SELECT interval '1 2:03' hour to minute +-- !query schema +struct<> +-- !query output 
+org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2})$': 1 2:03, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03' hour to minute +----------------^^^ + + +-- !query +SELECT interval '1 2:03:04' hour to minute +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2})$': 1 2:03:04, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03:04' hour to minute +----------------^^^ + + +-- !query +SELECT interval '1 2:03' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 1 2:03, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03' hour to second +----------------^^^ + + +-- !query +SELECT interval '1 2:03:04' hour to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 1 2:03:04, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03:04' hour to second +----------------^^^ + + +-- !query +SELECT interval '1 2:03' minute to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must 
match day-time format of '^(?[+|-])?(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 1 2:03, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03' minute to second +----------------^^^ + + +-- !query +SELECT interval '1 2:03:04' minute to second +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +requirement failed: Interval string must match day-time format of '^(?[+|-])?(?\d{1,2}):(?(\d{1,2})(\.(\d{1,9}))?)$': 1 2:03:04, set spark.sql.legacy.fromDayTimeString.enabled to true to restore the behavior before Spark 3.0.(line 1, pos 16) + +== SQL == +SELECT interval '1 2:03:04' minute to second +----------------^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out similarity index 84% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/join.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out index f75fe0519645b..5332dfff9f101 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out @@ -1,18 +1,18 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 185 +-- Number of queries: 181 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) AS v(f1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM (VALUES (123, 456), @@ -21,230 +21,230 @@ CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM (4567890123456789, 4567890123456789), (4567890123456789, -4567890123456789)) AS v(q1, q2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- 
!query 2 +-- !query CREATE OR REPLACE TEMPORARY VIEW FLOAT8_TBL AS SELECT * FROM (VALUES (0.0), (1004.30), (-34.84), (cast('1.2345678901234e+200' as double)), (cast('1.2345678901234e-200' as double))) AS v(f1) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE OR REPLACE TEMPORARY VIEW TEXT_TBL AS SELECT * FROM (VALUES ('doh!'), ('hi de ho neighbor')) AS v(f1) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE OR REPLACE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TABLE J1_TBL ( i integer, j integer, t string ) USING parquet --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query CREATE TABLE J2_TBL ( i integer, k integer ) USING parquet --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO J1_TBL VALUES (1, 4, 'one') --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO J1_TBL VALUES (2, 3, 'two') --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO J1_TBL VALUES (3, 2, 'three') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO J1_TBL VALUES (4, 1, 'four') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query INSERT INTO J1_TBL VALUES (5, 0, 'five') --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query INSERT INTO J1_TBL VALUES (6, 6, 'six') --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query INSERT INTO J1_TBL VALUES (7, 7, 
'seven') --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query INSERT INTO J1_TBL VALUES (8, 8, 'eight') --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output --- !query 15 +-- !query INSERT INTO J1_TBL VALUES (0, NULL, 'zero') --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output --- !query 16 +-- !query INSERT INTO J1_TBL VALUES (NULL, NULL, 'null') --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output --- !query 17 +-- !query INSERT INTO J1_TBL VALUES (NULL, 0, 'zero') --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output --- !query 18 +-- !query INSERT INTO J2_TBL VALUES (1, -1) --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query INSERT INTO J2_TBL VALUES (2, 2) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output --- !query 20 +-- !query INSERT INTO J2_TBL VALUES (3, -3) --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query INSERT INTO J2_TBL VALUES (2, 4) --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output --- !query 22 +-- !query INSERT INTO J2_TBL VALUES (5, -5) --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output --- !query 23 +-- !query INSERT INTO J2_TBL VALUES (5, -5) --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query INSERT INTO J2_TBL VALUES (0, NULL) --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output --- !query 25 +-- !query INSERT INTO J2_TBL VALUES (NULL, NULL) --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output --- !query 26 +-- !query INSERT INTO J2_TBL VALUES (NULL, 0) --- !query 26 schema +-- !query schema 
struct<> --- !query 26 output +-- !query output --- !query 27 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL AS tx --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -258,12 +258,12 @@ struct NULL NULL null --- !query 28 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL tx --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -277,12 +277,12 @@ struct NULL NULL null --- !query 29 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL AS t1 (a, b, c) --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -296,12 +296,12 @@ struct NULL NULL null --- !query 30 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -315,12 +315,12 @@ struct NULL NULL null --- !query 31 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c), J2_TBL t2 (d, e) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 0 NULL zero 0 NULL 0 NULL zero 1 -1 0 NULL zero 2 2 @@ -422,12 +422,12 @@ struct NULL NULL null NULL NULL --- !query 32 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL CROSS JOIN J2_TBL --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 0 NULL zero 0 NULL 0 NULL zero 1 -1 0 NULL zero 2 2 @@ -529,22 +529,22 @@ struct NULL NULL null NULL NULL --- !query 33 +-- !query SELECT '' AS `xxx`, i, k, t FROM J1_TBL CROSS JOIN J2_TBL --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'i' is ambiguous, could be: default.j1_tbl.i, default.j2_tbl.i.; line 1 pos 20 --- !query 34 +-- !query SELECT '' AS `xxx`, t1.i, k, t FROM J1_TBL t1 CROSS JOIN J2_TBL t2 --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 0 -1 zero 0 -3 zero 0 -5 
zero @@ -646,13 +646,13 @@ struct NULL NULL zero --- !query 35 +-- !query SELECT '' AS `xxx`, ii, tt, kk FROM (J1_TBL CROSS JOIN J2_TBL) AS tx (ii, jj, tt, ii2, kk) --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 0 zero -1 0 zero -3 0 zero -5 @@ -754,12 +754,12 @@ struct NULL zero NULL --- !query 36 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL CROSS JOIN J2_TBL a CROSS JOIN J2_TBL b --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 0 NULL zero 0 NULL 0 NULL 0 NULL zero 0 NULL 1 -1 0 NULL zero 0 NULL 2 2 @@ -1653,12 +1653,12 @@ struct NULL NULL null NULL NULL NULL NULL --- !query 37 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL INNER JOIN J2_TBL USING (i) --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1668,12 +1668,12 @@ struct 5 0 five -5 --- !query 38 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL USING (i) --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1683,13 +1683,13 @@ struct 5 0 five -5 --- !query 39 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) JOIN J2_TBL t2 (a, d) USING (a) ORDER BY a, d --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1699,12 +1699,12 @@ struct 5 0 five -5 --- !query 40 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL NATURAL JOIN J2_TBL --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1714,12 +1714,12 @@ struct 5 0 five -5 --- !query 41 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d) --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1729,23 +1729,23 @@ struct 5 0 five -5 --- !query 42 +-- !query SELECT '' AS `xxx`, * FROM 
J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a) --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output 0 NULL zero NULL 2 3 two 2 4 1 four 2 --- !query 43 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.i) --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 0 NULL zero 0 NULL 1 4 one 1 -1 2 3 two 2 2 @@ -1755,23 +1755,23 @@ struct 5 0 five 5 -5 --- !query 44 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.k) --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output 0 NULL zero NULL 0 2 3 two 2 2 4 1 four 2 4 --- !query 45 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i <= J2_TBL.k) --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output 0 NULL zero 2 2 0 NULL zero 2 4 0 NULL zero NULL 0 @@ -1783,13 +1783,13 @@ struct 4 1 four 2 4 --- !query 46 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL LEFT OUTER JOIN J2_TBL USING (i) ORDER BY i, k, t --- !query 46 schema +-- !query schema struct --- !query 46 output +-- !query output NULL NULL null NULL NULL 0 zero NULL 0 NULL zero NULL @@ -1805,13 +1805,13 @@ struct 8 8 eight NULL --- !query 47 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL LEFT JOIN J2_TBL USING (i) ORDER BY i, k, t --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output NULL NULL null NULL NULL 0 zero NULL 0 NULL zero NULL @@ -1827,12 +1827,12 @@ struct 8 8 eight NULL --- !query 48 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i) --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1844,12 +1844,12 @@ struct NULL NULL NULL NULL --- !query 49 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL RIGHT JOIN J2_TBL USING (i) --- !query 49 schema +-- !query schema struct --- !query 49 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 
two 2 @@ -1861,13 +1861,13 @@ struct NULL NULL NULL NULL --- !query 50 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL FULL OUTER JOIN J2_TBL USING (i) ORDER BY i, k, t --- !query 50 schema +-- !query schema struct --- !query 50 output +-- !query output NULL NULL NULL NULL NULL NULL null NULL NULL 0 zero NULL @@ -1885,13 +1885,13 @@ struct 8 8 eight NULL --- !query 51 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL FULL JOIN J2_TBL USING (i) ORDER BY i, k, t --- !query 51 schema +-- !query schema struct --- !query 51 output +-- !query output NULL NULL NULL NULL NULL NULL null NULL NULL 0 zero NULL @@ -1909,226 +1909,226 @@ struct 8 8 eight NULL --- !query 52 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (k = 1) --- !query 52 schema +-- !query schema struct --- !query 52 output +-- !query output --- !query 53 +-- !query SELECT '' AS `xxx`, * FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (i = 1) --- !query 53 schema +-- !query schema struct --- !query 53 output +-- !query output 1 4 one -1 --- !query 54 +-- !query CREATE TABLE t1 (name STRING, n INTEGER) USING parquet --- !query 54 schema +-- !query schema struct<> --- !query 54 output +-- !query output --- !query 55 +-- !query CREATE TABLE t2 (name STRING, n INTEGER) USING parquet --- !query 55 schema +-- !query schema struct<> --- !query 55 output +-- !query output --- !query 56 +-- !query CREATE TABLE t3 (name STRING, n INTEGER) USING parquet --- !query 56 schema +-- !query schema struct<> --- !query 56 output +-- !query output --- !query 57 +-- !query INSERT INTO t1 VALUES ( 'bb', 11 ) --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output --- !query 58 +-- !query INSERT INTO t2 VALUES ( 'bb', 12 ) --- !query 58 schema +-- !query schema struct<> --- !query 58 output +-- !query output --- !query 59 +-- !query INSERT INTO t2 VALUES ( 'cc', 22 ) --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output --- !query 60 +-- !query 
INSERT INTO t2 VALUES ( 'ee', 42 ) --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output --- !query 61 +-- !query INSERT INTO t3 VALUES ( 'bb', 13 ) --- !query 61 schema +-- !query schema struct<> --- !query 61 output +-- !query output --- !query 62 +-- !query INSERT INTO t3 VALUES ( 'cc', 23 ) --- !query 62 schema +-- !query schema struct<> --- !query 62 output +-- !query output --- !query 63 +-- !query INSERT INTO t3 VALUES ( 'dd', 33 ) --- !query 63 schema +-- !query schema struct<> --- !query 63 output +-- !query output --- !query 64 +-- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output bb 11 12 13 cc NULL 22 23 dd NULL NULL 33 ee NULL 42 NULL --- !query 65 +-- !query SELECT * FROM (SELECT * FROM t2) as s2 INNER JOIN (SELECT * FROM t3) s3 USING (name) --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output bb 12 13 cc 22 23 --- !query 66 +-- !query SELECT * FROM (SELECT * FROM t2) as s2 LEFT JOIN (SELECT * FROM t3) s3 USING (name) --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output bb 12 13 cc 22 23 ee 42 NULL --- !query 67 +-- !query SELECT * FROM (SELECT * FROM t2) as s2 FULL JOIN (SELECT * FROM t3) s3 USING (name) --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output bb 12 13 cc 22 23 dd NULL 33 ee 42 NULL --- !query 68 +-- !query SELECT * FROM (SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output bb 12 2 13 3 cc 22 2 23 3 --- !query 69 +-- !query SELECT * FROM (SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL LEFT JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 --- !query 69 schema +-- !query schema struct --- !query 69 output +-- !query output bb 12 2 13 3 cc 22 2 23 
3 ee 42 2 NULL NULL --- !query 70 +-- !query SELECT * FROM (SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 --- !query 70 schema +-- !query schema struct --- !query 70 output +-- !query output bb 12 2 13 3 cc 22 2 23 3 dd NULL NULL 33 3 ee 42 2 NULL NULL --- !query 71 +-- !query SELECT * FROM (SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1 NATURAL INNER JOIN (SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL INNER JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 --- !query 71 schema +-- !query schema struct --- !query 71 output +-- !query output bb 11 1 12 2 13 3 --- !query 72 +-- !query SELECT * FROM (SELECT name, n as s1_n, 1 as s1_1 FROM t1) as s1 NATURAL FULL JOIN (SELECT name, n as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL FULL JOIN (SELECT name, n as s3_n, 3 as s3_2 FROM t3) s3 --- !query 72 schema +-- !query schema struct --- !query 72 output +-- !query output bb 11 1 12 2 13 3 cc NULL NULL 22 2 23 3 dd NULL NULL NULL NULL 33 3 ee NULL NULL 42 2 NULL NULL --- !query 73 +-- !query SELECT * FROM (SELECT name, n as s1_n FROM t1) as s1 NATURAL FULL JOIN @@ -2137,16 +2137,16 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 --- !query 73 schema +-- !query schema struct --- !query 73 output +-- !query output bb 11 12 13 cc NULL 22 23 dd NULL NULL 33 ee NULL 42 NULL --- !query 74 +-- !query SELECT * FROM (SELECT name, n as s1_n FROM t1) as s1 NATURAL FULL JOIN @@ -2155,55 +2155,55 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, n as s3_n FROM t3) as s3 ) ss2 --- !query 74 schema +-- !query schema struct --- !query 74 output +-- !query output bb 11 12 2 13 cc NULL 22 2 23 dd NULL NULL NULL 33 ee NULL 42 2 NULL --- !query 75 +-- !query SELECT * FROM (SELECT name, n as s1_n FROM t1) as s1 FULL JOIN (SELECT name, 2 as s2_n FROM t2) as s2 ON (s1_n = s2_n) --- !query 75 schema +-- !query schema struct --- !query 75 output +-- !query output NULL NULL bb 2 NULL 
NULL cc 2 NULL NULL ee 2 bb 11 NULL NULL --- !query 76 +-- !query create or replace temporary view x as select * from (values (1,11), (2,22), (3,null), (4,44), (5,null)) as v(x1, x2) --- !query 76 schema +-- !query schema struct<> --- !query 76 output +-- !query output --- !query 77 +-- !query create or replace temporary view y as select * from (values (1,111), (2,222), (3,333), (4,null)) as v(y1, y2) --- !query 77 schema +-- !query schema struct<> --- !query 77 output +-- !query output --- !query 78 +-- !query select * from x --- !query 78 schema +-- !query schema struct --- !query 78 output +-- !query output 1 11 2 22 3 NULL @@ -2211,22 +2211,22 @@ struct 5 NULL --- !query 79 +-- !query select * from y --- !query 79 schema +-- !query schema struct --- !query 79 output +-- !query output 1 111 2 222 3 333 4 NULL --- !query 80 +-- !query select * from x left join y on (x1 = y1 and x2 is not null) --- !query 80 schema +-- !query schema struct --- !query 80 output +-- !query output 1 11 1 111 2 22 2 222 3 NULL NULL NULL @@ -2234,11 +2234,11 @@ struct 5 NULL NULL NULL --- !query 81 +-- !query select * from x left join y on (x1 = y1 and y2 is not null) --- !query 81 schema +-- !query schema struct --- !query 81 output +-- !query output 1 11 1 111 2 22 2 222 3 NULL 3 333 @@ -2246,12 +2246,12 @@ struct 5 NULL NULL NULL --- !query 82 +-- !query select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2) on (x1 = xx1) --- !query 82 schema +-- !query schema struct --- !query 82 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 3 NULL @@ -2259,12 +2259,12 @@ struct 5 NULL NULL NULL 5 NULL --- !query 83 +-- !query select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2) on (x1 = xx1 and x2 is not null) --- !query 83 schema +-- !query schema struct --- !query 83 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 NULL NULL @@ -2272,12 +2272,12 @@ struct 5 NULL NULL NULL NULL NULL --- !query 84 +-- !query select * from (x 
left join y on (x1 = y1)) left join x xx(xx1,xx2) on (x1 = xx1 and y2 is not null) --- !query 84 schema +-- !query schema struct --- !query 84 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 3 NULL @@ -2285,12 +2285,12 @@ struct 5 NULL NULL NULL NULL NULL --- !query 85 +-- !query select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2) on (x1 = xx1 and xx2 is not null) --- !query 85 schema +-- !query schema struct --- !query 85 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 NULL NULL @@ -2298,78 +2298,78 @@ struct 5 NULL NULL NULL NULL NULL --- !query 86 +-- !query select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2) on (x1 = xx1) where (x2 is not null) --- !query 86 schema +-- !query schema struct --- !query 86 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 4 44 4 NULL 4 44 --- !query 87 +-- !query select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2) on (x1 = xx1) where (y2 is not null) --- !query 87 schema +-- !query schema struct --- !query 87 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 3 NULL --- !query 88 +-- !query select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2) on (x1 = xx1) where (xx2 is not null) --- !query 88 schema +-- !query schema struct --- !query 88 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 4 44 4 NULL 4 44 --- !query 89 +-- !query select count(*) from tenk1 a where unique1 in (select unique1 from tenk1 b join tenk1 c using (unique1) where b.unique2 = 42) --- !query 89 schema +-- !query schema struct --- !query 89 output +-- !query output 1 --- !query 90 +-- !query select count(*) from tenk1 x where x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and x.unique1 = 0 and x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1) --- !query 90 schema +-- !query schema struct --- !query 90 output +-- !query output 1 --- !query 91 +-- !query select count(*) from 
tenk1 x where x.unique1 in (select a.f1 from int4_tbl a,float8_tbl b where a.f1=b.f1) and x.unique1 = 0 and x.unique1 in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=bb.f1) --- !query 91 schema +-- !query schema struct --- !query 91 output +-- !query output 1 --- !query 92 +-- !query select * from int8_tbl i1 left join (int8_tbl i2 join (select 123 as x) ss on i2.q1 = x) on i1.q2 = i2.q2 order by 1, 2 --- !query 92 schema +-- !query schema struct --- !query 92 output +-- !query output 123 456 123 456 123 123 4567890123456789 123 4567890123456789 123 4567890123456789 -4567890123456789 NULL NULL NULL @@ -2377,7 +2377,7 @@ struct 4567890123456789 4567890123456789 123 4567890123456789 123 --- !query 93 +-- !query select count(*) from (select t3.tenthous as x1, coalesce(t1.stringu1, t2.stringu1) as x2 @@ -2387,32 +2387,32 @@ from tenk1 t4, tenk1 t5 where t4.thousand = t5.unique1 and ss.x1 = t4.tenthous and ss.x2 = t5.stringu1 --- !query 93 schema +-- !query schema struct --- !query 93 output +-- !query output 1000 --- !query 94 +-- !query select a.f1, b.f1, t.thousand, t.tenthous from tenk1 t, (select sum(f1)+1 as f1 from int4_tbl i4a) a, (select sum(f1) as f1 from int4_tbl i4b) b where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous --- !query 94 schema +-- !query schema struct --- !query 94 output +-- !query output --- !query 95 +-- !query select * from j1_tbl full join (select * from j2_tbl order by j2_tbl.i desc, j2_tbl.k asc) j2_tbl on j1_tbl.i = j2_tbl.i and j1_tbl.i = j2_tbl.k --- !query 95 schema +-- !query schema struct --- !query 95 output +-- !query output 0 NULL zero NULL NULL 1 4 one NULL NULL 2 3 two 2 2 @@ -2434,156 +2434,128 @@ NULL NULL NULL NULL NULL NULL NULL null NULL NULL --- !query 96 +-- !query select count(*) from (select * from tenk1 x order by x.thousand, x.twothousand, x.fivethous) x left join (select * from tenk1 y order by y.unique2) y on x.thousand = y.unique2 and x.twothousand = y.hundred and x.fivethous = 
y.unique2 --- !query 96 schema +-- !query schema struct --- !query 96 output +-- !query output 10000 --- !query 97 +-- !query DROP TABLE t1 --- !query 97 schema +-- !query schema struct<> --- !query 97 output +-- !query output --- !query 98 +-- !query DROP TABLE t2 --- !query 98 schema +-- !query schema struct<> --- !query 98 output +-- !query output --- !query 99 +-- !query DROP TABLE t3 --- !query 99 schema +-- !query schema struct<> --- !query 99 output +-- !query output --- !query 100 +-- !query DROP TABLE J1_TBL --- !query 100 schema +-- !query schema struct<> --- !query 100 output +-- !query output --- !query 101 +-- !query DROP TABLE J2_TBL --- !query 101 schema +-- !query schema struct<> --- !query 101 output +-- !query output --- !query 102 +-- !query create or replace temporary view tt1 as select * from (values (1, 11), (2, NULL)) as v(tt1_id, joincol) --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output --- !query 103 +-- !query create or replace temporary view tt2 as select * from (values (21, 11), (22, 11)) as v(tt2_id, joincol) --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output --- !query 104 +-- !query select tt1.*, tt2.* from tt1 left join tt2 on tt1.joincol = tt2.joincol --- !query 104 schema +-- !query schema struct --- !query 104 output +-- !query output 1 11 21 11 1 11 22 11 2 NULL NULL NULL --- !query 105 +-- !query select tt1.*, tt2.* from tt2 right join tt1 on tt1.joincol = tt2.joincol --- !query 105 schema +-- !query schema struct --- !query 105 output +-- !query output 1 11 21 11 1 11 22 11 2 NULL NULL NULL --- !query 106 +-- !query select count(*) from tenk1 a, tenk1 b where a.hundred = b.thousand and (b.fivethous % 10) < 10 --- !query 106 schema +-- !query schema struct --- !query 106 output +-- !query output 100000 --- !query 107 -DROP TABLE IF EXISTS tt3 --- !query 107 schema -struct<> --- !query 107 output - - - --- !query 108 -CREATE TABLE tt3(f1 int, f2 string) 
USING parquet --- !query 108 schema -struct<> --- !query 108 output - - - --- !query 109 -INSERT INTO tt3 SELECT x.id, repeat('xyzzy', 100) FROM range(1,10001) x --- !query 109 schema -struct<> --- !query 109 output - - - --- !query 110 -DROP TABLE IF EXISTS tt4 --- !query 110 schema -struct<> --- !query 110 output - - - --- !query 111 -CREATE TABLE tt4(f1 int) USING parquet --- !query 111 schema +-- !query +create or replace temporary view tt3 as select * from + (SELECT cast(x.id as int), repeat('xyzzy', 100) FROM range(1,10001) x) + as v(f1, f2) +-- !query schema struct<> --- !query 111 output +-- !query output --- !query 112 -INSERT INTO tt4 VALUES (0),(1),(9999) --- !query 112 schema +-- !query +create or replace temporary view tt4 as select * from + (values (0), (1), (9999)) + as v(f1) +-- !query schema struct<> --- !query 112 output +-- !query output --- !query 113 +-- !query SELECT a.f1 FROM tt4 a LEFT JOIN ( @@ -2592,242 +2564,242 @@ LEFT JOIN ( WHERE c.f1 IS NULL ) AS d ON (a.f1 = d.f1) WHERE d.f1 IS NULL --- !query 113 schema +-- !query schema struct --- !query 113 output +-- !query output 0 1 9999 --- !query 114 +-- !query create or replace temporary view tt5 as select * from (values (1, 10), (1, 11)) as v(f1, f2) --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output --- !query 115 +-- !query create or replace temporary view tt6 as select * from (values (1, 9), (1, 2), (2, 9)) as v(f1, f2) --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output --- !query 116 +-- !query select * from tt5,tt6 where tt5.f1 = tt6.f1 and tt5.f1 = tt5.f2 - tt6.f2 --- !query 116 schema +-- !query schema struct --- !query 116 output +-- !query output 1 10 1 9 --- !query 117 +-- !query create or replace temporary view xx as select * from (values (1), (2), (3)) as v(pkxx) --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output --- !query 118 +-- !query create or replace temporary 
view yy as select * from (values (101, 1), (201, 2), (301, NULL)) as v(pkyy, pkxx) --- !query 118 schema +-- !query schema struct<> --- !query 118 output +-- !query output --- !query 119 +-- !query select yy.pkyy as yy_pkyy, yy.pkxx as yy_pkxx, yya.pkyy as yya_pkyy, xxa.pkxx as xxa_pkxx, xxb.pkxx as xxb_pkxx from yy left join (SELECT * FROM yy where pkyy = 101) as yya ON yy.pkyy = yya.pkyy left join xx xxa on yya.pkxx = xxa.pkxx left join xx xxb on coalesce (xxa.pkxx, 1) = xxb.pkxx --- !query 119 schema +-- !query schema struct --- !query 119 output +-- !query output 101 1 101 1 1 201 2 NULL NULL 1 301 NULL NULL NULL 1 --- !query 120 +-- !query create or replace temporary view zt1 as select * from (values (53)) as v(f1) --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output --- !query 121 +-- !query create or replace temporary view zt2 as select * from (values (53)) as v(f2) --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output --- !query 122 +-- !query create or replace temporary view zt3(f3 int) using parquet --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output --- !query 123 +-- !query select * from zt2 left join zt3 on (f2 = f3) left join zt1 on (f3 = f1) where f2 = 53 --- !query 123 schema +-- !query schema struct --- !query 123 output +-- !query output 53 NULL NULL --- !query 124 +-- !query create temp view zv1 as select *,'dummy' AS junk from zt1 --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output --- !query 125 +-- !query select * from zt2 left join zt3 on (f2 = f3) left join zv1 on (f3 = f1) where f2 = 53 --- !query 125 schema +-- !query schema struct --- !query 125 output +-- !query output 53 NULL NULL NULL --- !query 126 +-- !query select a.unique2, a.ten, b.tenthous, b.unique2, b.hundred from tenk1 a left join tenk1 b on a.unique2 = b.tenthous where a.unique1 = 42 and ((b.unique2 is null and a.ten = 2) or 
b.hundred = 3) --- !query 126 schema +-- !query schema struct --- !query 126 output +-- !query output --- !query 127 +-- !query create or replace temporary view a (i integer) using parquet --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output --- !query 128 +-- !query create or replace temporary view b (x integer, y integer) using parquet --- !query 128 schema +-- !query schema struct<> --- !query 128 output +-- !query output --- !query 129 +-- !query select * from a left join b on i = x and i = y and x = i --- !query 129 schema +-- !query schema struct --- !query 129 output +-- !query output --- !query 130 +-- !query select t1.q2, count(t2.*) from int8_tbl t1 left join int8_tbl t2 on (t1.q2 = t2.q1) group by t1.q2 order by 1 --- !query 130 schema +-- !query schema struct --- !query 130 output +-- !query output -4567890123456789 0 123 2 456 0 4567890123456789 6 --- !query 131 +-- !query select t1.q2, count(t2.*) from int8_tbl t1 left join (select * from int8_tbl) t2 on (t1.q2 = t2.q1) group by t1.q2 order by 1 --- !query 131 schema +-- !query schema struct --- !query 131 output +-- !query output -4567890123456789 0 123 2 456 0 4567890123456789 6 --- !query 132 +-- !query select t1.q2, count(t2.*) from int8_tbl t1 left join (select q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2 on (t1.q2 = t2.q1) group by t1.q2 order by 1 --- !query 132 schema +-- !query schema struct --- !query 132 output +-- !query output -4567890123456789 0 123 2 456 0 4567890123456789 6 --- !query 133 +-- !query create or replace temporary view a as select * from (values ('p'), ('q')) as v(code) --- !query 133 schema +-- !query schema struct<> --- !query 133 output +-- !query output --- !query 134 +-- !query create or replace temporary view b as select * from (values ('p', 1), ('p', 2)) as v(a, num) --- !query 134 schema +-- !query schema struct<> --- !query 134 output +-- !query output --- !query 135 +-- !query create or replace temporary view c 
as select * from (values ('A', 'p'), ('B', 'q'), ('C', null)) as v(name, a) --- !query 135 schema +-- !query schema struct<> --- !query 135 output +-- !query output --- !query 136 +-- !query select c.name, ss.code, ss.b_cnt, ss.const from c left join (select a.code, coalesce(b_grp.cnt, 0) as b_cnt, -1 as const @@ -2837,15 +2809,15 @@ from c left join ) as ss on (c.a = ss.code) order by c.name --- !query 136 schema +-- !query schema struct --- !query 136 output +-- !query output A p 2 -1 B q 0 -1 C NULL NULL NULL --- !query 137 +-- !query SELECT * FROM ( SELECT 1 as key1 ) sub1 LEFT JOIN @@ -2861,13 +2833,13 @@ LEFT JOIN ON sub4.key5 = sub3.key3 ) sub2 ON sub1.key1 = sub2.key3 --- !query 137 schema +-- !query schema struct --- !query 137 output +-- !query output 1 1 1 1 --- !query 138 +-- !query SELECT * FROM ( SELECT 1 as key1 ) sub1 LEFT JOIN @@ -2883,13 +2855,13 @@ LEFT JOIN ON sub4.key5 = sub3.key3 ) sub2 ON sub1.key1 = sub2.key3 --- !query 138 schema +-- !query schema struct --- !query 138 output +-- !query output 1 1 1 1 --- !query 139 +-- !query SELECT qq, unique1 FROM ( SELECT COALESCE(q1, 0) AS qq FROM int8_tbl a ) AS ss1 @@ -2897,45 +2869,45 @@ SELECT qq, unique1 ( SELECT COALESCE(q2, -1) AS qq FROM int8_tbl b ) AS ss2 USING (qq) INNER JOIN tenk1 c ON qq = unique2 --- !query 139 schema +-- !query schema struct --- !query 139 output +-- !query output 123 4596 123 4596 456 7318 --- !query 140 +-- !query create or replace temporary view nt1 as select * from (values(1,true,true), (2,true,false), (3,false,false)) as v(id, a1, a2) --- !query 140 schema +-- !query schema struct<> --- !query 140 output +-- !query output --- !query 141 +-- !query create or replace temporary view nt2 as select * from (values(1,1,true,true), (2,2,true,false), (3,3,false,false)) as v(id, nt1_id, b1, b2) --- !query 141 schema +-- !query schema struct<> --- !query 141 output +-- !query output --- !query 142 +-- !query create or replace temporary view nt3 as select * from 
(values(1,1,true), (2,2,false), (3,3,true)) as v(id, nt2_id, c1) --- !query 142 schema +-- !query schema struct<> --- !query 142 output +-- !query output --- !query 143 +-- !query select nt3.id from nt3 as nt3 left join @@ -2947,17 +2919,17 @@ from nt3 as nt3 ) as ss2 on ss2.id = nt3.nt2_id where nt3.id = 1 and ss2.b3 --- !query 143 schema +-- !query schema struct --- !query 143 output +-- !query output 1 --- !query 144 +-- !query select * from int4_tbl a full join int4_tbl b on true --- !query 144 schema +-- !query schema struct --- !query 144 output +-- !query output -123456 -123456 -123456 -2147483647 -123456 0 @@ -2985,11 +2957,11 @@ struct 2147483647 2147483647 --- !query 145 +-- !query select * from int4_tbl a full join int4_tbl b on false --- !query 145 schema +-- !query schema struct --- !query 145 output +-- !query output -123456 NULL -2147483647 NULL 0 NULL @@ -3002,27 +2974,27 @@ NULL 123456 NULL 2147483647 --- !query 146 +-- !query select count(*) from tenk1 a join tenk1 b on a.unique1 = b.unique2 left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand join int4_tbl on b.thousand = f1 --- !query 146 schema +-- !query schema struct --- !query 146 output +-- !query output 10 --- !query 147 +-- !query select b.unique1 from tenk1 a join tenk1 b on a.unique1 = b.unique2 left join tenk1 c on b.unique1 = 42 and c.thousand = a.thousand join int4_tbl i1 on b.thousand = f1 right join int4_tbl i2 on i2.f1 = b.tenthous order by 1 --- !query 147 schema +-- !query schema struct --- !query 147 output +-- !query output NULL NULL NULL @@ -3030,7 +3002,7 @@ NULL 0 --- !query 148 +-- !query select * from ( select unique1, q1, coalesce(unique1, -1) + q1 as fault @@ -3038,43 +3010,43 @@ select * from ) ss where fault = 122 order by fault --- !query 148 schema +-- !query schema struct --- !query 148 output +-- !query output NULL 123 122 --- !query 149 +-- !query select q1, unique2, thousand, hundred from int8_tbl a left join tenk1 b on q1 = unique2 where 
coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123) --- !query 149 schema +-- !query schema struct --- !query 149 output +-- !query output --- !query 150 +-- !query select f1, unique2, case when unique2 is null then f1 else 0 end from int4_tbl a left join tenk1 b on f1 = unique2 where (case when unique2 is null then f1 else 0 end) = 0 --- !query 150 schema +-- !query schema struct --- !query 150 output +-- !query output 0 0 0 --- !query 151 +-- !query select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand) from tenk1 a left join tenk1 b on b.thousand = a.unique1 left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand) where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44 --- !query 151 schema +-- !query schema struct --- !query 151 output +-- !query output --- !query 152 +-- !query select * from text_tbl t1 inner join int8_tbl i8 @@ -3083,32 +3055,32 @@ select * from on t1.f1 = 'doh!' left join int4_tbl i4 on i8.q1 = i4.f1 --- !query 152 schema +-- !query schema struct --- !query 152 output +-- !query output doh! 123 456 doh! NULL doh! 
123 456 hi de ho neighbor NULL --- !query 153 +-- !query select * from (select 1 as id) as xx left join (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id)) on (xx.id = coalesce(yy.id)) --- !query 153 schema +-- !query schema struct --- !query 153 output +-- !query output 1 1 2838 1 1 1 1 1 1 1 1 1 2 3 BAAAAA EFEAAA OOOOxx 1 --- !query 154 +-- !query select a.q2, b.q1 from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1) where coalesce(b.q1, 1) > 0 --- !query 154 schema +-- !query schema struct --- !query 154 output +-- !query output -4567890123456789 NULL 123 123 123 123 @@ -3121,124 +3093,124 @@ struct 4567890123456789 4567890123456789 --- !query 155 +-- !query create or replace temporary view parent as select * from (values (1, 10), (2, 20), (3, 30)) as v(k, pd) --- !query 155 schema +-- !query schema struct<> --- !query 155 output +-- !query output --- !query 156 +-- !query create or replace temporary view child as select * from (values (1, 100), (4, 400)) as v(k, cd) --- !query 156 schema +-- !query schema struct<> --- !query 156 output +-- !query output --- !query 157 +-- !query select p.* from parent p left join child c on (p.k = c.k) --- !query 157 schema +-- !query schema struct --- !query 157 output +-- !query output 1 10 2 20 3 30 --- !query 158 +-- !query select p.*, linked from parent p left join (select c.*, true as linked from child c) as ss on (p.k = ss.k) --- !query 158 schema +-- !query schema struct --- !query 158 output +-- !query output 1 10 true 2 20 NULL 3 30 NULL --- !query 159 +-- !query select p.* from parent p left join child c on (p.k = c.k) where p.k = 1 and p.k = 2 --- !query 159 schema +-- !query schema struct --- !query 159 output +-- !query output --- !query 160 +-- !query select p.* from (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k where p.k = 1 and p.k = 2 --- !query 160 schema +-- !query schema struct --- !query 160 output +-- !query output --- !query 161 +-- !query create or 
replace temporary view a as select * from (values (0), (1)) as v(id) --- !query 161 schema +-- !query schema struct<> --- !query 161 output +-- !query output --- !query 162 +-- !query create or replace temporary view b as select * from (values (0, 0), (1, NULL)) as v(id, a_id) --- !query 162 schema +-- !query schema struct<> --- !query 162 output +-- !query output --- !query 163 +-- !query SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0) --- !query 163 schema +-- !query schema struct --- !query 163 output +-- !query output 1 NULL NULL --- !query 164 +-- !query SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0) --- !query 164 schema +-- !query schema struct --- !query 164 output +-- !query output 1 NULL --- !query 165 +-- !query create or replace temporary view innertab as select * from (values (123L, 42L)) as v(id, dat1) --- !query 165 schema +-- !query schema struct<> --- !query 165 output +-- !query output --- !query 166 +-- !query SELECT * FROM (SELECT 1 AS x) ss1 LEFT JOIN (SELECT q1, q2, COALESCE(dat1, q1) AS y FROM int8_tbl LEFT JOIN innertab ON q2 = id) ss2 ON true --- !query 166 schema +-- !query schema struct --- !query 166 output +-- !query output 1 123 456 123 1 123 4567890123456789 123 1 4567890123456789 -4567890123456789 4567890123456789 @@ -3246,163 +3218,163 @@ struct 1 4567890123456789 4567890123456789 4567890123456789 --- !query 167 +-- !query select * from int8_tbl x join (int4_tbl x cross join int4_tbl y) j on q1 = f1 --- !query 167 schema +-- !query schema struct<> --- !query 167 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 63 --- !query 168 +-- !query select * from int8_tbl x join (int4_tbl x cross join int4_tbl y) j on q1 = y.f1 --- !query 168 schema +-- !query schema struct<> --- !query 168 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`y.f1`' given input columns: 
[j.f1, j.f1, x.q1, x.q2]; line 2 pos 63 --- !query 169 +-- !query select * from int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1 --- !query 169 schema +-- !query schema struct --- !query 169 output +-- !query output --- !query 170 +-- !query select t1.uunique1 from tenk1 t1 join tenk2 t2 on t1.two = t2.two --- !query 170 schema +-- !query schema struct<> --- !query 170 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`t1.uunique1`' given input columns: [t1.even, t2.even, t1.fivethous, t2.fivethous, t1.four, t2.four, t1.hundred, t2.hundred, t1.odd, t2.odd, t1.string4, t2.string4, t1.stringu1, t2.stringu1, t1.stringu2, t2.stringu2, t1.ten, t2.ten, t1.tenthous, t2.tenthous, t1.thousand, t2.thousand, t1.twenty, t2.twenty, t1.two, t2.two, t1.twothousand, t2.twothousand, t1.unique1, t2.unique1, t1.unique2, t2.unique2]; line 1 pos 7 --- !query 171 +-- !query select t2.uunique1 from tenk1 t1 join tenk2 t2 on t1.two = t2.two --- !query 171 schema +-- !query schema struct<> --- !query 171 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`t2.uunique1`' given input columns: [t1.even, t2.even, t1.fivethous, t2.fivethous, t1.four, t2.four, t1.hundred, t2.hundred, t1.odd, t2.odd, t1.string4, t2.string4, t1.stringu1, t2.stringu1, t1.stringu2, t2.stringu2, t1.ten, t2.ten, t1.tenthous, t2.tenthous, t1.thousand, t2.thousand, t1.twenty, t2.twenty, t1.two, t2.two, t1.twothousand, t2.twothousand, t1.unique1, t2.unique1, t1.unique2, t2.unique2]; line 1 pos 7 --- !query 172 +-- !query select uunique1 from tenk1 t1 join tenk2 t2 on t1.two = t2.two --- !query 172 schema +-- !query schema struct<> --- !query 172 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`uunique1`' given input columns: [t1.even, t2.even, t1.fivethous, t2.fivethous, t1.four, t2.four, t1.hundred, t2.hundred, t1.odd, t2.odd, t1.string4, t2.string4, t1.stringu1, t2.stringu1, t1.stringu2, t2.stringu2, t1.ten, 
t2.ten, t1.tenthous, t2.tenthous, t1.thousand, t2.thousand, t1.twenty, t2.twenty, t1.two, t2.two, t1.twothousand, t2.twothousand, t1.unique1, t2.unique1, t1.unique2, t2.unique2]; line 1 pos 7 --- !query 173 +-- !query select f1,g from int4_tbl a, (select f1 as g) ss --- !query 173 schema +-- !query schema struct<> --- !query 173 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`f1`' given input columns: []; line 1 pos 37 --- !query 174 +-- !query select f1,g from int4_tbl a, (select a.f1 as g) ss --- !query 174 schema +-- !query schema struct<> --- !query 174 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`a.f1`' given input columns: []; line 1 pos 37 --- !query 175 +-- !query select f1,g from int4_tbl a cross join (select f1 as g) ss --- !query 175 schema +-- !query schema struct<> --- !query 175 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`f1`' given input columns: []; line 1 pos 47 --- !query 176 +-- !query select f1,g from int4_tbl a cross join (select a.f1 as g) ss --- !query 176 schema +-- !query schema struct<> --- !query 176 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`a.f1`' given input columns: []; line 1 pos 47 --- !query 177 +-- !query CREATE TABLE j1 (id1 int, id2 int) USING parquet --- !query 177 schema +-- !query schema struct<> --- !query 177 output +-- !query output --- !query 178 +-- !query CREATE TABLE j2 (id1 int, id2 int) USING parquet --- !query 178 schema +-- !query schema struct<> --- !query 178 output +-- !query output --- !query 179 +-- !query INSERT INTO j1 values(1,1),(1,2) --- !query 179 schema +-- !query schema struct<> --- !query 179 output +-- !query output --- !query 180 +-- !query INSERT INTO j2 values(1,1) --- !query 180 schema +-- !query schema struct<> --- !query 180 output +-- !query output --- !query 181 +-- !query INSERT INTO j2 values(1,2) --- !query 181 schema +-- !query schema struct<> --- 
!query 181 output +-- !query output --- !query 182 +-- !query select * from j1 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 --- !query 182 schema +-- !query schema struct --- !query 182 output +-- !query output 1 1 1 1 1 2 1 2 --- !query 183 +-- !query drop table j1 --- !query 183 schema +-- !query schema struct<> --- !query 183 output +-- !query output --- !query 184 +-- !query drop table j2 --- !query 184 schema +-- !query schema struct<> --- !query 184 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out new file mode 100644 index 0000000000000..2c8bc31dbc6ca --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out @@ -0,0 +1,81 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 7 + + +-- !query +SELECT '' AS two, unique1, unique2, stringu1 + FROM onek WHERE unique1 > 50 + ORDER BY unique1 LIMIT 2 +-- !query schema +struct +-- !query output + 51 76 ZBAAAA + 52 985 ACAAAA + + +-- !query +SELECT '' AS five, unique1, unique2, stringu1 + FROM onek WHERE unique1 > 60 + ORDER BY unique1 LIMIT 5 +-- !query schema +struct +-- !query output + 61 560 JCAAAA + 62 633 KCAAAA + 63 296 LCAAAA + 64 479 MCAAAA + 65 64 NCAAAA + + +-- !query +SELECT '' AS two, unique1, unique2, stringu1 + FROM onek WHERE unique1 > 60 AND unique1 < 63 + ORDER BY unique1 LIMIT 5 +-- !query schema +struct +-- !query output + 61 560 JCAAAA + 62 633 KCAAAA + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM + (VALUES + (123, 456), + (123, 4567890123456789), + (4567890123456789, 123), + (4567890123456789, 4567890123456789), + (4567890123456789, -4567890123456789)) + AS v(q1, q2) +-- !query schema +struct<> +-- !query output + + + +-- !query +select * from int8_tbl limit (case when random() < 0.5 then bigint(null) end) +-- !query schema 
+struct<> +-- !query output +org.apache.spark.sql.AnalysisException +The limit expression must evaluate to a constant value, but got CASE WHEN (`_nondeterministic` < CAST(0.5BD AS DOUBLE)) THEN CAST(NULL AS BIGINT) END; + + +-- !query +DROP VIEW INT8_TBL +-- !query schema +struct<> +-- !query output + + + +-- !query +select sum(tenthous) as s1, sum(tenthous) + random()*0 as s2 + from tenk1 group by thousand order by thousand limit 3 +-- !query schema +struct +-- !query output +45000 45000.0 +45010 45010.0 +45020 45020.0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out new file mode 100644 index 0000000000000..bdb605e406b8a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -0,0 +1,4867 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 577 + + +-- !query +CREATE TABLE num_data (id int, val decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_add (id1 int, id2 int, expected decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_sub (id1 int, id2 int, expected decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_div (id1 int, id2 int, expected decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_mul (id1 int, id2 int, expected decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_sqrt (id int, expected decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_ln (id int, expected decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_log10 (id int, expected decimal(38,10)) 
USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_exp_power_10_ln (id int, expected decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_result (id1 int, id2 int, result decimal(38,10)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,2,-34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,2,34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,2,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,2,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,3,4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,3,-4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,3,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO 
num_exp_div VALUES (0,3,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,4,7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,4,-7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,4,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,4,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,5,16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,5,-16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,5,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,5,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,6,93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,6,-93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,6,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,6,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,7,-83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,7,83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,7,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,7,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,8,74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,8,-74881) +-- !query schema 
+struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,8,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,8,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (0,9,-24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (0,9,24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (0,9,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (0,9,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,2,-34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,2,34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,2,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,2,0) +-- !query schema +struct<> +-- !query output + + + +-- !query 
+INSERT INTO num_exp_add VALUES (1,3,4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,3,-4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,3,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,3,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,4,7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,4,-7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,4,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,4,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,5,16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,5,-16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,5,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,5,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,6,93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,6,-93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,6,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,6,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,7,-83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,7,83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,7,0) +-- !query 
schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,7,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,8,74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,8,-74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,8,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,8,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (1,9,-24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (1,9,24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (1,9,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (1,9,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,0,-34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,0,-34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,1,-34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,1,-34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES 
(2,2,-68676984.430794094) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,2,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,2,1179132047626883.596862135856320209) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,2,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,3,-34338487.905397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,3,-34338496.525397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,3,-147998901.44836127257) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,3,-7967167.56737750510440835266) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,4,-26539030.803497047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,4,-42137953.627297047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,4,-267821744976817.8111137106593) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,4,-4.40267480046830116685) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,5,-34322095.176906047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,5,-34354889.253888047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,5,-563049578578.769242506736077) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,5,-2094.18866914563535496429) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES 
(2,6,-34244590.637766787) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,6,-34432393.793027307) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,6,-3224438592470.18449811926184222) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,6,-365.68599891479766440940) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,7,-117366977.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,7,48689992.784602953) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,7,2851072985828710.485883795) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,7,.41357483778485235518) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,8,-34263611.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,8,-34413373.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,8,-2571300635581.146276407) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,8,-458.57416721727870888476) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (2,9,-59265296.260444467) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (2,9,-9411688.170349627) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (2,9,855948866655588.453741509242968740) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (2,9,1.37757299946438931811) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES 
(3,0,4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,0,4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,1,4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,1,4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,2,-34338487.905397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,2,34338496.525397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,2,-147998901.44836127257) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,2,-.00000012551512084352) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,3,8.62) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,3,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,3,18.5761) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,3,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,4,7799465.7219) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,4,-7799457.1019) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT 
INTO num_exp_mul VALUES (3,4,33615678.685289) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,4,.00000055260225961552) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,5,16401.348491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,5,-16392.728491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,5,70671.23589621) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,5,.00026285234387695504) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,6,93905.88763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,6,-93897.26763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,6,404715.7995864206) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,6,.00004589912234457595) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,7,-83028480.69) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,7,83028489.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,7,-357852770.35) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,7,-.00000005190989574240) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,8,74885.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,8,-74876.69) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,8,322737.11) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div 
VALUES (3,8,.00005755799201399553) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (3,9,-24926799.735047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (3,9,24926808.355047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (3,9,-107434525.43415438020) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (3,9,-.00000017290624149854) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,0,7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,0,7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,1,7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,1,7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,2,-26539030.803497047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,2,42137953.627297047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,2,-267821744976817.8111137106593) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,2,-.22713465002993920385) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO 
num_exp_add VALUES (4,3,7799465.7219) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,3,7799457.1019) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,3,33615678.685289) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,3,1809619.81714617169373549883) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,4,15598922.8238) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,4,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,4,60831598315717.14146161) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,4,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,5,7815858.450391) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,5,7783064.373409) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,5,127888068979.9935054429) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,5,475.66281046305802686061) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,6,7893362.98953026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,6,7705559.83426974) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,6,732381731243.745115764094) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,6,83.05996138436129499606) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,7,-75229023.5881) +-- !query schema +struct<> +-- !query output 
+ + + +-- !query +INSERT INTO num_exp_sub VALUES (4,7,90827946.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,7,-647577464846017.9715) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,7,-.09393717604145131637) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,8,7874342.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,8,7724580.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,8,584031469984.4839) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,8,104.15808298366741897143) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (4,9,-17127342.633147420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (4,9,32726265.456947420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (4,9,-194415646271340.1815956522980) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (4,9,-.31289456112403769409) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,0,16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,0,16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,1,16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,1,16397.038491) +-- !query schema +struct<> +-- !query 
output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,2,-34322095.176906047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,2,34354889.253888047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,2,-563049578578.769242506736077) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,2,-.00047751189505192446) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,3,16401.348491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,3,16392.728491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,3,70671.23589621) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,3,3804.41728329466357308584) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,4,7815858.450391) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,4,-7783064.373409) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,4,127888068979.9935054429) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,4,.00210232958726897192) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,5,32794.076982) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,5,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,5,268862871.275335557081) +-- !query schema +struct<> +-- 
!query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,5,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,6,110298.61612126) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,6,-77504.53913926) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,6,1539707782.76899778633766) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,6,.17461941433576102689) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,7,-83012087.961509) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,7,83044882.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,7,-1361421264394.416135) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,7,-.00019748690453643710) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,8,91278.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,8,-58483.961509) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,8,1227826639.244571) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (5,8,.21897461960978085228) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (5,9,-24910407.006556420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (5,9,24943201.083538420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (5,9,-408725765384.257043660243220) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES 
(5,9,-.00065780749354660427) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,0,93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,0,93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,1,93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,1,93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,2,-34244590.637766787) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,2,34432393.793027307) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,2,-3224438592470.18449811926184222) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,2,-.00273458651128995823) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,3,93905.88763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,3,93897.26763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,3,404715.7995864206) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,3,21786.90896293735498839907) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add 
VALUES (6,4,7893362.98953026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,4,-7705559.83426974) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,4,732381731243.745115764094) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,4,.01203949512295682469) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,5,110298.61612126) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,5,77504.53913926) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,5,1539707782.76899778633766) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,5,5.72674008674192359679) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,6,187803.15526052) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,6,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,6,8817506281.4517452372676676) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,6,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,7,-82934583.42236974) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,7,83122386.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,7,-7796505729750.37795610) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,7,-.00113095617281538980) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,8,168782.57763026) +-- !query schema +struct<> +-- !query 
output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,8,19020.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,8,7031444034.53149906) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,8,1.25401073209839612184) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (6,9,-24832902.467417160) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (6,9,25020705.622677680) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (6,9,-2340666225110.29929521292692920) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (6,9,-.00376709254265256789) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,0,-83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,0,-83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,1,-83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,1,-83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,2,-117366977.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,2,-48689992.784602953) +-- !query schema +struct<> +-- !query output + + + +-- 
!query +INSERT INTO num_exp_mul VALUES (7,2,2851072985828710.485883795) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,2,2.41794207151503385700) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,3,-83028480.69) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,3,-83028489.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,3,-357852770.35) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,3,-19264149.65197215777262180974) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,4,-75229023.5881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,4,-90827946.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,4,-647577464846017.9715) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,4,-10.64541262725136247686) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,5,-83012087.961509) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,5,-83044882.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,5,-1361421264394.416135) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,5,-5063.62688881730941836574) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,6,-82934583.42236974) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,6,-83122386.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES 
(7,6,-7796505729750.37795610) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,6,-884.20756174009028770294) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,7,-166056970) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,7,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,7,6893729321395225) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,7,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,8,-82953604) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,8,-83103366) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,8,-6217255985285) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,8,-1108.80577182462841041118) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (7,9,-107955289.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (7,9,-58101680.954952580) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (7,9,2069634775752159.035758700) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (7,9,3.33089171198810413382) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,0,74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,0,74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES 
(8,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,1,74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,1,74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,2,-34263611.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,2,34413373.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,2,-2571300635581.146276407) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,2,-.00218067233500788615) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,3,74885.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,3,74876.69) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,3,322737.11) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,3,17373.78190255220417633410) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,4,7874342.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,4,-7724580.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,4,584031469984.4839) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,4,.00960079113741758956) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,5,91278.038491) +-- !query 
schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,5,58483.961509) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,5,1227826639.244571) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,5,4.56673929509287019456) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,6,168782.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,6,-19020.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,6,7031444034.53149906) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,6,.79744134113322314424) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,7,-82953604) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,7,83103366) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,7,-6217255985285) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,7,-.00090187120721280172) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,8,149762) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,8,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,8,5607164161) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,8,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (8,9,-24851923.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (8,9,25001685.045047420) +-- !query schema 
+struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (8,9,-1866544013697.195857020) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (8,9,-.00300403532938582735) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,0,-24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,0,-24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,1,-24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,1,-24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,2,-59265296.260444467) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,2,9411688.170349627) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,2,855948866655588.453741509242968740) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,2,.72591434384152961526) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,3,-24926799.735047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,3,-24926808.355047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES 
(9,3,-107434525.43415438020) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,3,-5783481.21694835730858468677) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,4,-17127342.633147420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,4,-32726265.456947420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,4,-194415646271340.1815956522980) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,4,-3.19596478892958416484) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,5,-24910407.006556420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,5,-24943201.083538420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,5,-408725765384.257043660243220) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,5,-1520.20159364322004505807) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,6,-24832902.467417160) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,6,-25020705.622677680) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,6,-2340666225110.29929521292692920) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,6,-265.45671195426965751280) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,7,-107955289.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,7,58101680.954952580) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES 
(9,7,2069634775752159.035758700) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,7,.30021990699995814689) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,8,-24851923.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,8,-25001685.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,8,-1866544013697.195857020) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,8,-332.88556569820675471748) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_add VALUES (9,9,-49853608.090094840) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sub VALUES (9,9,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_mul VALUES (9,9,621345559900192.420120630048656400) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_div VALUES (9,9,1.00000000000000000000) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (0,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (1,0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (2,5859.90547836712524903505) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (3,2.07605394920266944396) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (4,2792.75158435189147418923) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (5,128.05092147657509145473) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (6,306.43364311096782703406) +-- !query schema +struct<> +-- 
!query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (7,9111.99676251039939975230) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (8,273.64392922189960397542) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_sqrt VALUES (9,4992.67503899937593364766) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (2,17.35177750493897715514) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (3,1.46093790411565641971) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (4,15.86956523951936572464) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (5,9.70485601768871834038) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (6,11.45000246622944403127) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (7,18.23469429965478772991) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (8,11.22365546576315513668) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_ln VALUES (9,17.03145425013166006962) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (2,7.53578122160797276459) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES 
(3,.63447727016073160075) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (4,6.89206461372691743345) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (5,4.21476541614777768626) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (6,4.97267288886207207671) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (7,7.91922711353275546914) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (8,4.87437163556421004138) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_log10 VALUES (9,7.39666659961986567059) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (0,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (1,double('NaN')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (2,224790267919917955.13261618583642653184) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (3,28.90266599445155957393) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (4,7405685069594999.07733999469386277636) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (5,5068226527.32127265408584640098) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (6,281839893606.99372343357047819067) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (7,1716699575118597095.42330819910640247627) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +decimal can only support precision 
up to 38 +== SQL == +INSERT INTO num_exp_power_10_ln VALUES (7,1716699575118597095.42330819910640247627) + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (8,167361463828.07491320069016125952) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_exp_power_10_ln VALUES (9,107511333880052007.04141124673540337457) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (0, 0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (1, 0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (2, -34338492.215397047) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (3, 4.31) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (4, 7799461.4119) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (5, 16397.038491) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (6, 93901.57763026) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (7, -83028485) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (8, 74881) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_data VALUES (9, -24926804.045047420) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM num_data +-- !query schema +struct +-- !query output +0 0.0000000000 +1 0.0000000000 +2 -34338492.2153970470 +3 4.3100000000 +4 7799461.4119000000 +5 16397.0384910000 +6 93901.5776302600 +7 -83028485.0000000000 +8 74881.0000000000 +9 -24926804.0450474200 + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, t1.val + t2.val + FROM num_data t1, num_data t2 +-- !query schema +struct<> +-- !query output + 
+ + +-- !query +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_add t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != t2.expected +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val + t2.val, 10) + FROM num_data t1, num_data t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 10) as expected + FROM num_result t1, num_exp_add t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 10) +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, t1.val - t2.val + FROM num_data t1, num_data t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_sub t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != t2.expected +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val - t2.val, 40) + FROM num_data t1, num_data t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 40) + FROM num_result t1, num_exp_sub t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 40) +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, t1.val, t2.val, t1.val * t2.val + FROM num_data t1, num_data t2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException 
+`default`.`num_result` requires that the data to be inserted have the same number of columns as the target table: target table has 3 column(s) but the inserted data has 5 column(s), including 0 partition column(s) having constant value(s).; + + +-- !query +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_mul t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != t2.expected +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val * t2.val, 30) + FROM num_data t1, num_data t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 30) as expected + FROM num_result t1, num_exp_mul t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 30) +-- !query schema +struct +-- !query output +2 2 1179132047626883.5968620000 1179132047626883.5968621359 +2 3 -147998901.4483610000 -147998901.4483612726 +2 4 -267821744976817.8111140000 -267821744976817.8111137107 +2 5 -563049578578.7692430000 -563049578578.7692425067 +2 6 -3224438592470.1844980000 -3224438592470.1844981193 +2 7 2851072985828710.4858840000 2851072985828710.4858837950 +2 8 -2571300635581.1462760000 -2571300635581.1462764070 +2 9 855948866655588.4537420000 855948866655588.4537415092 +3 2 -147998901.4483610000 -147998901.4483612726 +3 5 70671.2358960000 70671.2358962100 +3 6 404715.7995860000 404715.7995864206 +3 9 -107434525.4341540000 -107434525.4341543802 +4 2 -267821744976817.8111140000 -267821744976817.8111137107 +4 4 60831598315717.1414620000 60831598315717.1414616100 +4 5 127888068979.9935050000 127888068979.9935054429 +4 6 732381731243.7451160000 732381731243.7451157641 +4 9 -194415646271340.1815960000 -194415646271340.1815956523 +5 2 -563049578578.7692430000 -563049578578.7692425067 +5 3 70671.2358960000 70671.2358962100 +5 4 
127888068979.9935050000 127888068979.9935054429 +5 5 268862871.2753360000 268862871.2753355571 +5 6 1539707782.7689980000 1539707782.7689977863 +5 9 -408725765384.2570440000 -408725765384.2570436602 +6 2 -3224438592470.1844980000 -3224438592470.1844981193 +6 3 404715.7995860000 404715.7995864206 +6 4 732381731243.7451160000 732381731243.7451157641 +6 5 1539707782.7689980000 1539707782.7689977863 +6 6 8817506281.4517450000 8817506281.4517452373 +6 7 -7796505729750.3779560000 -7796505729750.3779561000 +6 8 7031444034.5314990000 7031444034.5314990600 +6 9 -2340666225110.2992950000 -2340666225110.2992952129 +7 2 2851072985828710.4858840000 2851072985828710.4858837950 +7 6 -7796505729750.3779560000 -7796505729750.3779561000 +7 9 2069634775752159.0357590000 2069634775752159.0357587000 +8 2 -2571300635581.1462760000 -2571300635581.1462764070 +8 6 7031444034.5314990000 7031444034.5314990600 +8 9 -1866544013697.1958570000 -1866544013697.1958570200 +9 2 855948866655588.4537420000 855948866655588.4537415092 +9 3 -107434525.4341540000 -107434525.4341543802 +9 4 -194415646271340.1815960000 -194415646271340.1815956523 +9 5 -408725765384.2570440000 -408725765384.2570436602 +9 6 -2340666225110.2992950000 -2340666225110.2992952129 +9 7 2069634775752159.0357590000 2069634775752159.0357587000 +9 8 -1866544013697.1958570000 -1866544013697.1958570200 +9 9 621345559900192.4201210000 621345559900192.4201206300 + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, t1.val / t2.val + FROM num_data t1, num_data t2 + WHERE t2.val != '0.0' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.id2, t1.result, t2.expected + FROM num_result t1, num_exp_div t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != t2.expected +-- !query schema +struct +-- !query output +2 3 -7967167.5673780000 -7967167.5673775051 +2 4 -4.4026750000 -4.4026748005 +2 5 -2094.1886690000 
-2094.1886691456 +2 6 -365.6859990000 -365.6859989148 +2 7 0.4135750000 0.4135748378 +2 8 -458.5741670000 -458.5741672173 +2 9 1.3775730000 1.3775729995 +3 2 0.0000000000 -0.0000001255 +3 4 0.0000010000 0.0000005526 +3 5 0.0002630000 0.0002628523 +3 6 0.0000460000 0.0000458991 +3 7 0.0000000000 -0.0000000519 +3 8 0.0000580000 0.0000575580 +3 9 0.0000000000 -0.0000001729 +4 2 -0.2271350000 -0.2271346500 +4 3 1809619.8171460000 1809619.8171461717 +4 5 475.6628100000 475.6628104631 +4 6 83.0599610000 83.0599613844 +4 7 -0.0939370000 -0.0939371760 +4 8 104.1580830000 104.1580829837 +4 9 -0.3128950000 -0.3128945611 +5 2 -0.0004780000 -0.0004775119 +5 3 3804.4172830000 3804.4172832947 +5 4 0.0021020000 0.0021023296 +5 6 0.1746190000 0.1746194143 +5 7 -0.0001970000 -0.0001974869 +5 8 0.2189750000 0.2189746196 +5 9 -0.0006580000 -0.0006578075 +6 2 -0.0027350000 -0.0027345865 +6 3 21786.9089630000 21786.9089629374 +6 4 0.0120390000 0.0120394951 +6 5 5.7267400000 5.7267400867 +6 7 -0.0011310000 -0.0011309562 +6 8 1.2540110000 1.2540107321 +6 9 -0.0037670000 -0.0037670925 +7 2 2.4179420000 2.4179420715 +7 3 -19264149.6519720000 -19264149.6519721578 +7 4 -10.6454130000 -10.6454126273 +7 5 -5063.6268890000 -5063.6268888173 +7 6 -884.2075620000 -884.2075617401 +7 8 -1108.8057720000 -1108.8057718246 +7 9 3.3308920000 3.3308917120 +8 2 -0.0021810000 -0.0021806723 +8 3 17373.7819030000 17373.7819025522 +8 4 0.0096010000 0.0096007911 +8 5 4.5667390000 4.5667392951 +8 6 0.7974410000 0.7974413411 +8 7 -0.0009020000 -0.0009018712 +8 9 -0.0030040000 -0.0030040353 +9 2 0.7259140000 0.7259143438 +9 3 -5783481.2169480000 -5783481.2169483573 +9 4 -3.1959650000 -3.1959647889 +9 5 -1520.2015940000 -1520.2015936432 +9 6 -265.4567120000 -265.4567119543 +9 7 0.3002200000 0.3002199070 +9 8 -332.8855660000 -332.8855656982 + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT t1.id, t2.id, round(t1.val / t2.val, 80) + 
FROM num_data t1, num_data t2 + WHERE t2.val != '0.0' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.id2, t1.result, round(t2.expected, 80) as expected + FROM num_result t1, num_exp_div t2 + WHERE t1.id1 = t2.id1 AND t1.id2 = t2.id2 + AND t1.result != round(t2.expected, 80) +-- !query schema +struct +-- !query output +2 3 -7967167.5673780000 -7967167.5673775051 +2 4 -4.4026750000 -4.4026748005 +2 5 -2094.1886690000 -2094.1886691456 +2 6 -365.6859990000 -365.6859989148 +2 7 0.4135750000 0.4135748378 +2 8 -458.5741670000 -458.5741672173 +2 9 1.3775730000 1.3775729995 +3 2 0.0000000000 -0.0000001255 +3 4 0.0000010000 0.0000005526 +3 5 0.0002630000 0.0002628523 +3 6 0.0000460000 0.0000458991 +3 7 0.0000000000 -0.0000000519 +3 8 0.0000580000 0.0000575580 +3 9 0.0000000000 -0.0000001729 +4 2 -0.2271350000 -0.2271346500 +4 3 1809619.8171460000 1809619.8171461717 +4 5 475.6628100000 475.6628104631 +4 6 83.0599610000 83.0599613844 +4 7 -0.0939370000 -0.0939371760 +4 8 104.1580830000 104.1580829837 +4 9 -0.3128950000 -0.3128945611 +5 2 -0.0004780000 -0.0004775119 +5 3 3804.4172830000 3804.4172832947 +5 4 0.0021020000 0.0021023296 +5 6 0.1746190000 0.1746194143 +5 7 -0.0001970000 -0.0001974869 +5 8 0.2189750000 0.2189746196 +5 9 -0.0006580000 -0.0006578075 +6 2 -0.0027350000 -0.0027345865 +6 3 21786.9089630000 21786.9089629374 +6 4 0.0120390000 0.0120394951 +6 5 5.7267400000 5.7267400867 +6 7 -0.0011310000 -0.0011309562 +6 8 1.2540110000 1.2540107321 +6 9 -0.0037670000 -0.0037670925 +7 2 2.4179420000 2.4179420715 +7 3 -19264149.6519720000 -19264149.6519721578 +7 4 -10.6454130000 -10.6454126273 +7 5 -5063.6268890000 -5063.6268888173 +7 6 -884.2075620000 -884.2075617401 +7 8 -1108.8057720000 -1108.8057718246 +7 9 3.3308920000 3.3308917120 +8 2 -0.0021810000 -0.0021806723 +8 3 17373.7819030000 17373.7819025522 +8 4 0.0096010000 0.0096007911 +8 5 4.5667390000 4.5667392951 +8 6 0.7974410000 0.7974413411 +8 7 -0.0009020000 -0.0009018712 +8 9 
-0.0030040000 -0.0030040353 +9 2 0.7259140000 0.7259143438 +9 3 -5783481.2169480000 -5783481.2169483573 +9 4 -3.1959650000 -3.1959647889 +9 5 -1520.2015940000 -1520.2015936432 +9 6 -265.4567120000 -265.4567119543 +9 7 0.3002200000 0.3002199070 +9 8 -332.8855660000 -332.8855656982 + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT id, 0, SQRT(ABS(val)) + FROM num_data +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.result, t2.expected + FROM num_result t1, num_exp_sqrt t2 + WHERE t1.id1 = t2.id + AND t1.result != t2.expected +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT id, 0, LN(ABS(val)) + FROM num_data + WHERE val != '0.0' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.result, t2.expected + FROM num_result t1, num_exp_ln t2 + WHERE t1.id1 = t2.id + AND t1.result != t2.expected +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT id, 0, LOG(cast('10' as decimal(38, 18)), ABS(val)) + FROM num_data + WHERE val != '0.0' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.result, t2.expected + FROM num_result t1, num_exp_log10 t2 + WHERE t1.id1 = t2.id + AND t1.result != t2.expected +-- !query schema +struct +-- !query output + + + +-- !query +TRUNCATE TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_result SELECT id, 0, POWER(cast('10' as decimal(38, 18)), LN(ABS(round(val,200)))) + FROM num_data + WHERE val != '0.0' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT t1.id1, t1.result, t2.expected + FROM num_result t1, num_exp_power_10_ln t2 + WHERE t1.id1 = 
t2.id + AND t1.result != t2.expected +-- !query schema +struct +-- !query output +2 224790267919917440.0000000000 224790267919917955.1326161858 +4 7405685069595001.0000000000 7405685069594999.0773399947 +5 5068226527.3212630000 5068226527.3212726541 +6 281839893606.9936500000 281839893606.9937234336 +8 167361463828.0749000000 167361463828.0749132007 +9 107511333880051872.0000000000 107511333880052007.0414112467 + + +-- !query +SELECT AVG(val) FROM num_data +-- !query schema +struct +-- !query output +-13430913.59224232070000 + + +-- !query +CREATE TABLE fract_only (id int, val decimal(4,4)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO fract_only VALUES (1, 0.0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO fract_only VALUES (2, 0.1) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO fract_only VALUES (4, -0.9999) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO fract_only VALUES (5, 0.99994) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO fract_only VALUES (7, 0.00001) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO fract_only VALUES (8, 0.00017) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM fract_only +-- !query schema +struct +-- !query output +1 0.0000 +2 0.1000 +4 -0.9999 +5 0.9999 +7 0.0000 +8 0.0002 + + +-- !query +DROP TABLE fract_only +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT decimal(double('NaN')) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT decimal(double('Infinity')) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT decimal(double('-Infinity')) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT decimal(float('NaN')) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT decimal(float('Infinity')) +-- !query schema +struct +-- 
!query output +NULL + + +-- !query +SELECT decimal(float('-Infinity')) +-- !query schema +struct +-- !query output +NULL + + +-- !query +CREATE TABLE ceil_floor_round (a decimal(38, 18)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO ceil_floor_round VALUES (-5.5) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO ceil_floor_round VALUES (-5.499999) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO ceil_floor_round VALUES (9.5) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO ceil_floor_round VALUES (9.4999999) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO ceil_floor_round VALUES (0.0) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO ceil_floor_round VALUES (0.0000001) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO ceil_floor_round VALUES (-0.000001) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round +-- !query schema +struct +-- !query output +-0.000001000000000000 0 0 -1 0 +-5.499999000000000000 -5 -5 -6 -5 +-5.500000000000000000 -5 -5 -6 -6 +0.000000000000000000 0 0 0 0 +0.000000100000000000 1 1 0 0 +9.499999900000000000 10 10 9 9 +9.500000000000000000 10 10 9 10 + + +-- !query +DROP TABLE ceil_floor_round +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE num_input_test (n1 decimal(38, 18)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_input_test VALUES (double(trim(' 123'))) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_input_test VALUES (double(trim(' 3245874 '))) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_input_test VALUES (double(trim(' -93853'))) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO 
num_input_test VALUES (555.50) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO num_input_test VALUES (-555.50) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM num_input_test +-- !query schema +struct +-- !query output +-555.500000000000000000 +-93853.000000000000000000 +123.000000000000000000 +3245874.000000000000000000 +555.500000000000000000 + + +-- !query +select cast(999999999999999999999 as decimal(38, 0))/1000000000000000000000 +-- !query schema +struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) / CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(38,6)> +-- !query output +1.000000 + + +-- !query +select div(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000) +-- !query schema +struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(38,0)> +-- !query output +0 + + +-- !query +select mod(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000) +-- !query schema +struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> +-- !query output +999999999999999999999 + + +-- !query +select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) +-- !query schema +struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(38,0)> +-- !query output +-9 + + +-- !query +select mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) +-- !query schema +struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> +-- !query output +-999999999999999999999 + + +-- !query +select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000)*1000000000000000000000 + 
mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) +-- !query schema +struct<(CAST((CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0)) * CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0)) + CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0))):decimal(38,0)> +-- !query output +-9999999999999999999999 + + +-- !query +select mod (70.0,70) +-- !query schema +struct<(CAST(70.0 AS DECIMAL(3,1)) % CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(3,1)> +-- !query output +0.0 + + +-- !query +select div (70.0,70) +-- !query schema +struct<(CAST(70.0 AS DECIMAL(3,1)) div CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(2,0)> +-- !query output +1 + + +-- !query +select 70.0 / 70 +-- !query schema +struct<(CAST(70.0 AS DECIMAL(3,1)) / CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(8,6)> +-- !query output +1.000000 + + +-- !query +select 12345678901234567890 % 123 +-- !query schema +struct<(CAST(12345678901234567890 AS DECIMAL(20,0)) % CAST(CAST(123 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(3,0)> +-- !query output +78 + + +-- !query +select exp(0.0) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +select exp(1.0) +-- !query schema +struct +-- !query output +2.7182818284590455 + + +-- !query +select exp(32.999) +-- !query schema +struct +-- !query output +2.1442904349215556E14 + + +-- !query +select exp(-32.999) +-- !query schema +struct +-- !query output +4.663547361468238E-15 + + +-- !query +select exp(123.456) +-- !query schema +struct +-- !query output +4.132944352778106E53 + + +-- !query +select exp(-123.456) +-- !query schema +struct +-- !query output +2.4195825412645934E-54 + + +-- !query +select exp(1234.5678) +-- !query schema +struct +-- !query output +Infinity + + +-- !query +select * from range(cast(0.0 as 
decimal(38, 18)), cast(4.0 as decimal(38, 18))) +-- !query schema +struct +-- !query output +0 +1 +2 +3 + + +-- !query +select * from range(cast(0.1 as decimal(38, 18)), cast(4.0 as decimal(38, 18)), cast(1.3 as decimal(38, 18))) +-- !query schema +struct +-- !query output +0 +1 +2 +3 + + +-- !query +select * from range(cast(4.0 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), cast(-2.2 as decimal(38, 18))) +-- !query schema +struct +-- !query output +0 +2 +4 + + +-- !query +select ln(1.2345678e-28) +-- !query schema +struct +-- !query output +-64.26166165451762 + + +-- !query +select ln(0.0456789) +-- !query schema +struct +-- !query output +-3.0861187944847437 + + +-- !query +select ln(0.99949452) +-- !query schema +struct +-- !query output +-5.056077980832118E-4 + + +-- !query +select ln(1.00049687395) +-- !query schema +struct +-- !query output +4.967505490136803E-4 + + +-- !query +select ln(1234.567890123456789) +-- !query schema +struct +-- !query output +7.11847630129779 + + +-- !query +select ln(5.80397490724e5) +-- !query schema +struct +-- !query output +13.271468476626518 + + +-- !query +select ln(9.342536355e34) +-- !query schema +struct +-- !query output +80.52247093552418 + + +-- !query +select log(3.4634998359873254962349856073435545) +-- !query schema +struct +-- !query output +1.2422795911259166 + + +-- !query +select log(9.999999999999999999) +-- !query schema +struct +-- !query output +2.302585092994046 + + +-- !query +select log(10.00000000000000000) +-- !query schema +struct +-- !query output +2.302585092994046 + + +-- !query +select log(10.00000000000000001) +-- !query schema +struct +-- !query output +2.302585092994046 + + +-- !query +select log(590489.45235237) +-- !query schema +struct +-- !query output +13.288707052228641 + + +-- !query +select log(0.99923, 4.58934e34) +-- !query schema +struct +-- !query output +-103611.55579543479 + + +-- !query +select log(1.000016, 8.452010e18) +-- !query schema +struct +-- !query output 
+2723830.287707013 + + +-- !query +SELECT SUM(decimal(9999)) FROM range(1, 100001) +-- !query schema +struct +-- !query output +999900000 + + +-- !query +SELECT SUM(decimal(-9999)) FROM range(1, 100001) +-- !query schema +struct +-- !query output +-999900000 + + +-- !query +DROP TABLE num_data +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_add +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_sub +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_div +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_mul +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_sqrt +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_ln +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_log10 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_exp_power_10_ln +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_result +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE num_input_test +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out similarity index 79% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/select.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out index e54de1d6fdbdc..1e59036b979b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out @@ -2,15 +2,15 @@ -- Number of queries: 37 --- !query 0 +-- !query create or replace temporary view onek2 as select * from onek --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create 
or replace temporary view INT8_TBL as select * from values (cast(trim(' 123 ') as bigint), cast(trim(' 456') as bigint)), (cast(trim('123 ') as bigint),cast('4567890123456789' as bigint)), @@ -18,19 +18,19 @@ create or replace temporary view INT8_TBL as select * from values (cast(+4567890123456789 as bigint),cast('4567890123456789' as bigint)), (cast('+4567890123456789' as bigint),cast('-4567890123456789' as bigint)) as INT8_TBL(q1, q2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM onek WHERE onek.unique1 < 10 ORDER BY onek.unique1 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 0 998 0 0 0 0 0 0 0 0 0 0 1 AAAAAA KMBAAA OOOOxx 1 214 1 1 1 1 1 1 1 1 1 2 3 BAAAAA GIAAAA OOOOxx 2 326 0 2 2 2 2 2 2 2 2 4 5 CAAAAA OMAAAA OOOOxx @@ -43,13 +43,13 @@ struct --- !query 3 output +-- !query output 19 TAAAAA 18 SAAAAA 17 RAAAAA @@ -72,13 +72,13 @@ struct 0 AAAAAA --- !query 4 +-- !query SELECT onek.unique1, onek.stringu1 FROM onek WHERE onek.unique1 > 980 ORDER BY stringu1 ASC --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 988 AMAAAA 989 BMAAAA 990 CMAAAA @@ -100,13 +100,13 @@ struct 987 ZLAAAA --- !query 5 +-- !query SELECT onek.unique1, onek.string4 FROM onek WHERE onek.unique1 > 980 ORDER BY string4 ASC, unique1 DESC --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 999 AAAAxx 995 AAAAxx 983 AAAAxx @@ -128,13 +128,13 @@ struct 984 VVVVxx --- !query 6 +-- !query SELECT onek.unique1, onek.string4 FROM onek WHERE onek.unique1 > 980 ORDER BY string4 DESC, unique1 ASC --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 984 VVVVxx 989 VVVVxx 992 VVVVxx @@ -156,13 +156,13 @@ struct 999 AAAAxx --- !query 7 +-- !query SELECT onek.unique1, onek.string4 FROM onek WHERE onek.unique1 < 20 ORDER BY unique1 DESC, string4 ASC --- !query 7 schema +-- !query schema struct --- !query 
7 output +-- !query output 19 OOOOxx 18 VVVVxx 17 HHHHxx @@ -185,13 +185,13 @@ struct 0 OOOOxx --- !query 8 +-- !query SELECT onek.unique1, onek.string4 FROM onek WHERE onek.unique1 < 20 ORDER BY unique1 ASC, string4 DESC --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 0 OOOOxx 1 OOOOxx 2 OOOOxx @@ -214,11 +214,11 @@ struct 19 OOOOxx --- !query 9 +-- !query SELECT onek2.* FROM onek2 WHERE onek2.unique1 < 10 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 0 998 0 0 0 0 0 0 0 0 0 0 1 AAAAAA KMBAAA OOOOxx 1 214 1 1 1 1 1 1 1 1 1 2 3 BAAAAA GIAAAA OOOOxx 2 326 0 2 2 2 2 2 2 2 2 4 5 CAAAAA OMAAAA OOOOxx @@ -231,13 +231,13 @@ struct --- !query 10 output +-- !query output 19 TAAAAA 18 SAAAAA 17 RAAAAA @@ -260,12 +260,12 @@ struct 0 AAAAAA --- !query 11 +-- !query SELECT onek2.unique1, onek2.stringu1 FROM onek2 WHERE onek2.unique1 > 980 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 981 TLAAAA 982 ULAAAA 983 VLAAAA @@ -287,94 +287,94 @@ struct 999 LMAAAA --- !query 12 +-- !query CREATE TABLE tmp USING parquet AS SELECT two, stringu1, ten, string4 FROM onek --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query select foo.* from (select 1) as foo --- !query 13 schema +-- !query schema struct<1:int> --- !query 13 output +-- !query output 1 --- !query 14 +-- !query select foo.* from (select null) as foo --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output NULL --- !query 15 +-- !query select foo.* from (select 'xyzzy',1,null) as foo --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output xyzzy 1 NULL --- !query 16 +-- !query select * from onek, values(147, 'RFAAAA'), (931, 'VJAAAA') as v (i, j) WHERE onek.unique1 = v.i and onek.stringu1 = v.j --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 147 0 1 3 7 7 7 47 147 147 
147 14 15 RFAAAA AAAAAA AAAAxx 147 RFAAAA 931 1 1 3 1 11 1 31 131 431 931 2 3 VJAAAA BAAAAA HHHHxx 931 VJAAAA --- !query 17 +-- !query VALUES (1,2), (3,4+4), (7,77.7) --- !query 17 schema +-- !query schema struct --- !query 17 output -1 2 -3 8 +-- !query output +1 2.0 +3 8.0 7 77.7 --- !query 18 +-- !query VALUES (1,2), (3,4+4), (7,77.7) UNION ALL SELECT 2+2, 57 UNION ALL TABLE int8_tbl --- !query 18 schema +-- !query schema struct --- !query 18 output -1 2 -123 456 -123 4567890123456789 -3 8 -4 57 -4567890123456789 -4567890123456789 -4567890123456789 123 -4567890123456789 4567890123456789 +-- !query output +1 2.0 +123 456.0 +123 4567890123456789.0 +3 8.0 +4 57.0 +4567890123456789 -4567890123456789.0 +4567890123456789 123.0 +4567890123456789 4567890123456789.0 7 77.7 --- !query 19 +-- !query CREATE OR REPLACE TEMPORARY VIEW foo AS SELECT * FROM (values(42),(3),(10),(7),(null),(null),(1)) as foo (f1) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT * FROM foo ORDER BY f1 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output NULL NULL 1 @@ -384,11 +384,11 @@ NULL 42 --- !query 21 +-- !query SELECT * FROM foo ORDER BY f1 ASC --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL NULL 1 @@ -398,11 +398,11 @@ NULL 42 --- !query 22 +-- !query SELECT * FROM foo ORDER BY f1 NULLS FIRST --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output NULL NULL 1 @@ -412,11 +412,11 @@ NULL 42 --- !query 23 +-- !query SELECT * FROM foo ORDER BY f1 DESC --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 42 10 7 @@ -426,11 +426,11 @@ NULL NULL --- !query 24 +-- !query SELECT * FROM foo ORDER BY f1 DESC NULLS LAST --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 42 10 7 @@ -440,103 +440,103 @@ NULL NULL --- !query 25 +-- !query select * from onek2 where 
unique2 = 11 and stringu1 = 'ATAAAA' --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 494 11 0 2 4 14 4 94 94 494 494 8 9 ATAAAA LAAAAA VVVVxx --- !query 26 +-- !query select unique2 from onek2 where unique2 = 11 and stringu1 = 'ATAAAA' --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 11 --- !query 27 +-- !query select * from onek2 where unique2 = 11 and stringu1 < 'B' --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 494 11 0 2 4 14 4 94 94 494 494 8 9 ATAAAA LAAAAA VVVVxx --- !query 28 +-- !query select unique2 from onek2 where unique2 = 11 and stringu1 < 'B' --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 11 --- !query 29 +-- !query select unique2 from onek2 where unique2 = 11 and stringu1 < 'C' --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 11 --- !query 30 +-- !query select unique2 from onek2 where unique2 = 11 and stringu1 < 'B' --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 11 --- !query 31 +-- !query select unique1, unique2 from onek2 where (unique2 = 11 or unique1 = 0) and stringu1 < 'B' --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 0 998 494 11 --- !query 32 +-- !query select unique1, unique2 from onek2 where (unique2 = 11 and stringu1 < 'B') or unique1 = 0 --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 0 998 494 11 --- !query 33 +-- !query SELECT 1 AS x ORDER BY x --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output 1 --- !query 34 +-- !query select * from (values (2),(null),(1)) v(k) where k = k order by k --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 1 2 --- !query 35 +-- !query select * from (values (2),(null),(1)) v(k) where k = k --- !query 35 schema +-- !query schema struct --- !query 35 
output +-- !query output 1 2 --- !query 36 +-- !query drop table tmp --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_distinct.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_distinct.sql.out similarity index 69% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/select_distinct.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/select_distinct.sql.out index 38eae1739f553..53003e70f289a 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_distinct.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_distinct.sql.out @@ -2,30 +2,30 @@ -- Number of queries: 19 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW tmp AS SELECT two, stringu1, ten, string4 FROM onek --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT DISTINCT two FROM tmp ORDER BY 1 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 0 1 --- !query 2 +-- !query SELECT DISTINCT ten FROM tmp ORDER BY 1 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 0 1 2 @@ -38,24 +38,24 @@ struct 9 --- !query 3 +-- !query SELECT DISTINCT string4 FROM tmp ORDER BY 1 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output AAAAxx HHHHxx OOOOxx VVVVxx --- !query 4 +-- !query SELECT DISTINCT two, string4, ten FROM tmp ORDER BY two ASC, string4 ASC, ten ASC --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 0 AAAAxx 0 0 AAAAxx 2 0 AAAAxx 4 @@ -98,128 +98,128 @@ struct 1 VVVVxx 9 --- !query 5 +-- !query SELECT count(*) FROM (SELECT DISTINCT two, four, two FROM tenk1) ss --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 4 --- !query 6 +-- !query CREATE OR REPLACE TEMPORARY VIEW disttable AS 
SELECT * FROM (VALUES (1), (2), (3), (NULL)) AS v(f1) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query SELECT f1, f1 IS DISTINCT FROM 2 as `not 2` FROM disttable --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 true 2 false 3 true NULL true --- !query 8 +-- !query SELECT f1, f1 IS DISTINCT FROM NULL as `not null` FROM disttable --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 true 2 true 3 true NULL false --- !query 9 +-- !query SELECT f1, f1 IS DISTINCT FROM f1 as `false` FROM disttable --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 false 2 false 3 false NULL false --- !query 10 +-- !query SELECT f1, f1 IS DISTINCT FROM f1+1 as `not null` FROM disttable --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 true 2 true 3 true NULL false --- !query 11 +-- !query SELECT 1 IS DISTINCT FROM 2 as `yes` --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output true --- !query 12 +-- !query SELECT 2 IS DISTINCT FROM 2 as `no` --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output false --- !query 13 +-- !query SELECT 2 IS DISTINCT FROM null as `yes` --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output true --- !query 14 +-- !query SELECT null IS DISTINCT FROM null as `no` --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output false --- !query 15 +-- !query SELECT 1 IS NOT DISTINCT FROM 2 as `no` --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output false --- !query 16 +-- !query SELECT 2 IS NOT DISTINCT FROM 2 as `yes` --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output true --- !query 17 +-- !query SELECT 2 IS NOT DISTINCT FROM null as `no` --- !query 17 schema +-- !query schema struct --- 
!query 17 output +-- !query output false --- !query 18 +-- !query SELECT null IS NOT DISTINCT FROM null as `yes` --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output true diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out similarity index 65% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/select_having.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index 02536ebd8ebea..cbf4cfa58cdb9 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -2,186 +2,186 @@ -- Number of queries: 22 --- !query 0 +-- !query CREATE TABLE test_having (a int, b int, c string, d string) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query INSERT INTO test_having VALUES (0, 1, 'XXXX', 'A') --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b') --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c') --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D') --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e') --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F') --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO 
test_having VALUES (6, 4, 'cccc', 'g') --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO test_having VALUES (7, 4, 'cccc', 'h') --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT b, c FROM test_having GROUP BY b, c HAVING count(*) = 1 ORDER BY b, c --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 XXXX 3 bbbb --- !query 12 +-- !query SELECT b, c FROM test_having GROUP BY b, c HAVING b = 3 ORDER BY b, c --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 3 BBBB 3 bbbb --- !query 13 +-- !query SELECT c, max(a) FROM test_having GROUP BY c HAVING count(*) > 2 OR min(a) = max(a) ORDER BY c --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output XXXX 0 bbbb 5 --- !query 14 +-- !query SELECT min(a), max(a) FROM test_having HAVING min(a) = max(a) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output --- !query 15 +-- !query SELECT min(a), max(a) FROM test_having HAVING min(a) < max(a) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 0 9 --- !query 16 +-- !query SELECT a FROM test_having HAVING min(a) < max(a) --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and 'default.test_having.`a`' is not an aggregate function. 
Wrap '(min(default.test_having.`a`) AS `min(a#x)`, max(default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'default.test_having.`a`' in first() (or first_value) if you don't care which value you get.; --- !query 17 +-- !query SELECT 1 AS one FROM test_having HAVING a > 1 --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`a`' given input columns: [one]; line 1 pos 40 --- !query 18 +-- !query SELECT 1 AS one FROM test_having HAVING 1 > 2 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output --- !query 19 +-- !query SELECT 1 AS one FROM test_having HAVING 1 < 2 --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 1 --- !query 20 +-- !query SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 1 --- !query 21 +-- !query DROP TABLE test_having --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out old mode 100644 new mode 100755 similarity index 67% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out index 0675820b381da..4ecfabccdf414 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/select_implicit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out @@ -2,99 +2,99 @@ -- Number of queries: 38 --- !query 0 +-- !query CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query INSERT INTO 
test_missing_target VALUES (0, 1, 'XXXX', 'A') --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b') --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c') --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D') --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e') --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F') --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g') --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h') --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT c, count(*) FROM test_missing_target GROUP BY test_missing_target.c ORDER BY c --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output ABAB 2 BBBB 2 CCCC 2 @@ -103,11 +103,11 @@ bbbb 1 cccc 2 --- !query 12 +-- !query SELECT count(*) FROM test_missing_target GROUP BY test_missing_target.c 
ORDER BY c --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 2 2 2 @@ -116,43 +116,43 @@ struct 2 --- !query 13 +-- !query SELECT count(*) FROM test_missing_target GROUP BY a ORDER BY b --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`b`' given input columns: [count(1)]; line 1 pos 61 --- !query 14 +-- !query SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 2 3 4 --- !query 15 +-- !query SELECT test_missing_target.b, count(*) FROM test_missing_target GROUP BY b ORDER BY b --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 1 2 2 3 3 4 4 --- !query 16 +-- !query SELECT c FROM test_missing_target ORDER BY a --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output XXXX ABAB ABAB @@ -165,30 +165,30 @@ CCCC CCCC --- !query 17 +-- !query SELECT count(*) FROM test_missing_target GROUP BY b ORDER BY b desc --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 4 3 2 1 --- !query 18 +-- !query SELECT count(*) FROM test_missing_target ORDER BY 1 desc --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 10 --- !query 19 +-- !query SELECT c, count(*) FROM test_missing_target GROUP BY 1 ORDER BY 1 --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output ABAB 2 BBBB 2 CCCC 2 @@ -197,32 +197,32 @@ bbbb 1 cccc 2 --- !query 20 +-- !query SELECT c, count(*) FROM test_missing_target GROUP BY 3 --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output org.apache.spark.sql.AnalysisException GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 53 --- !query 21 +-- !query SELECT count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP 
BY b ORDER BY b --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 --- !query 22 +-- !query SELECT a, a FROM test_missing_target ORDER BY a --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output 0 0 1 1 2 2 @@ -235,123 +235,129 @@ struct 9 9 --- !query 23 +-- !query SELECT a/2, a/2 FROM test_missing_target ORDER BY a/2 --- !query 23 schema -struct<(a div 2):int,(a div 2):int> --- !query 23 output -0 0 -0 0 -1 1 -1 1 -2 2 -2 2 -3 3 -3 3 -4 4 -4 4 - - --- !query 24 +-- !query schema +struct<(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double,(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query output +0.0 0.0 +0.5 0.5 +1.0 1.0 +1.5 1.5 +2.0 2.0 +2.5 2.5 +3.0 3.0 +3.5 3.5 +4.0 4.0 +4.5 4.5 + + +-- !query SELECT a/2, a/2 FROM test_missing_target GROUP BY a/2 ORDER BY a/2 --- !query 24 schema -struct<(a div 2):int,(a div 2):int> --- !query 24 output -0 0 -1 1 -2 2 -3 3 -4 4 - - --- !query 25 +-- !query schema +struct<(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double,(CAST(a AS DOUBLE) / CAST(2 AS DOUBLE)):double> +-- !query output +0.0 0.0 +0.5 0.5 +1.0 1.0 +1.5 1.5 +2.0 2.0 +2.5 2.5 +3.0 3.0 +3.5 3.5 +4.0 4.0 +4.5 4.5 + + +-- !query SELECT x.b, count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY x.b ORDER BY x.b --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1 1 2 2 3 3 4 4 --- !query 26 +-- !query SELECT count(*) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY x.b ORDER BY x.b --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 1 2 3 4 --- !query 27 +-- !query SELECT a%2, count(b) FROM test_missing_target GROUP BY test_missing_target.a%2 ORDER BY test_missing_target.a%2 --- !query 27 schema +-- !query schema struct<(a % 2):int,count(b):bigint> --- !query 27 output +-- !query 
output 0 5 1 5 --- !query 28 +-- !query SELECT count(c) FROM test_missing_target GROUP BY lower(test_missing_target.c) ORDER BY lower(test_missing_target.c) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 2 3 4 1 --- !query 29 +-- !query SELECT count(a) FROM test_missing_target GROUP BY a ORDER BY b --- !query 29 schema +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`b`' given input columns: [count(a)]; line 1 pos 61 --- !query 30 +-- !query SELECT count(b) FROM test_missing_target GROUP BY b/2 ORDER BY b/2 --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1 -5 +2 +3 4 --- !query 31 +-- !query SELECT lower(test_missing_target.c), count(c) FROM test_missing_target GROUP BY lower(c) ORDER BY lower(c) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output abab 2 bbbb 3 cccc 4 xxxx 1 --- !query 32 +-- !query SELECT a FROM test_missing_target ORDER BY upper(d) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 0 1 2 @@ -364,53 +370,56 @@ struct 9 --- !query 33 +-- !query SELECT count(b) FROM test_missing_target GROUP BY (b + 1) / 2 ORDER BY (b + 1) / 2 desc --- !query 33 schema +-- !query schema struct --- !query 33 output -7 +-- !query output +4 3 +2 +1 --- !query 34 +-- !query SELECT count(x.a) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY b/2 ORDER BY b/2 --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 --- !query 35 +-- !query SELECT x.b/2, count(x.b) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY x.b/2 ORDER BY x.b/2 --- !query 35 schema -struct<(b div 2):int,count(b):bigint> --- !query 35 output -0 1 -1 5 -2 4 +-- !query schema +struct<(CAST(b AS DOUBLE) / CAST(2 
AS DOUBLE)):double,count(b):bigint> +-- !query output +0.5 1 +1.0 2 +1.5 3 +2.0 4 --- !query 36 +-- !query SELECT count(b) FROM test_missing_target x, test_missing_target y WHERE x.a = y.a GROUP BY x.b/2 --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13 --- !query 37 +-- !query DROP TABLE test_missing_target --- !query 37 schema +-- !query schema struct<> --- !query 37 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out new file mode 100644 index 0000000000000..c30eea8ab689d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out @@ -0,0 +1,998 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 122 + + +-- !query +SELECT 'first line' +' - next line' + ' - third line' + AS `Three lines to one` +-- !query schema +struct +-- !query output +first line - next line - third line + + +-- !query +SELECT 'first line' +' - next line' /* this comment is not allowed here */ +' - third line' + AS `Illegal comment within continuation` +-- !query schema +struct +-- !query output +first line - next line - third line + + +-- !query +SELECT binary('\\xDeAdBeEf') +-- !query schema +struct +-- !query output +\xDeAdBeEf + + +-- !query +SELECT binary('\\x De Ad Be Ef ') +-- !query schema +struct +-- !query output +\x De Ad Be Ef + + +-- !query +SELECT binary('\\xDe00BeEf') +-- !query schema +struct +-- !query output +\xDe00BeEf + + +-- !query +SELECT binary('DeAdBeEf') +-- !query schema +struct +-- !query output +DeAdBeEf + + +-- !query +SELECT binary('De\\000dBeEf') +-- !query schema +struct +-- !query output +De\000dBeEf + + +-- !query +SELECT binary('De\\123dBeEf') +-- !query schema +struct +-- !query output +De\123dBeEf + + +-- !query +SELECT TRIM(BOTH 
FROM ' bunch o blanks ') = 'bunch o blanks' AS `bunch o blanks` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT TRIM(LEADING FROM ' bunch o blanks ') = 'bunch o blanks ' AS `bunch o blanks ` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT TRIM(TRAILING FROM ' bunch o blanks ') = ' bunch o blanks' AS ` bunch o blanks` +-- !query schema +struct< bunch o blanks:boolean> +-- !query output +true + + +-- !query +SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS `some Xs` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS `34567890` +-- !query schema +struct<34567890:boolean> +-- !query output +true + + +-- !query +SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS `456` +-- !query schema +struct<456:boolean> +-- !query output +true + + +-- !query +SELECT POSITION('4' IN '1234567890') = '4' AS `4` +-- !query schema +struct<4:boolean> +-- !query output +true + + +-- !query +SELECT POSITION('5' IN '1234567890') = '5' AS `5` +-- !query schema +struct<5:boolean> +-- !query output +true + + +-- !query +SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS `abc45f` +-- !query schema +struct +-- !query output +abc45f + + +-- !query +SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) AS `yabadaba` +-- !query schema +struct +-- !query output +yabadaba + + +-- !query +SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS `yabadabadoo` +-- !query schema +struct +-- !query output +yabadabadoo + + +-- !query +SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS `bubba` +-- !query schema +struct +-- !query output +bubba + + +-- !query +SELECT 'hawkeye' LIKE 'h%' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'hawkeye' NOT LIKE 'h%' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'hawkeye' LIKE 'H%' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 
'hawkeye' NOT LIKE 'H%' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'hawkeye' LIKE 'indio%' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'hawkeye' NOT LIKE 'indio%' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'hawkeye' LIKE 'h%eye' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'hawkeye' NOT LIKE 'h%eye' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'indio' LIKE '_ndio' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'indio' NOT LIKE '_ndio' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'indio' LIKE 'in__o' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'indio' NOT LIKE 'in__o' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'indio' LIKE 'in_o' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'indio' NOT LIKE 'in_o' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'hawkeye' LIKE 'h%' ESCAPE '#' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'hawkeye' NOT LIKE 'h%' ESCAPE '#' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'indio' LIKE 'ind_o' ESCAPE '$' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'h%' NOT LIKE 'h#%' ESCAPE '#' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'h%wkeye' LIKE 'h#%' ESCAPE '#' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 
'h%wkeye' NOT LIKE 'h#%' ESCAPE '#' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'h%wkeye' LIKE 'h#%%' ESCAPE '#' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'h%wkeye' NOT LIKE 'h#%%' ESCAPE '#' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'h%awkeye' LIKE 'h#%a%k%e' ESCAPE '#' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'h%awkeye' NOT LIKE 'h#%a%k%e' ESCAPE '#' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'indio' LIKE '_ndio' ESCAPE '$' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'indio' NOT LIKE '_ndio' ESCAPE '$' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'i_dio' LIKE 'i$_d_o' ESCAPE '$' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'i_dio' NOT LIKE 'i$_d_o' ESCAPE '$' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'i_dio' LIKE 'i$_nd_o' ESCAPE '$' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'i_dio' NOT LIKE 'i$_nd_o' ESCAPE '$' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'i_dio' LIKE 'i$_d%o' ESCAPE '$' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'i_dio' NOT LIKE 'i$_d%o' ESCAPE '$' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'maca' LIKE 'm%aca' ESCAPE '%' AS `true` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a'; + + +-- !query +SELECT 'maca' NOT LIKE 'm%aca' ESCAPE '%' AS `false` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'm%aca' is invalid, the escape character is not allowed to 
precede 'a'; + + +-- !query +SELECT 'ma%a' LIKE 'm%a%%a' ESCAPE '%' AS `true` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a'; + + +-- !query +SELECT 'ma%a' NOT LIKE 'm%a%%a' ESCAPE '%' AS `false` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a'; + + +-- !query +SELECT 'bear' LIKE 'b_ear' ESCAPE '_' AS `true` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e'; + + +-- !query +SELECT 'bear' NOT LIKE 'b_ear' ESCAPE '_' AS `false` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e'; + + +-- !query +SELECT 'be_r' LIKE 'b_e__r' ESCAPE '_' AS `true` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e'; + + +-- !query +SELECT 'be_r' NOT LIKE 'b_e__r' ESCAPE '_' AS `false` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e'; + + +-- !query +SELECT 'be_r' LIKE '__e__r' ESCAPE '_' AS `false` +-- !query schema +struct +-- !query output +false + + +-- !query +SELECT 'be_r' NOT LIKE '__e__r' ESCAPE '_' AS `true` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f +-- !query schema +struct +-- !query output +true true false + + +-- !query +SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f +-- !query schema +struct +-- !query output +true true false + + +-- !query +SELECT 'foo' 
LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f +-- !query schema +struct +-- !query output +true true false + + +-- !query +SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f +-- !query schema +struct +-- !query output +true true false + + +-- !query +SELECT 'jack' LIKE '%____%' AS t +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT 'unknown' || ' and unknown' AS `Concat unknown types` +-- !query schema +struct +-- !query output +unknown and unknown + + +-- !query +SELECT string('text') || ' and unknown' AS `Concat text to unknown type` +-- !query schema +struct +-- !query output +text and unknown + + +-- !query +CREATE TABLE toasttest(f1 string) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into toasttest values(repeat('1234567890',10000)) +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into toasttest values(repeat('1234567890',10000)) +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into toasttest values(repeat('1234567890',10000)) +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into toasttest values(repeat('1234567890',10000)) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT substr(f1, 99995) from toasttest +-- !query schema +struct +-- !query output +567890 +567890 +567890 +567890 + + +-- !query +SELECT substr(f1, 99995, 10) from toasttest +-- !query schema +struct +-- !query output +567890 +567890 +567890 +567890 + + +-- !query +SELECT length('abcdef') AS `length_6` +-- !query schema +struct +-- !query output +6 + + +-- !query +SELECT position('cd', 'abcdef') AS `pos_3` +-- !query schema +struct +-- !query output +3 + + +-- !query +SELECT position('xy', 'abcdef') AS `pos_0` +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT replace('abcdef', 'de', '45') AS `abc45f` +-- !query schema +struct +-- !query output +abc45f + + +-- !query +SELECT 
replace('yabadabadoo', 'ba', '123') AS `ya123da123doo` +-- !query schema +struct +-- !query output +ya123da123doo + + +-- !query +SELECT replace('yabadoo', 'bad', '') AS `yaoo` +-- !query schema +struct +-- !query output +yaoo + + +-- !query +select hex(256*256*256 - 1) AS `ffffff` +-- !query schema +struct +-- !query output +FFFFFF + + +-- !query +select hex(bigint(bigint(bigint(bigint(256)*256)*256)*256) - 1) AS `ffffffff` +-- !query schema +struct +-- !query output +FFFFFFFF + + +-- !query +select md5('') = 'd41d8cd98f00b204e9800998ecf8427e' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5('a') = '0cc175b9c0f1b6a831c399e269772661' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5('abc') = '900150983cd24fb0d6963f7d28e17f72' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5('message digest') = 'f96b697d7cb7938d525a2f31aaf161d0' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5('abcdefghijklmnopqrstuvwxyz') = 'c3fcd3d76192e4007dfb496cca67e13b' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789') = 'd174ab98d277d9f5a5611c2c9f419d9f' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890') = '57edf4a22be3c955ac49da2e2107b67a' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5(binary('')) = 'd41d8cd98f00b204e9800998ecf8427e' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5(binary('a')) = '0cc175b9c0f1b6a831c399e269772661' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5(binary('abc')) = '900150983cd24fb0d6963f7d28e17f72' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select 
md5(binary('message digest')) = 'f96b697d7cb7938d525a2f31aaf161d0' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5(binary('abcdefghijklmnopqrstuvwxyz')) = 'c3fcd3d76192e4007dfb496cca67e13b' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5(binary('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789')) = 'd174ab98d277d9f5a5611c2c9f419d9f' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +select md5(binary('12345678901234567890123456789012345678901234567890123456789012345678901234567890')) = '57edf4a22be3c955ac49da2e2107b67a' AS `TRUE` +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT initcap('hi THOMAS') +-- !query schema +struct +-- !query output +Hi Thomas + + +-- !query +SELECT lpad('hi', 5, 'xy') +-- !query schema +struct +-- !query output +xyxhi + + +-- !query +SELECT lpad('hi', 5) +-- !query schema +struct +-- !query output + hi + + +-- !query +SELECT lpad('hi', -5, 'xy') +-- !query schema +struct +-- !query output + + + +-- !query +SELECT lpad('hello', 2) +-- !query schema +struct +-- !query output +he + + +-- !query +SELECT lpad('hi', 5, '') +-- !query schema +struct +-- !query output +hi + + +-- !query +SELECT rpad('hi', 5, 'xy') +-- !query schema +struct +-- !query output +hixyx + + +-- !query +SELECT rpad('hi', 5) +-- !query schema +struct +-- !query output +hi + + +-- !query +SELECT rpad('hi', -5, 'xy') +-- !query schema +struct +-- !query output + + + +-- !query +SELECT rpad('hello', 2) +-- !query schema +struct +-- !query output +he + + +-- !query +SELECT rpad('hi', 5, '') +-- !query schema +struct +-- !query output +hi + + +-- !query +SELECT ltrim('zzzytrim', 'xyz') +-- !query schema +struct +-- !query output +trim + + +-- !query +SELECT translate('', '14', 'ax') +-- !query schema +struct +-- !query output + + + +-- !query +SELECT translate('12345', '14', 'ax') +-- !query schema +struct +-- !query output +a23x5 + + 
+-- !query +SELECT ascii('x') +-- !query schema +struct +-- !query output +120 + + +-- !query +SELECT ascii('') +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT chr(65) +-- !query schema +struct +-- !query output +A + + +-- !query +SELECT chr(0) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT repeat('Pg', 4) +-- !query schema +struct +-- !query output +PgPgPgPg + + +-- !query +SELECT repeat('Pg', -4) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT trim(binary('\\000') from binary('\\000Tom\\000')) +-- !query schema +struct +-- !query output +Tom + + +-- !query +DROP TABLE toasttest +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out old mode 100644 new mode 100755 similarity index 63% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/text.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index 352b0232e8945..ccca1ba8cd8b4 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -1,168 +1,162 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 44 +-- Number of queries: 42 --- !query 0 +-- !query SELECT string('this is a text string') = string('this is a text string') AS true --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output true --- !query 1 +-- !query SELECT string('this is a text string') = string('this is a text strin') AS `false` --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output false --- !query 2 +-- !query CREATE TABLE TEXT_TBL (f1 string) USING parquet --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO TEXT_TBL VALUES ('doh!') --- !query 3 schema +-- !query 
schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO TEXT_TBL VALUES ('hi de ho neighbor') --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT '' AS two, * FROM TEXT_TBL --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output doh! hi de ho neighbor --- !query 6 +-- !query select length(42) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 2 --- !query 7 +-- !query select string('four: ') || 2+2 --- !query 7 schema -struct<(CAST(concat(CAST(four: AS STRING), CAST(2 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE)):double> --- !query 7 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: four: 2 --- !query 8 +-- !query select 'four: ' || 2+2 --- !query 8 schema -struct<(CAST(concat(four: , CAST(2 AS STRING)) AS DOUBLE) + CAST(2 AS DOUBLE)):double> --- !query 8 output -NULL +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: four: 2 --- !query 9 +-- !query select 3 || 4.0 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 34.0 --- !query 10 +-- !query /* * various string functions */ select concat('one') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output one --- !query 11 +-- !query select concat(1,2,3,'hello',true, false, to_date('20100309','yyyyMMdd')) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 123hellotruefalse2010-03-09 --- !query 12 +-- !query select concat_ws('#','one') --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output one --- !query 13 +-- !query select concat_ws('#',1,2,3,'hello',true, false, to_date('20100309','yyyyMMdd')) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 
1#x#x#hello#true#false#x-03-09 --- !query 14 +-- !query select concat_ws(',',10,20,null,30) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 10,20,30 --- !query 15 +-- !query select concat_ws('',10,20,null,30) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 102030 --- !query 16 +-- !query select concat_ws(NULL,10,20,null,30) is null --- !query 16 schema +-- !query schema struct<(concat_ws(CAST(NULL AS STRING), CAST(10 AS STRING), CAST(20 AS STRING), NULL, CAST(30 AS STRING)) IS NULL):boolean> --- !query 16 output +-- !query output true --- !query 17 +-- !query select reverse('abcde') --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output edcba --- !query 18 -set spark.sql.parser.ansi.enabled=false --- !query 18 schema -struct --- !query 18 output -spark.sql.parser.ansi.enabled false - - --- !query 19 +-- !query select i, left('ahoj', i), right('ahoj', i) from range(-5, 6) t(i) order by i --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output -5 -4 -3 @@ -176,200 +170,192 @@ struct 5 ahoj ahoj --- !query 20 -set spark.sql.parser.ansi.enabled=true --- !query 20 schema -struct --- !query 20 output -spark.sql.parser.ansi.enabled true - - --- !query 21 +-- !query /* * format */ select format_string(NULL) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL --- !query 22 +-- !query select format_string('Hello') --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output Hello --- !query 23 +-- !query select format_string('Hello %s', 'World') --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output Hello World --- !query 24 +-- !query select format_string('Hello %%') --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output Hello % --- !query 25 +-- !query select format_string('Hello %%%%') --- !query 25 schema +-- !query 
schema struct --- !query 25 output +-- !query output Hello %% --- !query 26 +-- !query select format_string('Hello %s %s', 'World') --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output java.util.MissingFormatArgumentException Format specifier '%s' --- !query 27 +-- !query select format_string('Hello %s') --- !query 27 schema +-- !query schema struct<> --- !query 27 output +-- !query output java.util.MissingFormatArgumentException Format specifier '%s' --- !query 28 +-- !query select format_string('Hello %x', 20) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output Hello 14 --- !query 29 +-- !query select format_string('%1$s %3$s', 1, 2, 3) --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 1 3 --- !query 30 +-- !query select format_string('%1$s %12$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1 12 --- !query 31 +-- !query select format_string('%1$s %4$s', 1, 2, 3) --- !query 31 schema +-- !query schema struct<> --- !query 31 output +-- !query output java.util.MissingFormatArgumentException Format specifier '%4$s' --- !query 32 +-- !query select format_string('%1$s %13$s', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) --- !query 32 schema +-- !query schema struct<> --- !query 32 output +-- !query output java.util.MissingFormatArgumentException Format specifier '%13$s' --- !query 33 +-- !query select format_string('%0$s', 'Hello') --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output Hello --- !query 34 +-- !query select format_string('Hello %s %1$s %s', 'World', 'Hello again') --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output Hello World World Hello again --- !query 35 +-- !query select format_string('Hello %s %s, %2$s %2$s', 'World', 'Hello again') --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query 
output Hello World Hello again, Hello again Hello again --- !query 36 +-- !query select format_string('>>%10s<<', 'Hello') --- !query 36 schema +-- !query schema struct>%10s<<, Hello):string> --- !query 36 output +-- !query output >> Hello<< --- !query 37 +-- !query select format_string('>>%10s<<', NULL) --- !query 37 schema +-- !query schema struct>%10s<<, NULL):string> --- !query 37 output +-- !query output >> null<< --- !query 38 +-- !query select format_string('>>%10s<<', '') --- !query 38 schema +-- !query schema struct>%10s<<, ):string> --- !query 38 output +-- !query output >> << --- !query 39 +-- !query select format_string('>>%-10s<<', '') --- !query 39 schema +-- !query schema struct>%-10s<<, ):string> --- !query 39 output +-- !query output >> << --- !query 40 +-- !query select format_string('>>%-10s<<', 'Hello') --- !query 40 schema +-- !query schema struct>%-10s<<, Hello):string> --- !query 40 output +-- !query output >>Hello << --- !query 41 +-- !query select format_string('>>%-10s<<', NULL) --- !query 41 schema +-- !query schema struct>%-10s<<, NULL):string> --- !query 41 output +-- !query output >>null << --- !query 42 +-- !query select format_string('>>%1$10s<<', 'Hello') --- !query 42 schema +-- !query schema struct>%1$10s<<, Hello):string> --- !query 42 output +-- !query output >> Hello<< --- !query 43 +-- !query DROP TABLE TEXT_TBL --- !query 43 schema +-- !query schema struct<> --- !query 43 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out new file mode 100644 index 0000000000000..75ea3f3c42932 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out @@ -0,0 +1,311 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 32 + + +-- !query +CREATE TABLE TIMESTAMP_TBL (d1 timestamp) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- 
!query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('now')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('now')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('today')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('yesterday')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('tomorrow')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('tomorrow EST')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('tomorrow Zulu')) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'today' +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT count(*) AS Three FROM TIMESTAMP_TBL WHERE d1 = timestamp 'tomorrow' +-- !query schema +struct +-- !query output +3 + + +-- !query +SELECT count(*) AS One FROM TIMESTAMP_TBL WHERE d1 = timestamp 'yesterday' +-- !query schema +struct +-- !query output +1 + + +-- !query +TRUNCATE TABLE TIMESTAMP_TBL +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('epoch')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('1997-01-02')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('1997-01-02 03:04:05')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('1997-02-10 17:32:01-08')) +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO TIMESTAMP_TBL VALUES (timestamp('2001-09-22T18:19:20')) +-- !query schema +struct<> +-- !query output + + + +-- !query 
+SELECT '' AS `64`, d1 FROM TIMESTAMP_TBL +-- !query schema +struct<64:string,d1:timestamp> +-- !query output + 1969-12-31 16:00:00 + 1997-01-02 00:00:00 + 1997-01-02 03:04:05 + 1997-02-10 17:32:01 + 2001-09-22 18:19:20 + + +-- !query +SELECT '' AS `48`, d1 FROM TIMESTAMP_TBL + WHERE d1 > timestamp '1997-01-02' +-- !query schema +struct<48:string,d1:timestamp> +-- !query output + 1997-01-02 03:04:05 + 1997-02-10 17:32:01 + 2001-09-22 18:19:20 + + +-- !query +SELECT '' AS `15`, d1 FROM TIMESTAMP_TBL + WHERE d1 < timestamp '1997-01-02' +-- !query schema +struct<15:string,d1:timestamp> +-- !query output + 1969-12-31 16:00:00 + + +-- !query +SELECT '' AS one, d1 FROM TIMESTAMP_TBL + WHERE d1 = timestamp '1997-01-02' +-- !query schema +struct +-- !query output + 1997-01-02 00:00:00 + + +-- !query +SELECT '' AS `63`, d1 FROM TIMESTAMP_TBL + WHERE d1 != timestamp '1997-01-02' +-- !query schema +struct<63:string,d1:timestamp> +-- !query output + 1969-12-31 16:00:00 + 1997-01-02 03:04:05 + 1997-02-10 17:32:01 + 2001-09-22 18:19:20 + + +-- !query +SELECT '' AS `16`, d1 FROM TIMESTAMP_TBL + WHERE d1 <= timestamp '1997-01-02' +-- !query schema +struct<16:string,d1:timestamp> +-- !query output + 1969-12-31 16:00:00 + 1997-01-02 00:00:00 + + +-- !query +SELECT '' AS `49`, d1 FROM TIMESTAMP_TBL + WHERE d1 >= timestamp '1997-01-02' +-- !query schema +struct<49:string,d1:timestamp> +-- !query output + 1997-01-02 00:00:00 + 1997-01-02 03:04:05 + 1997-02-10 17:32:01 + 2001-09-22 18:19:20 + + +-- !query +SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01' +-- !query schema +struct<54:string,diff:interval> +-- !query output + -236720 hours + 0 seconds + 3 hours 4 minutes 5 seconds + 41393 hours 19 minutes 20 seconds + 953 hours 32 minutes 1 seconds + + +-- !query +SELECT '' AS date_trunc_week, date_trunc( 'week', timestamp '2004-02-29 15:44:17.71393' ) AS week_trunc +-- !query schema +struct +-- !query output + 
2004-02-23 00:00:00 + + +-- !query +SELECT '' AS `54`, d1 - timestamp '1997-01-02' AS diff + FROM TIMESTAMP_TBL + WHERE d1 BETWEEN timestamp '1902-01-01' + AND timestamp '2038-01-01' +-- !query schema +struct<54:string,diff:interval> +-- !query output + -236720 hours + 0 seconds + 3 hours 4 minutes 5 seconds + 41393 hours 19 minutes 20 seconds + 953 hours 32 minutes 1 seconds + + +-- !query +SELECT '' AS `54`, d1 as `timestamp`, + date_part( 'year', d1) AS `year`, date_part( 'month', d1) AS `month`, + date_part( 'day', d1) AS `day`, date_part( 'hour', d1) AS `hour`, + date_part( 'minute', d1) AS `minute`, date_part( 'second', d1) AS `second` + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01' +-- !query schema +struct<54:string,timestamp:timestamp,year:int,month:int,day:int,hour:int,minute:int,second:decimal(8,6)> +-- !query output + 1969-12-31 16:00:00 1969 12 31 16 0 0.000000 + 1997-01-02 00:00:00 1997 1 2 0 0 0.000000 + 1997-01-02 03:04:05 1997 1 2 3 4 5.000000 + 1997-02-10 17:32:01 1997 2 10 17 32 1.000000 + 2001-09-22 18:19:20 2001 9 22 18 19 20.000000 + + +-- !query +SELECT '' AS `54`, d1 as `timestamp`, + date_part( 'quarter', d1) AS quarter, date_part( 'msec', d1) AS msec, + date_part( 'usec', d1) AS usec + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01' +-- !query schema +struct<54:string,timestamp:timestamp,quarter:int,msec:decimal(8,3),usec:int> +-- !query output + 1969-12-31 16:00:00 4 0.000 0 + 1997-01-02 00:00:00 1 0.000 0 + 1997-01-02 03:04:05 1 5000.000 5000000 + 1997-02-10 17:32:01 1 1000.000 1000000 + 2001-09-22 18:19:20 3 20000.000 20000000 + + +-- !query +SELECT '' AS `54`, d1 as `timestamp`, + date_part( 'isoyear', d1) AS isoyear, date_part( 'week', d1) AS week, + date_part( 'dow', d1) AS dow + FROM TIMESTAMP_TBL WHERE d1 BETWEEN '1902-01-01' AND '2038-01-01' +-- !query schema +struct<54:string,timestamp:timestamp,isoyear:int,week:int,dow:int> +-- !query output + 1969-12-31 16:00:00 1970 1 3 + 1997-01-02 
00:00:00 1997 1 4 + 1997-01-02 03:04:05 1997 1 4 + 1997-02-10 17:32:01 1997 7 1 + 2001-09-22 18:19:20 2001 38 6 + + +-- !query +SELECT make_timestamp(2014,12,28,6,30,45.887) +-- !query schema +struct +-- !query output +2014-12-28 06:30:45.887 + + +-- !query +DROP TABLE TIMESTAMP_TBL +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/union.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out similarity index 70% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/union.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out index 05dedc547086e..2fe53055cf656 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out @@ -2,17 +2,17 @@ -- Number of queries: 72 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) AS v(f1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM (VALUES (123, 456), @@ -21,186 +21,186 @@ CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM (4567890123456789, 4567890123456789), (4567890123456789, -4567890123456789)) AS v(q1, q2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE OR REPLACE TEMPORARY VIEW FLOAT8_TBL AS SELECT * FROM (VALUES (0.0), (-34.84), (-1004.30), (CAST('-1.2345678901234e+200' AS DOUBLE)), (CAST('-1.2345678901234e-200' AS DOUBLE))) AS v(f1) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT 1 AS two UNION SELECT 2 ORDER BY 1 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 --- !query 4 +-- !query SELECT 1 AS one UNION 
SELECT 1 ORDER BY 1 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 --- !query 5 +-- !query SELECT 1 AS two UNION ALL SELECT 2 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 2 --- !query 6 +-- !query SELECT 1 AS two UNION ALL SELECT 1 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 1 --- !query 7 +-- !query SELECT 1 AS three UNION SELECT 2 UNION SELECT 3 ORDER BY 1 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 2 3 --- !query 8 +-- !query SELECT 1 AS two UNION SELECT 2 UNION SELECT 2 ORDER BY 1 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 2 --- !query 9 +-- !query SELECT 1 AS three UNION SELECT 2 UNION ALL SELECT 2 ORDER BY 1 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 2 2 --- !query 10 +-- !query SELECT 1.1 AS two UNION SELECT 2.2 ORDER BY 1 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1.1 2.2 --- !query 11 +-- !query SELECT 1.1 AS two UNION SELECT 2 ORDER BY 1 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1.1 -2 +2.0 --- !query 12 +-- !query SELECT 1 AS two UNION SELECT 2.2 ORDER BY 1 --- !query 12 schema +-- !query schema struct --- !query 12 output -1 +-- !query output +1.0 2.2 --- !query 13 +-- !query SELECT 1 AS one UNION SELECT double(1.0) ORDER BY 1 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1.0 --- !query 14 +-- !query SELECT 1.1 AS two UNION ALL SELECT 2 ORDER BY 1 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1.1 -2 +2.0 --- !query 15 +-- !query SELECT double(1.0) AS two UNION ALL SELECT 1 ORDER BY 1 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1.0 1.0 --- !query 16 +-- !query SELECT 1.1 AS three UNION SELECT 2 UNION SELECT 3 ORDER 
BY 1 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 1.1 -2 -3 +2.0 +3.0 --- !query 17 +-- !query SELECT double(1.1) AS two UNION SELECT 2 UNION SELECT double(2.0) ORDER BY 1 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1.1 2.0 --- !query 18 +-- !query SELECT 1.1 AS three UNION SELECT 2 UNION ALL SELECT 2 ORDER BY 1 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1.1 -2 -2 +2.0 +2.0 --- !query 19 +-- !query SELECT 1.1 AS two UNION (SELECT 2 UNION ALL SELECT 2) ORDER BY 1 --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 1.1 -2 +2.0 --- !query 20 +-- !query SELECT f1 AS five FROM FLOAT8_TBL UNION SELECT f1 FROM FLOAT8_TBL ORDER BY 1 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output -1.2345678901234E200 -1004.3 -34.84 @@ -208,13 +208,13 @@ struct 0.0 --- !query 21 +-- !query SELECT f1 AS ten FROM FLOAT8_TBL UNION ALL SELECT f1 FROM FLOAT8_TBL --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output -1.2345678901234E-200 -1.2345678901234E-200 -1.2345678901234E200 @@ -227,14 +227,14 @@ struct 0.0 --- !query 22 +-- !query SELECT f1 AS nine FROM FLOAT8_TBL UNION SELECT f1 FROM INT4_TBL ORDER BY 1 --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output -1.2345678901234E200 -2.147483647E9 -123456.0 @@ -246,13 +246,13 @@ struct 2.147483647E9 --- !query 23 +-- !query SELECT f1 AS ten FROM FLOAT8_TBL UNION ALL SELECT f1 FROM INT4_TBL --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output -1.2345678901234E-200 -1.2345678901234E200 -1004.3 @@ -265,16 +265,16 @@ struct 2.147483647E9 --- !query 24 +-- !query SELECT f1 AS five FROM FLOAT8_TBL WHERE f1 BETWEEN -1e6 AND 1e6 UNION SELECT f1 FROM INT4_TBL WHERE f1 BETWEEN 0 AND 1000000 ORDER BY 1 --- !query 24 schema +-- !query schema struct --- !query 24 output +-- 
!query output -1004.3 -34.84 -1.2345678901234E-200 @@ -282,170 +282,170 @@ struct 123456.0 --- !query 25 +-- !query SELECT q2 FROM int8_tbl INTERSECT SELECT q1 FROM int8_tbl ORDER BY 1 --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 123 4567890123456789 --- !query 26 +-- !query SELECT q2 FROM int8_tbl INTERSECT ALL SELECT q1 FROM int8_tbl ORDER BY 1 --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 123 4567890123456789 4567890123456789 --- !query 27 +-- !query SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1 --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output -4567890123456789 456 --- !query 28 +-- !query SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl ORDER BY 1 --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output -4567890123456789 456 --- !query 29 +-- !query SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl ORDER BY 1 --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output -4567890123456789 456 4567890123456789 --- !query 30 +-- !query SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY 1 --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output --- !query 31 +-- !query SELECT q1 FROM int8_tbl EXCEPT ALL SELECT q2 FROM int8_tbl ORDER BY 1 --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 123 4567890123456789 --- !query 32 +-- !query SELECT q1 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q2 FROM int8_tbl ORDER BY 1 --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 123 4567890123456789 4567890123456789 --- !query 33 +-- !query (SELECT 1,2,3 UNION SELECT 4,5,6) INTERSECT SELECT 4,5,6 --- !query 33 schema +-- !query schema struct<1:int,2:int,3:int> --- !query 33 output +-- !query output 4 5 6 --- !query 34 +-- !query (SELECT 1,2,3 UNION SELECT 4,5,6 ORDER 
BY 1,2) INTERSECT SELECT 4,5,6 --- !query 34 schema +-- !query schema struct<1:int,2:int,3:int> --- !query 34 output +-- !query output 4 5 6 --- !query 35 +-- !query (SELECT 1,2,3 UNION SELECT 4,5,6) EXCEPT SELECT 4,5,6 --- !query 35 schema +-- !query schema struct<1:int,2:int,3:int> --- !query 35 output +-- !query output 1 2 3 --- !query 36 +-- !query (SELECT 1,2,3 UNION SELECT 4,5,6 ORDER BY 1,2) EXCEPT SELECT 4,5,6 --- !query 36 schema +-- !query schema struct<1:int,2:int,3:int> --- !query 36 output +-- !query output 1 2 3 --- !query 37 +-- !query select count(*) from ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 5000 --- !query 38 +-- !query select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10 --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 10 --- !query 39 +-- !query select count(*) from ( select unique1 from tenk1 intersect select fivethous from tenk1 ) ss --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 5000 --- !query 40 +-- !query select unique1 from tenk1 except select unique2 from tenk1 where unique2 != 10 --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output 10 --- !query 41 +-- !query SELECT f1 FROM float8_tbl INTERSECT SELECT f1 FROM int4_tbl ORDER BY 1 --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output 0.0 --- !query 42 +-- !query SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl ORDER BY 1 --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output -1.2345678901234E200 -1004.3 -34.84 -1.2345678901234E-200 --- !query 43 +-- !query SELECT q1 FROM int8_tbl INTERSECT SELECT q2 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl ORDER BY 1 --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output -4567890123456789 123 123 @@ 
-455,20 +455,20 @@ struct 4567890123456789 --- !query 44 +-- !query SELECT q1 FROM int8_tbl INTERSECT (((SELECT q2 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) ORDER BY 1 --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output 123 4567890123456789 --- !query 45 +-- !query (((SELECT q1 FROM int8_tbl INTERSECT SELECT q2 FROM int8_tbl ORDER BY 1))) UNION ALL SELECT q2 FROM int8_tbl --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output 123 4567890123456789 456 @@ -478,20 +478,20 @@ struct -4567890123456789 --- !query 46 +-- !query SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1 --- !query 46 schema +-- !query schema struct --- !query 46 output +-- !query output -4567890123456789 456 --- !query 47 +-- !query SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1))) --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output 123 123 4567890123456789 @@ -501,48 +501,48 @@ struct 456 --- !query 48 +-- !query (((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1 --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output -4567890123456789 456 --- !query 49 +-- !query SELECT q1,q2 FROM int8_tbl EXCEPT SELECT q2,q1 FROM int8_tbl ORDER BY q2,q1 --- !query 49 schema +-- !query schema struct --- !query 49 output +-- !query output 4567890123456789 -4567890123456789 123 456 --- !query 50 +-- !query SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1 --- !query 50 schema +-- !query schema struct<> --- !query 50 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`q2`' given input columns: [int8_tbl.q1]; line 1 pos 64 --- !query 51 +-- !query SELECT q1 FROM int8_tbl EXCEPT (((SELECT q2 FROM int8_tbl ORDER BY q2 LIMIT 1))) ORDER BY 1 --- !query 51 schema +-- !query 
schema struct --- !query 51 output +-- !query output 123 4567890123456789 --- !query 52 +-- !query (((((select * from int8_tbl))))) --- !query 52 schema +-- !query schema struct --- !query 52 output +-- !query output 123 456 123 4567890123456789 4567890123456789 -4567890123456789 @@ -550,22 +550,22 @@ struct 4567890123456789 4567890123456789 --- !query 53 +-- !query select * from range(1,5) union select * from range(1,3) --- !query 53 schema +-- !query schema struct --- !query 53 output +-- !query output 1 2 3 4 --- !query 54 +-- !query select * from range(1,6) union all select * from range(1,4) --- !query 54 schema +-- !query schema struct --- !query 54 output +-- !query output 1 1 2 @@ -576,49 +576,49 @@ struct 5 --- !query 55 +-- !query select * from range(1,6) intersect select * from range(1,4) --- !query 55 schema +-- !query schema struct --- !query 55 output +-- !query output 1 2 3 --- !query 56 +-- !query select * from range(1,6) intersect all select * from range(1,4) --- !query 56 schema +-- !query schema struct --- !query 56 output +-- !query output 1 2 3 --- !query 57 +-- !query select * from range(1,6) except select * from range(1,4) --- !query 57 schema +-- !query schema struct --- !query 57 output +-- !query output 4 5 --- !query 58 +-- !query select * from range(1,6) except all select * from range(1,4) --- !query 58 schema +-- !query schema struct --- !query 58 output +-- !query output 4 5 --- !query 59 +-- !query select * from range(1,6) union select * from range(1,4) --- !query 59 schema +-- !query schema struct --- !query 59 output +-- !query output 1 2 3 @@ -626,11 +626,11 @@ struct 5 --- !query 60 +-- !query select * from range(1,6) union all select * from range(1,4) --- !query 60 schema +-- !query schema struct --- !query 60 output +-- !query output 1 1 2 @@ -641,128 +641,128 @@ struct 5 --- !query 61 +-- !query select * from range(1,6) intersect select * from range(1,4) --- !query 61 schema +-- !query schema struct --- !query 61 output +-- 
!query output 1 2 3 --- !query 62 +-- !query select * from range(1,6) intersect all select * from range(1,4) --- !query 62 schema +-- !query schema struct --- !query 62 output +-- !query output 1 2 3 --- !query 63 +-- !query select * from range(1,6) except select * from range(1,4) --- !query 63 schema +-- !query schema struct --- !query 63 output +-- !query output 4 5 --- !query 64 +-- !query select * from range(1,6) except all select * from range(1,4) --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output 4 5 --- !query 65 +-- !query SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo' --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output 3.400000000000000000 foo --- !query 66 +-- !query SELECT * FROM (SELECT 1 AS t, 2 AS x UNION SELECT 2 AS t, 4 AS x) ss WHERE x < 4 ORDER BY x --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output 1 2 --- !query 67 +-- !query SELECT * FROM (SELECT 1 AS t, id as x from range(1,11) UNION SELECT 2 AS t, 4 AS x) ss WHERE x < 4 ORDER BY x --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output 1 1 1 2 1 3 --- !query 68 +-- !query SELECT * FROM (SELECT 1 AS t, int((random()*3)) AS x UNION SELECT 2 AS t, 4 AS x) ss WHERE x > 3 ORDER BY x --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output 2 4 --- !query 69 +-- !query select distinct q1 from (select distinct * from int8_tbl i81 union all select distinct * from int8_tbl i82) ss where q2 = q2 --- !query 69 schema +-- !query schema struct --- !query 69 output +-- !query output 123 4567890123456789 --- !query 70 +-- !query select distinct q1 from (select distinct * from int8_tbl i81 union all select distinct * from int8_tbl i82) ss where -q1 = q2 --- !query 70 schema +-- !query schema struct --- !query 70 output +-- !query output 4567890123456789 --- !query 71 +-- !query select * from (select *, 0 as x from int8_tbl a union all select *, 
1 as x from int8_tbl b) ss where (x = 0) or (q1 >= q2 and q1 <= q2) --- !query 71 schema +-- !query schema struct --- !query 71 output +-- !query output 123 456 0 123 4567890123456789 0 4567890123456789 -4567890123456789 0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part1.sql.out new file mode 100755 index 0000000000000..2b1de87a6be5e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part1.sql.out @@ -0,0 +1,725 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 43 + + +-- !query +CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT four, ten, SUM(SUM(four)) OVER (PARTITION BY four), AVG(ten) FROM tenk1 +GROUP BY four, ten ORDER BY four, ten +-- !query schema +struct +-- !query output +0 0 0 0.0 +0 2 0 2.0 +0 4 0 4.0 +0 6 0 6.0 +0 8 0 8.0 +1 1 2500 1.0 +1 3 2500 3.0 +1 5 2500 5.0 +1 7 2500 7.0 +1 9 2500 9.0 +2 0 5000 0.0 +2 2 5000 2.0 +2 4 5000 4.0 +2 6 5000 6.0 +2 8 5000 8.0 +3 1 7500 1.0 +3 3 7500 3.0 +3 5 7500 5.0 +3 7 7500 7.0 +3 9 7500 9.0 + + +-- !query +SELECT COUNT(*) OVER () FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 + + +-- !query +SELECT COUNT(*) OVER w FROM tenk1 WHERE unique2 < 10 WINDOW w AS () +-- !query schema +struct +-- !query output +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 + + +-- !query +SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0 0 0 +0 0 0 +0 4 0 +1 7 1 +1 9 1 +2 0 2 +3 1 3 +3 3 3 +4 1 1 +5 1 1 + + +-- !query +SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10 +-- !query schema 
+struct +-- !query output +1 +10 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query +SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +1 0 0 +1 0 0 +1 0 2 +1 1 1 +1 1 1 +1 1 3 +2 3 3 +3 4 0 +3 7 1 +4 9 1 + + +-- !query +SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +1 0 0 +1 0 0 +1 0 2 +1 1 1 +1 1 1 +1 1 3 +2 3 3 +2 4 0 +2 7 1 +3 9 1 + + +-- !query +SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0.0 0 0 +0.0 0 0 +0.0 0 2 +0.0 1 1 +0.0 1 1 +0.0 1 3 +0.6666666666666666 7 1 +1.0 3 3 +1.0 4 0 +1.0 9 1 + + +-- !query +SELECT cume_dist() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0.5 1 1 +0.5 1 1 +0.5 1 3 +0.6666666666666666 0 0 +0.6666666666666666 0 0 +0.75 7 1 +1.0 0 2 +1.0 3 3 +1.0 4 0 +1.0 9 1 + + +-- !query +SELECT ntile(3) OVER (ORDER BY ten, four), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +1 0 0 +1 0 0 +1 0 2 +1 1 1 +2 1 1 +2 1 3 +2 3 3 +3 4 0 +3 7 1 +3 9 1 + + +-- !query +SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0 0 0 +0 4 0 +1 1 1 +1 3 3 +1 7 1 +7 9 1 +NULL 0 0 +NULL 0 2 +NULL 1 1 +NULL 1 3 + + +-- !query +SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0 0 0 +1 1 1 +3 1 3 +4 0 0 +7 1 1 +9 7 1 +NULL 0 2 +NULL 3 3 +NULL 4 0 +NULL 9 1 + + +-- !query +SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0 0 0 +14 1 1 +18 7 1 +2 1 1 +6 1 3 +8 0 0 +NULL 0 2 +NULL 3 3 +NULL 4 0 +NULL 9 1 
+ + +-- !query +SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +-1 0 2 +-1 3 3 +-1 4 0 +-1 9 1 +0 0 0 +14 1 1 +18 7 1 +2 1 1 +6 1 3 +8 0 0 + + +-- !query +SELECT first(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0 0 0 +0 0 0 +0 0 2 +0 4 0 +1 1 1 +1 1 1 +1 1 3 +1 3 3 +1 7 1 +1 9 1 + + +-- !query +SELECT last(four) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0 4 0 +1 1 1 +1 1 1 +1 1 3 +1 7 1 +1 9 1 +2 0 0 +2 0 0 +2 0 2 +3 3 3 + + +-- !query +SELECT last(ten) OVER (PARTITION BY four), ten, four FROM +(SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s +ORDER BY four, ten +-- !query schema +struct +-- !query output +4 0 0 +4 0 0 +4 4 0 +9 1 1 +9 1 1 +9 7 1 +9 9 1 +0 0 2 +3 1 3 +3 3 3 + + +-- !query +SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER (PARTITION BY two ORDER BY ten) AS wsum +FROM tenk1 GROUP BY ten, two +-- !query schema +struct +-- !query output +0 0 45000 45000 +1 1 46000 46000 +2 0 47000 92000 +3 1 48000 94000 +4 0 49000 141000 +5 1 50000 144000 +6 0 51000 192000 +7 1 52000 196000 +8 0 53000 245000 +9 1 54000 250000 + + +-- !query +SELECT count(*) OVER (PARTITION BY four), four FROM (SELECT * FROM tenk1 WHERE two = 1)s WHERE unique2 < 10 +-- !query schema +struct +-- !query output +2 3 +2 3 +4 1 +4 1 +4 1 +4 1 + + +-- !query +SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) + + sum(hundred) OVER (PARTITION BY four ORDER BY ten)) AS cntsum + FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +136 +22 +22 +24 +24 +51 +82 +87 +92 +92 + + +-- !query +SELECT * FROM( + SELECT count(*) OVER (PARTITION BY four ORDER BY ten) + + sum(hundred) OVER (PARTITION BY two ORDER BY ten) AS total, + count(*) OVER (PARTITION BY four ORDER BY ten) AS fourcount, + sum(hundred) 
OVER (PARTITION BY two ORDER BY ten) AS twosum + FROM tenk1 +)sub WHERE total <> fourcount + twosum +-- !query schema +struct +-- !query output + + + +-- !query +SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHERE unique2 < 10 +-- !query schema +struct +-- !query output +0.0 +0.0 +0.0 +1.0 +1.0 +1.0 +1.0 +2.0 +3.0 +3.0 + + +-- !query +SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum +FROM tenk1 GROUP BY ten, two WINDOW win AS (PARTITION BY two ORDER BY ten) +-- !query schema +struct +-- !query output +0 0 45000 45000 +1 1 46000 46000 +2 0 47000 92000 +3 1 48000 94000 +4 0 49000 141000 +5 1 50000 144000 +6 0 51000 192000 +7 1 52000 196000 +8 0 53000 245000 +9 1 54000 250000 + + +-- !query +SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 WHERE FALSE)s +-- !query schema +struct +-- !query output + + + +-- !query +create temporary view int4_tbl as select * from values + (0), + (123456), + (-123456), + (2147483647), + (-2147483647) + as int4_tbl(f1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT SUM(COUNT(f1)) OVER () FROM int4_tbl WHERE f1=42 +-- !query schema +struct +-- !query output +0 + + +-- !query +select ten, + sum(unique1) + sum(unique2) as res, + rank() over (order by sum(unique1) + sum(unique2)) as rank +from tenk1 +group by ten order by ten +-- !query schema +struct +-- !query output +0 9976146 4 +1 10114187 9 +2 10059554 8 +3 9878541 1 +4 9881005 2 +5 9981670 5 +6 9947099 3 +7 10120309 10 +8 9991305 6 +9 10040184 7 + + +-- !query +SELECT four, ten, +sum(ten) over (partition by four order by ten), +last(ten) over (partition by four order by ten) +FROM (select distinct ten, four from tenk1) ss +-- !query schema +struct +-- !query output +0 0 0 0 +0 2 2 2 +0 4 6 4 +0 6 12 6 +0 8 20 8 +1 1 1 1 +1 3 4 3 +1 5 9 5 +1 7 16 7 +1 9 25 9 +2 0 0 0 +2 2 2 2 +2 4 6 4 +2 6 12 6 +2 8 20 8 +3 1 1 1 +3 3 4 3 +3 5 9 5 +3 7 16 7 +3 9 25 9 + + +-- !query +SELECT four, ten, 
+sum(ten) over (partition by four order by ten range between unbounded preceding and current row), +last(ten) over (partition by four order by ten range between unbounded preceding and current row) +FROM (select distinct ten, four from tenk1) ss +-- !query schema +struct +-- !query output +0 0 0 0 +0 2 2 2 +0 4 6 4 +0 6 12 6 +0 8 20 8 +1 1 1 1 +1 3 4 3 +1 5 9 5 +1 7 16 7 +1 9 25 9 +2 0 0 0 +2 2 2 2 +2 4 6 4 +2 6 12 6 +2 8 20 8 +3 1 1 1 +3 3 4 3 +3 5 9 5 +3 7 16 7 +3 9 25 9 + + +-- !query +SELECT four, ten, +sum(ten) over (partition by four order by ten range between unbounded preceding and unbounded following), +last(ten) over (partition by four order by ten range between unbounded preceding and unbounded following) +FROM (select distinct ten, four from tenk1) ss +-- !query schema +struct +-- !query output +0 0 20 8 +0 2 20 8 +0 4 20 8 +0 6 20 8 +0 8 20 8 +1 1 25 9 +1 3 25 9 +1 5 25 9 +1 7 25 9 +1 9 25 9 +2 0 20 8 +2 2 20 8 +2 4 20 8 +2 6 20 8 +2 8 20 8 +3 1 25 9 +3 3 25 9 +3 5 25 9 +3 7 25 9 +3 9 25 9 + + +-- !query +SELECT sum(unique1) over (order by four range between current row and unbounded following), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +10 3 3 +10 7 3 +18 2 2 +18 6 2 +33 1 1 +33 5 1 +33 9 1 +45 0 0 +45 4 0 +45 8 0 + + +-- !query +SELECT sum(unique1) over (rows between current row and unbounded following), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +0 0 0 +10 3 3 +15 5 1 +23 8 0 +32 9 1 +38 6 2 +39 1 1 +41 2 2 +45 4 0 +7 7 3 + + +-- !query +SELECT sum(unique1) over (rows between 2 preceding and 2 following), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +10 0 0 +13 2 2 +15 7 3 +22 1 1 +23 3 3 +26 6 2 +29 9 1 +31 8 0 +32 5 1 +7 4 0 + + +-- !query +SELECT sum(unique1) over (rows between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +10 0 0 +13 3 
3 +15 8 0 +17 5 1 +3 6 2 +4 2 2 +6 1 1 +7 9 1 +8 7 3 +NULL 4 0 + + +-- !query +SELECT sum(unique1) over (rows between 1 following and 3 following), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +0 7 3 +10 5 1 +15 8 0 +16 2 2 +16 9 1 +22 6 2 +23 1 1 +7 3 3 +9 4 0 +NULL 0 0 + + +-- !query +SELECT sum(unique1) over (rows between unbounded preceding and 1 following), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +13 1 1 +22 6 2 +30 9 1 +35 8 0 +38 5 1 +45 0 0 +45 3 3 +45 7 3 +6 4 0 +7 2 2 + + +-- !query +CREATE TEMP VIEW v_window AS +SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following) as sum_rows +FROM range(1, 11) i +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v_window +-- !query schema +struct +-- !query output +1 3 +10 19 +2 6 +3 9 +4 12 +5 15 +6 18 +7 21 +8 24 +9 27 + + +-- !query +DROP VIEW v_window +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW tenk2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW int4_tbl +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out new file mode 100644 index 0000000000000..f41659a196ae1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -0,0 +1,481 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 31 + + +-- !query +CREATE TABLE empsalary ( + depname string, + empno integer, + salary int, + enroll_date date +) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO empsalary VALUES + ('develop', 10, 5200, date '2007-08-01'), + ('sales', 1, 5000, date '2006-10-01'), + ('personnel', 5, 3500, date '2007-12-10'), + ('sales', 4, 4800, date '2007-08-08'), + 
('personnel', 2, 3900, date '2006-12-23'), + ('develop', 7, 4200, date '2008-01-01'), + ('develop', 9, 4500, date '2008-01-01'), + ('sales', 3, 4800, date '2007-08-01'), + ('develop', 8, 6000, date '2006-10-01'), + ('develop', 11, 5200, date '2007-08-15') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT sum(unique1) over (order by four range between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +12 1 1 +12 5 1 +12 9 1 +23 3 3 +23 7 3 +27 2 2 +27 6 2 +NULL 0 0 +NULL 4 0 +NULL 8 0 + + +-- !query +SELECT sum(unique1) over (order by four desc range between 2 preceding and 1 preceding), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +10 2 2 +10 6 2 +18 1 1 +18 5 1 +18 9 1 +23 0 0 +23 4 0 +23 8 0 +NULL 3 3 +NULL 7 3 + + +-- !query +SELECT sum(unique1) over (partition by four order by unique1 range between 5 preceding and 6 following), +unique1, four +FROM tenk1 WHERE unique1 < 10 +-- !query schema +struct +-- !query output +10 3 3 +10 7 3 +12 4 0 +12 8 0 +14 9 1 +15 5 1 +4 0 0 +6 1 1 +8 2 2 +8 6 2 + + +-- !query +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls first range between 2 preceding and 2 following) +-- !query schema +struct +-- !query output +1 1 1 3 +2 2 1 4 +3 3 1 5 +4 4 2 5 +5 5 3 5 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id asc nulls last range between 2 preceding and 2 following) +-- !query schema +struct +-- !query output +1 1 1 3 +2 2 1 4 +3 3 1 5 +4 4 2 5 +5 5 3 5 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query +select ss.id, ss.y, + first(ss.y) over 
w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls first range between 2 preceding and 2 following) +-- !query schema +struct +-- !query output +1 1 3 1 +2 2 4 1 +3 3 5 1 +4 4 5 2 +5 5 5 3 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query +select ss.id, ss.y, + first(ss.y) over w, + last(ss.y) over w +from + (select x.id, x.id as y from range(1,6) as x + union all select null, 42 + union all select null, 43) ss +window w as + (order by ss.id desc nulls last range between 2 preceding and 2 following) +-- !query schema +struct +-- !query output +1 1 3 1 +2 2 4 1 +3 3 5 1 +4 4 5 2 +5 5 5 3 +NULL 42 42 43 +NULL 43 42 43 + + +-- !query +select x.id, last(x.id) over (order by x.id range between current row and 2147450884 following) +from range(32764, 32767) x +-- !query schema +struct +-- !query output +32764 32766 +32765 32766 +32766 32766 + + +-- !query +select x.id, last(x.id) over (order by x.id desc range between current row and 2147450885 following) +from range(-32766, -32765) x +-- !query schema +struct +-- !query output +-32766 -32766 + + +-- !query +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(2147483644, 2147483647) x +-- !query schema +struct +-- !query output +2147483644 2147483646 +2147483645 2147483646 +2147483646 2147483646 + + +-- !query +select x.id, last(x.id) over (order by x.id desc range between current row and 5 following) +from range(-2147483646, -2147483645) x +-- !query schema +struct +-- !query output +-2147483646 -2147483646 + + +-- !query +select x.id, last(x.id) over (order by x.id range between current row and 4 following) +from range(9223372036854775804, 9223372036854775807) x +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +select x.id, last(x.id) over (order by x.id desc range between current row and 5 
following) +from range(-9223372036854775806, -9223372036854775805) x +-- !query schema +struct<> +-- !query output +java.lang.ArithmeticException +long overflow + + +-- !query +create table numerics ( + id int, + f_float4 float, + f_float8 float, + f_numeric int +) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into numerics values +(1, -3, -3, -3), +(2, -1, -1, -1), +(3, 0, 0, 0), +(4, 1.1, 1.1, 1.1), +(5, 1.12, 1.12, 1.12), +(6, 2, 2, 2), +(7, 100, 100, 100) +-- !query schema +struct<> +-- !query output + + + +-- !query +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1 following) +-- !query schema +struct +-- !query output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 3 +4 1.1 4 6 +5 1.12 4 6 +6 2.0 4 6 +7 100.0 7 7 + + +-- !query +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1 preceding and 1.1 following) +-- !query schema +struct +-- !query output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 4 +4 1.1 4 6 +5 1.12 4 6 +6 2.0 4 6 +7 100.0 7 7 + + +-- !query +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 'inf' preceding and 'inf' following) +-- !query schema +struct +-- !query output +1 -3.0 1 7 +2 -1.0 1 7 +3 0.0 1 7 +4 1.1 1 7 +5 1.12 1 7 +6 2.0 1 7 +7 100.0 1 7 + + +-- !query +select id, f_float4, first(id) over w, last(id) over w +from numerics +window w as (order by f_float4 range between + 1.1 preceding and 'NaN' following) +-- !query schema +struct +-- !query output +1 -3.0 1 7 +2 -1.0 2 7 +3 0.0 2 7 +4 1.1 3 7 +5 1.12 4 7 +6 2.0 4 7 +7 100.0 7 7 + + +-- !query +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1 following) +-- !query schema +struct +-- !query output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 3 +4 1.1 4 6 +5 1.12 4 6 +6 2.0 4 6 
+7 100.0 7 7 + + +-- !query +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1 preceding and 1.1 following) +-- !query schema +struct +-- !query output +1 -3.0 1 1 +2 -1.0 2 3 +3 0.0 2 4 +4 1.1 4 6 +5 1.12 4 6 +6 2.0 4 6 +7 100.0 7 7 + + +-- !query +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 'inf' preceding and 'inf' following) +-- !query schema +struct +-- !query output +1 -3.0 1 7 +2 -1.0 1 7 +3 0.0 1 7 +4 1.1 1 7 +5 1.12 1 7 +6 2.0 1 7 +7 100.0 1 7 + + +-- !query +select id, f_float8, first(id) over w, last(id) over w +from numerics +window w as (order by f_float8 range between + 1.1 preceding and 'NaN' following) +-- !query schema +struct +-- !query output +1 -3.0 1 7 +2 -1.0 2 7 +3 0.0 2 7 +4 1.1 3 7 +5 1.12 4 7 +6 2.0 4 7 +7 100.0 7 7 + + +-- !query +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1 following) +-- !query schema +struct +-- !query output +1 -3 1 1 +2 -1 2 3 +3 0 2 5 +4 1 3 6 +5 1 3 6 +6 2 4 6 +7 100 7 7 + + +-- !query +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following) +-- !query schema +struct +-- !query output +1 -3 1 1 +2 -1 2 3 +3 0 2 5 +4 1 3 6 +5 1 3 6 +6 2 4 6 +7 100 7 7 + + +-- !query +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1 preceding and 1.1 following) +-- !query schema +struct +-- !query output +1 -3 1 1 +2 -1 2 3 +3 0 2 5 +4 1 3 6 +5 1 3 6 +6 2 4 6 +7 100 7 7 + + +-- !query +select id, f_numeric, first(id) over w, last(id) over w +from numerics +window w as (order by f_numeric range between + 1.1 preceding and 'NaN' following) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax 
for type numeric: NaN + + +-- !query +drop table empsalary +-- !query schema +struct<> +-- !query output + + + +-- !query +drop table numerics +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out new file mode 100644 index 0000000000000..5a52358fe1c53 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -0,0 +1,409 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 29 + + +-- !query +CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE empsalary ( + depname string, + empno integer, + salary int, + enroll_date date +) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO empsalary VALUES + ('develop', 10, 5200, date '2007-08-01'), + ('sales', 1, 5000, date '2006-10-01'), + ('personnel', 5, 3500, date '2007-12-10'), + ('sales', 4, 4800, date '2007-08-08'), + ('personnel', 2, 3900, date '2006-12-23'), + ('develop', 7, 4200, date '2008-01-01'), + ('develop', 9, 4500, date '2008-01-01'), + ('sales', 3, 4800, date '2007-08-01'), + ('develop', 8, 6000, date '2006-10-01'), + ('develop', 11, 5200, date '2007-08-15') +-- !query schema +struct<> +-- !query output + + + +-- !query +create table datetimes ( + id int, + f_time timestamp, + f_timetz timestamp, + f_interval timestamp, + f_timestamptz timestamp, + f_timestamp timestamp +) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into datetimes values +(1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), cast ('2000-10-19 10:23:54+01' as timestamp), timestamp '2000-10-19 10:23:54'), +(2, timestamp '12:00', cast ('12:00 BST' as timestamp), cast ('2 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), 
timestamp '2001-10-19 10:23:54'), +(3, timestamp '13:00', cast ('13:00 BST' as timestamp), cast ('3 years' as timestamp), cast ('2001-10-19 10:23:54+01' as timestamp), timestamp '2001-10-19 10:23:54'), +(4, timestamp '14:00', cast ('14:00 BST' as timestamp), cast ('4 years' as timestamp), cast ('2002-10-19 10:23:54+01' as timestamp), timestamp '2002-10-19 10:23:54'), +(5, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2003-10-19 10:23:54+01' as timestamp), timestamp '2003-10-19 10:23:54'), +(6, timestamp '15:00', cast ('15:00 BST' as timestamp), cast ('5 years' as timestamp), cast ('2004-10-19 10:23:54+01' as timestamp), timestamp '2004-10-19 10:23:54'), +(7, timestamp '17:00', cast ('17:00 BST' as timestamp), cast ('7 years' as timestamp), cast ('2005-10-19 10:23:54+01' as timestamp), timestamp '2005-10-19 10:23:54'), +(8, timestamp '18:00', cast ('18:00 BST' as timestamp), cast ('8 years' as timestamp), cast ('2006-10-19 10:23:54+01' as timestamp), timestamp '2006-10-19 10:23:54'), +(9, timestamp '19:00', cast ('19:00 BST' as timestamp), cast ('9 years' as timestamp), cast ('2007-10-19 10:23:54+01' as timestamp), timestamp '2007-10-19 10:23:54'), +(10, timestamp '20:00', cast ('20:00 BST' as timestamp), cast ('10 years' as timestamp), cast ('2008-10-19 10:23:54+01' as timestamp), timestamp '2008-10-19 10:23:54') +-- !query schema +struct<> +-- !query output + + + +-- !query +WITH cte (x) AS ( + SELECT * FROM range(1, 36, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following) +-- !query schema +struct +-- !query output +1 4 +11 33 +13 39 +15 45 +17 51 +19 57 +21 63 +23 69 +25 75 +27 81 +29 87 +3 9 +31 93 +33 99 +35 68 +5 15 +7 21 +9 27 + + +-- !query +WITH cte (x) AS ( + SELECT * FROM range(1, 36, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY x range between 1 preceding and 1 following) +-- !query schema +struct +-- !query output +1 1 +11 11 +13 13 
+15 15 +17 17 +19 19 +21 21 +23 23 +25 25 +27 27 +29 29 +3 3 +31 31 +33 33 +35 35 +5 5 +7 7 +9 9 + + +-- !query +WITH cte (x) AS ( + select 1 union all select 1 union all select 1 union all + SELECT * FROM range(5, 50, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY x rows between 1 preceding and 1 following) +-- !query schema +struct +-- !query output +1 2 +1 3 +1 7 +11 33 +13 39 +15 45 +17 51 +19 57 +21 63 +23 69 +25 75 +27 81 +29 87 +31 93 +33 99 +35 105 +37 111 +39 117 +41 123 +43 129 +45 135 +47 141 +49 96 +5 13 +7 21 +9 27 + + +-- !query +WITH cte (x) AS ( + select 1 union all select 1 union all select 1 union all + SELECT * FROM range(5, 50, 2) +) +SELECT x, (sum(x) over w) +FROM cte +WINDOW w AS (ORDER BY x range between 1 preceding and 1 following) +-- !query schema +struct +-- !query output +1 3 +1 3 +1 3 +11 11 +13 13 +15 15 +17 17 +19 19 +21 21 +23 23 +25 25 +27 27 +29 29 +31 31 +33 33 +35 35 +37 37 +39 39 +41 41 +43 43 +45 45 +47 47 +49 49 +5 5 +7 7 +9 9 + + +-- !query +SELECT count(*) OVER (PARTITION BY four) FROM (SELECT * FROM tenk1 UNION ALL SELECT * FROM tenk2)s LIMIT 0 +-- !query schema +struct +-- !query output + + + +-- !query +create table t1 (f1 int, f2 int) using parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into t1 values (1,1),(1,2),(2,2) +-- !query schema +struct<> +-- !query output + + + +-- !query +select f1, sum(f1) over (partition by f1 + range between 1 preceding and 1 following) +from t1 where f1 = f2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve '(PARTITION BY default.t1.`f1` RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING)' due to data type mismatch: A range window frame cannot be used in an unordered window specification.; line 1 pos 24 + + +-- !query +select f1, sum(f1) over (partition by f1 order by f2 +range between 1 preceding and 1 following) +from t1 where f1 = f2 +-- !query schema +struct +-- !query output +1 1 +2 2 + + +-- 
!query +select f1, sum(f1) over (partition by f1, f1 order by f2 +range between 2 preceding and 1 preceding) +from t1 where f1 = f2 +-- !query schema +struct +-- !query output +1 NULL +2 NULL + + +-- !query +select f1, sum(f1) over (partition by f1, f2 order by f2 +range between 1 following and 2 following) +from t1 where f1 = f2 +-- !query schema +struct +-- !query output +1 NULL +2 NULL + + +-- !query +SELECT rank() OVER (ORDER BY length('abc')) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +It is not allowed to use window functions inside WHERE and HAVING clauses; + + +-- !query +SELECT * FROM empsalary INNER JOIN tenk1 ON row_number() OVER (ORDER BY salary) < 10 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException + +The query operator `Join` contains one or more unsupported +expression types Aggregate, Window or Generate. +Invalid expressions: [row_number() OVER (ORDER BY default.empsalary.`salary` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)]; + + +-- !query +SELECT rank() OVER (ORDER BY 1), count(*) FROM empsalary GROUP BY 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException + +The query operator `Aggregate` contains one or more unsupported +expression types Aggregate, Window or Generate. 
+Invalid expressions: [RANK() OVER (ORDER BY 1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)]; + + +-- !query +SELECT * FROM rank() OVER (ORDER BY random()) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'ORDER'(line 1, pos 27) + +== SQL == +SELECT * FROM rank() OVER (ORDER BY random()) +---------------------------^^^ + + +-- !query +SELECT * FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +It is not allowed to use window functions inside WHERE and HAVING clauses; + + +-- !query +SELECT * FROM empsalary WHERE rank() OVER (ORDER BY random()) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +It is not allowed to use window functions inside WHERE and HAVING clauses; + + +-- !query +select rank() OVER (PARTITION BY four, ORDER BY ten) FROM tenk1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +no viable alternative at input 'ORDER'(line 1, pos 39) + +== SQL == +select rank() OVER (PARTITION BY four, ORDER BY ten) FROM tenk1 +---------------------------------------^^^ + + +-- !query +SELECT range(1, 100) OVER () FROM empsalary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Undefined function: 'range'. 
This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 7 + + +-- !query +SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'ntile(0)' due to data type mismatch: Buckets expression must be positive, but got: 0; line 1 pos 7 + + +-- !query +DROP TABLE empsalary +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE datetimes +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE t1 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out new file mode 100644 index 0000000000000..4dd4712345a89 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -0,0 +1,504 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 39 + + +-- !query +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 1.5 +2 2.0 +3 NULL +4 NULL + + +-- !query +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 1.5 +2 2.0 +3 NULL +4 NULL + + +-- !query +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 1.5 +2 2.0 +3 NULL +4 NULL + + +-- !query +SELECT i,AVG(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1.5),(2,2.5),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 2.00000 +2 2.50000 +3 NULL +4 NULL + + +-- 
!query +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 3 +2 2 +3 NULL +4 NULL + + +-- !query +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 3 +2 2 +3 NULL +4 NULL + + +-- !query +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 3 +2 2 +3 NULL +4 NULL + + +-- !query +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1.1),(2,2.2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 3.3 +2 2.2 +3 NULL +4 NULL + + +-- !query +SELECT SUM(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1.01),(2,2),(3,3)) v(i,n) +-- !query schema +struct +-- !query output +3.00 +5.00 +6.01 + + +-- !query +SELECT i,COUNT(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 2 +2 1 +3 0 +4 0 + + +-- !query +SELECT i,COUNT(*) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 4 +2 3 +3 2 +4 1 + + +-- !query +SELECT VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +0.0 +11266.666666666666 +13868.750000000002 +21703.999999999996 +4225.0 + + +-- !query +SELECT VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query 
output +0.0 +11266.666666666666 +13868.750000000002 +21703.999999999996 +4225.0 + + +-- !query +SELECT VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +0.0 +11266.666666666666 +13868.750000000002 +21703.999999999996 +4225.0 + + +-- !query +SELECT VAR_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +0.0 +11266.666666666666 +13868.750000000002 +21703.999999999996 +4225.0 + + +-- !query +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- !query +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- !query +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- !query +SELECT VAR_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- !query +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- 
!query +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- !query +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- !query +SELECT VARIANCE(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +16900.0 +18491.666666666668 +27129.999999999996 +8450.0 +NaN + + +-- !query +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +0.0 +106.14455552060438 +117.76565713313879 +147.32277488562315 +147.32277488562315 +65.0 + + +-- !query +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +0.0 +106.14455552060438 +117.76565713313879 +147.32277488562315 +147.32277488562315 +65.0 + + +-- !query +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +0.0 +106.14455552060438 +117.76565713313879 +147.32277488562315 +147.32277488562315 +65.0 + + +-- !query +SELECT STDDEV_POP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +0.0 +106.14455552060438 +117.76565713313879 
+147.32277488562315 +147.32277488562315 +65.0 + + +-- !query +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT STDDEV_SAMP(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(1,NULL),(2,600),(3,470),(4,170),(5,430),(6,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM 
(VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT STDDEV(n) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + FROM (VALUES(0,NULL),(1,600),(2,470),(3,170),(4,430),(5,300)) r(i,n) +-- !query schema +struct +-- !query output +130.0 +135.9840676942217 +164.7118696390761 +164.7118696390761 +91.92388155425118 +NaN + + +-- !query +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND CURRENT ROW) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 1 +2 2 +3 NULL +4 NULL + + +-- !query +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,NULL),(4,NULL)) t(i,v) +-- !query schema +struct +-- !query output +1 3 +2 2 +3 NULL +4 NULL + + +-- !query +SELECT i,SUM(v) OVER (ORDER BY i ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) + FROM (VALUES(1,1),(2,2),(3,3),(4,4)) t(i,v) +-- !query schema +struct +-- !query output +1 3 +2 6 +3 9 +4 7 + + +-- !query +SELECT a, b, + SUM(b) OVER(ORDER BY A ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) +FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +failed to evaluate expression CAST('nan' AS INT): invalid input syntax for type numeric: nan; line 3 pos 6 diff --git a/sql/core/src/test/resources/sql-tests/results/pgSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out similarity index 60% rename from sql/core/src/test/resources/sql-tests/results/pgSQL/with.sql.out rename to sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out index 91b0ff20b6ab0..badafc9e659e2 100644 --- a/sql/core/src/test/resources/sql-tests/results/pgSQL/with.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out @@ -2,134 +2,134 @@ -- Number of queries: 51 --- !query 0 +-- !query WITH q1(x,y) AS (SELECT 1,2) SELECT * FROM q1, q1 AS q2 --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1 2 1 2 --- !query 1 +-- !query SELECT count(*) FROM ( WITH q1(x) AS (SELECT rand() FROM (SELECT EXPLODE(SEQUENCE(1, 5)))) SELECT * FROM q1 UNION SELECT * FROM q1 ) ss --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 10 --- !query 2 +-- !query CREATE TABLE department ( id INTEGER, -- department ID parent_department INTEGER, -- upper department ID name string -- department name ) USING parquet --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO department VALUES (0, NULL, 'ROOT') --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO department VALUES (1, 0, 'A') --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query INSERT INTO department VALUES (2, 1, 'B') --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query INSERT INTO department VALUES (3, 2, 'C') --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO department VALUES (4, 2, 'D') --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO department VALUES (5, 0, 'E') --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO department VALUES (6, 4, 'F') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO department VALUES (7, 5, 'G') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- 
!query output --- !query 11 +-- !query CREATE TABLE tree( id INTEGER, parent_id INTEGER ) USING parquet --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query INSERT INTO tree VALUES (1, NULL), (2, 1), (3,1), (4,2), (5,2), (6,2), (7,3), (8,3), (9,4), (10,4), (11,7), (12,7), (13,7), (14, 9), (15,11), (16,11) --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query create table graph( f int, t int, label string ) USING parquet --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query insert into graph values (1, 2, 'arc 1 -> 2'), (1, 3, 'arc 1 -> 3'), @@ -137,61 +137,61 @@ insert into graph values (1, 4, 'arc 1 -> 4'), (4, 5, 'arc 4 -> 5'), (5, 1, 'arc 5 -> 1') --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output --- !query 15 +-- !query CREATE TABLE y (a INTEGER) USING parquet --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output --- !query 16 +-- !query INSERT INTO y SELECT EXPLODE(SEQUENCE(1, 10)) --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output --- !query 17 +-- !query DROP TABLE y --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output --- !query 18 +-- !query CREATE TABLE y (a INTEGER) USING parquet --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query INSERT INTO y SELECT EXPLODE(SEQUENCE(1, 10)) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output --- !query 20 +-- !query with cte(foo) as ( select 42 ) select * from ((select foo from cte)) q --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 42 --- !query 21 +-- !query WITH outermost(x) AS ( SELECT 1 UNION (WITH innermost as (SELECT 2) @@ -199,15 +199,15 @@ WITH outermost(x) AS ( UNION 
SELECT 3) ) SELECT * FROM outermost ORDER BY 1 --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 1 2 3 --- !query 22 +-- !query WITH outermost(x) AS ( SELECT 1 UNION (WITH innermost as (SELECT 2) @@ -215,26 +215,26 @@ WITH outermost(x) AS ( UNION SELECT * FROM innermost) ) SELECT * FROM outermost ORDER BY 1 --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException Table or view not found: outermost; line 4 pos 23 --- !query 23 +-- !query CREATE TABLE withz USING parquet AS SELECT i AS k, CAST(i || ' v' AS string) v FROM (SELECT EXPLODE(SEQUENCE(1, 16, 3)) i) --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query SELECT * FROM withz ORDER BY k --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 1 1 v 4 4 v 7 7 v @@ -243,111 +243,111 @@ struct 16 16 v --- !query 25 +-- !query DROP TABLE withz --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output --- !query 26 +-- !query TRUNCATE TABLE y --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output --- !query 27 +-- !query INSERT INTO y SELECT EXPLODE(SEQUENCE(1, 3)) --- !query 27 schema +-- !query schema struct<> --- !query 27 output +-- !query output --- !query 28 +-- !query CREATE TABLE yy (a INTEGER) USING parquet --- !query 28 schema +-- !query schema struct<> --- !query 28 output +-- !query output --- !query 29 +-- !query SELECT * FROM y --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 1 2 3 --- !query 30 +-- !query SELECT * FROM yy --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output --- !query 31 +-- !query SELECT * FROM y --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 1 2 3 --- !query 32 +-- !query SELECT * FROM yy --- !query 32 schema +-- !query schema 
struct --- !query 32 output +-- !query output --- !query 33 +-- !query CREATE TABLE parent ( id int, val string ) USING parquet --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output --- !query 34 +-- !query INSERT INTO parent VALUES ( 1, 'p1' ) --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output --- !query 35 +-- !query SELECT * FROM parent --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 1 p1 --- !query 36 +-- !query SELECT * FROM parent --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 1 p1 --- !query 37 +-- !query create table foo (with baz) --- !query 37 schema +-- !query schema struct<> --- !query 37 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException no viable alternative at input 'with'(line 1, pos 18) @@ -357,11 +357,11 @@ create table foo (with baz) ------------------^^^ --- !query 38 +-- !query create table foo (with ordinality) --- !query 38 schema +-- !query schema struct<> --- !query 38 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException no viable alternative at input 'with'(line 1, pos 18) @@ -371,98 +371,98 @@ create table foo (with ordinality) ------------------^^^ --- !query 39 +-- !query with ordinality as (select 1 as x) select * from ordinality --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 1 --- !query 40 +-- !query WITH test AS (SELECT 42) INSERT INTO test VALUES (1) --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output org.apache.spark.sql.AnalysisException Table not found: test; --- !query 41 +-- !query create table test (i int) USING parquet --- !query 41 schema +-- !query schema struct<> --- !query 41 output +-- !query output --- !query 42 +-- !query with test as (select 42) insert into test select * from test --- !query 42 schema +-- !query schema struct<> --- !query 42 output +-- 
!query output --- !query 43 +-- !query select * from test --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 42 --- !query 44 +-- !query drop table test --- !query 44 schema +-- !query schema struct<> --- !query 44 output +-- !query output --- !query 45 +-- !query DROP TABLE department --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output --- !query 46 +-- !query DROP TABLE tree --- !query 46 schema +-- !query schema struct<> --- !query 46 output +-- !query output --- !query 47 +-- !query DROP TABLE graph --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output --- !query 48 +-- !query DROP TABLE y --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output --- !query 49 +-- !query DROP TABLE yy --- !query 49 schema +-- !query schema struct<> --- !query 49 output +-- !query output --- !query 50 +-- !query DROP TABLE parent --- !query 50 schema +-- !query schema struct<> --- !query 50 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/pred-pushdown.sql.out b/sql/core/src/test/resources/sql-tests/results/pred-pushdown.sql.out index 1b8ddbe4c7211..a64b8d3f6632d 100644 --- a/sql/core/src/test/resources/sql-tests/results/pred-pushdown.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pred-pushdown.sql.out @@ -2,39 +2,39 @@ -- Number of queries: 4 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW tbl_a AS VALUES (1, 1), (2, 1), (3, 6) AS T(c1, c2) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW tbl_b AS VALUES 1 AS T(c1) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM tbl_a LEFT ANTI JOIN tbl_b ON ((tbl_a.c1 = tbl_a.c2) IS NULL OR tbl_a.c1 = tbl_a.c2) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 2 
1 3 6 --- !query 3 +-- !query SELECT l.c1, l.c2 FROM tbl_a l WHERE EXISTS (SELECT 1 FROM tbl_b r WHERE l.c1 = l.c2) OR l.c2 < 2 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 1 2 1 diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index d38cab8fa7862..08cc6fa993e0b 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -2,297 +2,297 @@ -- Number of queries: 37 --- !query 0 +-- !query select 1 = 1 --- !query 0 schema +-- !query schema struct<(1 = 1):boolean> --- !query 0 output +-- !query output true --- !query 1 +-- !query select 1 = '1' --- !query 1 schema +-- !query schema struct<(1 = CAST(1 AS INT)):boolean> --- !query 1 output +-- !query output true --- !query 2 +-- !query select 1.0 = '1' --- !query 2 schema +-- !query schema struct<(CAST(1.0 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 2 output +-- !query output true --- !query 3 +-- !query select 1.5 = '1.51' --- !query 3 schema +-- !query schema struct<(CAST(1.5 AS DOUBLE) = CAST(1.51 AS DOUBLE)):boolean> --- !query 3 output +-- !query output false --- !query 4 +-- !query select 1 > '1' --- !query 4 schema +-- !query schema struct<(1 > CAST(1 AS INT)):boolean> --- !query 4 output +-- !query output false --- !query 5 +-- !query select 2 > '1.0' --- !query 5 schema +-- !query schema struct<(2 > CAST(1.0 AS INT)):boolean> --- !query 5 output +-- !query output true --- !query 6 +-- !query select 2 > '2.0' --- !query 6 schema +-- !query schema struct<(2 > CAST(2.0 AS INT)):boolean> --- !query 6 output +-- !query output false --- !query 7 +-- !query select 2 > '2.2' --- !query 7 schema +-- !query schema struct<(2 > CAST(2.2 AS INT)):boolean> --- !query 7 output +-- !query output false --- !query 8 +-- !query select '1.5' > 0.5 --- !query 8 
schema +-- !query schema struct<(CAST(1.5 AS DOUBLE) > CAST(0.5 AS DOUBLE)):boolean> --- !query 8 output +-- !query output true --- !query 9 +-- !query select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') --- !query 9 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> --- !query 9 output +-- !query output false --- !query 10 +-- !query select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' --- !query 10 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') > CAST(2009-07-30 04:17:52 AS DATE)):boolean> --- !query 10 output +-- !query output false --- !query 11 +-- !query select 1 >= '1' --- !query 11 schema +-- !query schema struct<(1 >= CAST(1 AS INT)):boolean> --- !query 11 output +-- !query output true --- !query 12 +-- !query select 2 >= '1.0' --- !query 12 schema +-- !query schema struct<(2 >= CAST(1.0 AS INT)):boolean> --- !query 12 output +-- !query output true --- !query 13 +-- !query select 2 >= '2.0' --- !query 13 schema +-- !query schema struct<(2 >= CAST(2.0 AS INT)):boolean> --- !query 13 output +-- !query output true --- !query 14 +-- !query select 2.0 >= '2.2' --- !query 14 schema +-- !query schema struct<(CAST(2.0 AS DOUBLE) >= CAST(2.2 AS DOUBLE)):boolean> --- !query 14 output +-- !query output false --- !query 15 +-- !query select '1.5' >= 0.5 --- !query 15 schema +-- !query schema struct<(CAST(1.5 AS DOUBLE) >= CAST(0.5 AS DOUBLE)):boolean> --- !query 15 output +-- !query output true --- !query 16 +-- !query select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') --- !query 16 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> --- !query 16 output +-- !query output true --- !query 17 +-- !query select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' --- !query 17 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') >= CAST(2009-07-30 04:17:52 AS DATE)):boolean> --- 
!query 17 output +-- !query output true --- !query 18 +-- !query select 1 < '1' --- !query 18 schema +-- !query schema struct<(1 < CAST(1 AS INT)):boolean> --- !query 18 output +-- !query output false --- !query 19 +-- !query select 2 < '1.0' --- !query 19 schema +-- !query schema struct<(2 < CAST(1.0 AS INT)):boolean> --- !query 19 output +-- !query output false --- !query 20 +-- !query select 2 < '2.0' --- !query 20 schema +-- !query schema struct<(2 < CAST(2.0 AS INT)):boolean> --- !query 20 output +-- !query output false --- !query 21 +-- !query select 2.0 < '2.2' --- !query 21 schema +-- !query schema struct<(CAST(2.0 AS DOUBLE) < CAST(2.2 AS DOUBLE)):boolean> --- !query 21 output +-- !query output true --- !query 22 +-- !query select 0.5 < '1.5' --- !query 22 schema +-- !query schema struct<(CAST(0.5 AS DOUBLE) < CAST(1.5 AS DOUBLE)):boolean> --- !query 22 output +-- !query output true --- !query 23 +-- !query select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') --- !query 23 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> --- !query 23 output +-- !query output false --- !query 24 +-- !query select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' --- !query 24 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') < CAST(2009-07-30 04:17:52 AS DATE)):boolean> --- !query 24 output +-- !query output false --- !query 25 +-- !query select 1 <= '1' --- !query 25 schema +-- !query schema struct<(1 <= CAST(1 AS INT)):boolean> --- !query 25 output +-- !query output true --- !query 26 +-- !query select 2 <= '1.0' --- !query 26 schema +-- !query schema struct<(2 <= CAST(1.0 AS INT)):boolean> --- !query 26 output +-- !query output false --- !query 27 +-- !query select 2 <= '2.0' --- !query 27 schema +-- !query schema struct<(2 <= CAST(2.0 AS INT)):boolean> --- !query 27 output +-- !query output true --- !query 28 +-- !query select 2.0 <= '2.2' --- !query 28 schema +-- !query 
schema struct<(CAST(2.0 AS DOUBLE) <= CAST(2.2 AS DOUBLE)):boolean> --- !query 28 output +-- !query output true --- !query 29 +-- !query select 0.5 <= '1.5' --- !query 29 schema +-- !query schema struct<(CAST(0.5 AS DOUBLE) <= CAST(1.5 AS DOUBLE)):boolean> --- !query 29 output +-- !query output true --- !query 30 +-- !query select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') --- !query 30 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> --- !query 30 output +-- !query output true --- !query 31 +-- !query select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' --- !query 31 schema +-- !query schema struct<(to_date('2009-07-30 04:17:52') <= CAST(2009-07-30 04:17:52 AS DATE)):boolean> --- !query 31 output +-- !query output true --- !query 32 +-- !query select to_date('2017-03-01') = to_timestamp('2017-03-01 00:00:00') --- !query 32 schema +-- !query schema struct<(CAST(to_date('2017-03-01') AS TIMESTAMP) = to_timestamp('2017-03-01 00:00:00')):boolean> --- !query 32 output +-- !query output true --- !query 33 +-- !query select to_timestamp('2017-03-01 00:00:01') > to_date('2017-03-01') --- !query 33 schema +-- !query schema struct<(to_timestamp('2017-03-01 00:00:01') > CAST(to_date('2017-03-01') AS TIMESTAMP)):boolean> --- !query 33 output +-- !query output true --- !query 34 +-- !query select to_timestamp('2017-03-01 00:00:01') >= to_date('2017-03-01') --- !query 34 schema +-- !query schema struct<(to_timestamp('2017-03-01 00:00:01') >= CAST(to_date('2017-03-01') AS TIMESTAMP)):boolean> --- !query 34 output +-- !query output true --- !query 35 +-- !query select to_date('2017-03-01') < to_timestamp('2017-03-01 00:00:01') --- !query 35 schema +-- !query schema struct<(CAST(to_date('2017-03-01') AS TIMESTAMP) < to_timestamp('2017-03-01 00:00:01')):boolean> --- !query 35 output +-- !query output true --- !query 36 +-- !query select to_date('2017-03-01') <= to_timestamp('2017-03-01 
00:00:01') --- !query 36 schema +-- !query schema struct<(CAST(to_date('2017-03-01') AS TIMESTAMP) <= to_timestamp('2017-03-01 00:00:01')):boolean> --- !query 36 output +-- !query output true diff --git a/sql/core/src/test/resources/sql-tests/results/query_regex_column.sql.out b/sql/core/src/test/resources/sql-tests/results/query_regex_column.sql.out index 2dade86f35df9..2e93ee286fd47 100644 --- a/sql/core/src/test/resources/sql-tests/results/query_regex_column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/query_regex_column.sql.out @@ -2,312 +2,312 @@ -- Number of queries: 34 --- !query 0 +-- !query set spark.sql.parser.quotedRegexColumnNames=false --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output spark.sql.parser.quotedRegexColumnNames false --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, "1", "11"), (2, "2", "22"), (3, "3", "33"), (4, "4", "44"), (5, "5", "55"), (6, "6", "66") AS testData(key, value1, value2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData2 AS SELECT * FROM VALUES (1, 1, 1, 2), (1, 2, 1, 2), (2, 1, 2, 3), (2, 2, 2, 3), (3, 1, 3, 4), (3, 2, 3, 4) AS testData2(A, B, c, d) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT `(a)?+.+` FROM testData2 WHERE a = 1 --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`(a)?+.+`' given input columns: [testdata2.A, testdata2.B, testdata2.c, testdata2.d]; line 1 pos 7 --- !query 4 +-- !query SELECT t.`(a)?+.+` FROM testData2 t WHERE a = 1 --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 't.`(a)?+.+`' given input columns: [t.A, t.B, t.c, t.d]; line 1 pos 7 --- !query 5 +-- !query 
SELECT `(a|b)` FROM testData2 WHERE a = 2 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`(a|b)`' given input columns: [testdata2.A, testdata2.B, testdata2.c, testdata2.d]; line 1 pos 7 --- !query 6 +-- !query SELECT `(a|b)?+.+` FROM testData2 WHERE a = 2 --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`(a|b)?+.+`' given input columns: [testdata2.A, testdata2.B, testdata2.c, testdata2.d]; line 1 pos 7 --- !query 7 +-- !query SELECT SUM(`(a|b)?+.+`) FROM testData2 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`(a|b)?+.+`' given input columns: [testdata2.A, testdata2.B, testdata2.c, testdata2.d]; line 1 pos 11 --- !query 8 +-- !query SELECT SUM(`(a)`) FROM testData2 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`(a)`' given input columns: [testdata2.A, testdata2.B, testdata2.c, testdata2.d]; line 1 pos 11 --- !query 9 +-- !query set spark.sql.parser.quotedRegexColumnNames=true --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output spark.sql.parser.quotedRegexColumnNames true --- !query 10 +-- !query SELECT `(a)?+.+` FROM testData2 WHERE a = 1 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 1 2 2 1 2 --- !query 11 +-- !query SELECT `(A)?+.+` FROM testData2 WHERE a = 1 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 1 2 2 1 2 --- !query 12 +-- !query SELECT t.`(a)?+.+` FROM testData2 t WHERE a = 1 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 1 1 2 2 1 2 --- !query 13 +-- !query SELECT t.`(A)?+.+` FROM testData2 t WHERE a = 1 --- !query 13 schema +-- !query 
schema struct --- !query 13 output +-- !query output 1 1 2 2 1 2 --- !query 14 +-- !query SELECT `(a|B)` FROM testData2 WHERE a = 2 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 2 1 2 2 --- !query 15 +-- !query SELECT `(A|b)` FROM testData2 WHERE a = 2 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 2 1 2 2 --- !query 16 +-- !query SELECT `(a|B)?+.+` FROM testData2 WHERE a = 2 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 2 3 2 3 --- !query 17 +-- !query SELECT `(A|b)?+.+` FROM testData2 WHERE a = 2 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 2 3 2 3 --- !query 18 +-- !query SELECT `(e|f)` FROM testData2 --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query SELECT t.`(e|f)` FROM testData2 t --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT p.`(KEY)?+.+`, b, testdata2.`(b)?+.+` FROM testData p join testData2 ON p.key = testData2.a WHERE key < 3 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 1 11 1 1 1 2 1 11 2 1 1 2 2 22 1 2 2 3 2 22 2 2 2 3 --- !query 21 +-- !query SELECT p.`(key)?+.+`, b, testdata2.`(b)?+.+` FROM testData p join testData2 ON p.key = testData2.a WHERE key < 3 --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 1 11 1 1 1 2 1 11 2 1 1 2 2 22 1 2 2 3 2 22 2 2 2 3 --- !query 22 +-- !query set spark.sql.caseSensitive=true --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output spark.sql.caseSensitive true --- !query 23 +-- !query CREATE OR REPLACE TEMPORARY VIEW testdata3 AS SELECT * FROM VALUES (0, 1), (1, 2), (2, 3), (3, 4) AS testdata3(a, b) --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query SELECT `(A)?+.+` FROM 
testdata3 --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 0 1 1 2 2 3 3 4 --- !query 25 +-- !query SELECT `(a)?+.+` FROM testdata3 --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1 2 3 4 --- !query 26 +-- !query SELECT `(A)?+.+` FROM testdata3 WHERE a > 1 --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 2 3 3 4 --- !query 27 +-- !query SELECT `(a)?+.+` FROM testdata3 where `a` > 1 --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 3 4 --- !query 28 +-- !query SELECT SUM(`a`) FROM testdata3 --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 6 --- !query 29 +-- !query SELECT SUM(`(a)`) FROM testdata3 --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 6 --- !query 30 +-- !query SELECT SUM(`(a)?+.+`) FROM testdata3 --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 10 --- !query 31 +-- !query SELECT SUM(a) FROM testdata3 GROUP BY `a` --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 0 1 2 3 --- !query 32 +-- !query SELECT SUM(a) FROM testdata3 GROUP BY `(a)` --- !query 32 schema +-- !query schema struct<> --- !query 32 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`(a)`' given input columns: [testdata3.a, testdata3.b]; line 1 pos 38 --- !query 33 +-- !query SELECT SUM(a) FROM testdata3 GROUP BY `(a)?+.+` --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`(a)?+.+`' given input columns: [testdata3.a, testdata3.b]; line 1 pos 38 diff --git a/sql/core/src/test/resources/sql-tests/results/random.sql.out b/sql/core/src/test/resources/sql-tests/results/random.sql.out index acd0609aabb16..9d00a82b76780 100644 --- a/sql/core/src/test/resources/sql-tests/results/random.sql.out 
+++ b/sql/core/src/test/resources/sql-tests/results/random.sql.out @@ -2,83 +2,83 @@ -- Number of queries: 10 --- !query 0 +-- !query SELECT rand(0) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 0.7604953758285915 --- !query 1 +-- !query SELECT rand(cast(3 / 7 AS int)) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 0.7604953758285915 --- !query 2 +-- !query SELECT rand(NULL) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 0.7604953758285915 --- !query 3 +-- !query SELECT rand(cast(NULL AS int)) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 0.7604953758285915 --- !query 4 +-- !query SELECT rand(1.0) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'rand(1.0BD)' due to data type mismatch: argument 1 requires (int or bigint) type, however, '1.0BD' is of decimal(2,1) type.; line 1 pos 7 --- !query 5 +-- !query SELECT randn(0L) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1.6034991609278433 --- !query 6 +-- !query SELECT randn(cast(3 / 7 AS long)) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1.6034991609278433 --- !query 7 +-- !query SELECT randn(NULL) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1.6034991609278433 --- !query 8 +-- !query SELECT randn(cast(NULL AS long)) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1.6034991609278433 --- !query 9 +-- !query SELECT rand('1') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'rand('1')' due to data type mismatch: argument 1 requires (int or bigint) type, however, ''1'' is of string type.; line 1 pos 7 diff --git 
a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out new file mode 100644 index 0000000000000..c92c1ddca774f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -0,0 +1,69 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 8 + + +-- !query +SELECT regexp_extract('1a 2b 14m', '\\d+') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Regex group count is 0, but the specified group index is 1 + + +-- !query +SELECT regexp_extract('1a 2b 14m', '\\d+', 0) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT regexp_extract('1a 2b 14m', '\\d+', 1) +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Regex group count is 0, but the specified group index is 1 + + +-- !query +SELECT regexp_extract('1a 2b 14m', '\\d+', 2) +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Regex group count is 0, but the specified group index is 2 + + +-- !query +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)') +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 0) +-- !query schema +struct +-- !query output +1a + + +-- !query +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 2) +-- !query schema +struct +-- !query output +a diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index 1faf16cc30509..e8ee07171651d 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -1,222 +1,405 @@ -- Automatically generated by 
SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 41 --- !query 0 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SHOW CREATE TABLE tbl --- !query 1 schema +-- !query schema struct --- !query 1 output -CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +-- !query output +CREATE TABLE `tbl` ( + `a` INT, + `b` STRING, + `c` INT) USING parquet --- !query 2 +-- !query DROP TABLE tbl --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet OPTIONS ('a' 1) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query SHOW CREATE TABLE tbl --- !query 4 schema +-- !query schema struct --- !query 4 output -CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +-- !query output +CREATE TABLE `tbl` ( + `a` INT, + `b` STRING, + `c` INT) USING parquet OPTIONS ( - `a` '1' -) + `a` '1') --- !query 5 +-- !query DROP TABLE tbl --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet OPTIONS ('path' '/path/to/table') --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query SHOW CREATE TABLE tbl --- !query 7 schema +-- !query schema struct --- !query 7 output -CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +-- !query output +CREATE TABLE `tbl` ( + `a` INT, + `b` STRING, + `c` INT) USING parquet LOCATION 'file:/path/to/table' --- !query 8 +-- !query DROP TABLE tbl --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet LOCATION '/path/to/table' --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output 
--- !query 10 +-- !query SHOW CREATE TABLE tbl --- !query 10 schema +-- !query schema struct --- !query 10 output -CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +-- !query output +CREATE TABLE `tbl` ( + `a` INT, + `b` STRING, + `c` INT) USING parquet LOCATION 'file:/path/to/table' --- !query 11 +-- !query DROP TABLE tbl --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet PARTITIONED BY (a) --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query SHOW CREATE TABLE tbl --- !query 13 schema +-- !query schema struct --- !query 13 output -CREATE TABLE `tbl` (`b` STRING, `c` INT, `a` INT) +-- !query output +CREATE TABLE `tbl` ( + `b` STRING, + `c` INT, + `a` INT) USING parquet PARTITIONED BY (a) --- !query 14 +-- !query DROP TABLE tbl --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output --- !query 15 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet CLUSTERED BY (a) SORTED BY (b ASC) INTO 2 BUCKETS --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output --- !query 16 +-- !query SHOW CREATE TABLE tbl --- !query 16 schema +-- !query schema struct --- !query 16 output -CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +-- !query output +CREATE TABLE `tbl` ( + `a` INT, + `b` STRING, + `c` INT) USING parquet CLUSTERED BY (a) SORTED BY (b) INTO 2 BUCKETS --- !query 17 +-- !query DROP TABLE tbl --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output --- !query 18 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet COMMENT 'This is a comment' --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query SHOW CREATE TABLE tbl --- !query 19 schema +-- !query schema struct --- !query 19 output -CREATE TABLE `tbl` (`a` INT, `b` STRING, 
`c` INT) +-- !query output +CREATE TABLE `tbl` ( + `a` INT, + `b` STRING, + `c` INT) USING parquet COMMENT 'This is a comment' --- !query 20 +-- !query DROP TABLE tbl --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet TBLPROPERTIES ('a' = '1') --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output --- !query 22 +-- !query SHOW CREATE TABLE tbl --- !query 22 schema +-- !query schema struct --- !query 22 output -CREATE TABLE `tbl` (`a` INT, `b` STRING, `c` INT) +-- !query output +CREATE TABLE `tbl` ( + `a` INT, + `b` STRING, + `c` INT) USING parquet TBLPROPERTIES ( - 'a' = '1' -) + 'a' = '1') --- !query 23 +-- !query DROP TABLE tbl --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output + + + +-- !query +CREATE TABLE tbl (a REAL, b NUMERIC, c NUMERIC(10), d NUMERIC(10,1)) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CREATE TABLE tbl +-- !query schema +struct +-- !query output +CREATE TABLE `tbl` ( + `a` FLOAT, + `b` DECIMAL(10,0), + `c` DECIMAL(10,0), + `d` DECIMAL(10,1)) +USING parquet + + +-- !query +DROP TABLE tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE tbl (a INT, b STRING, c INT) USING parquet +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW view_SPARK_30302 (aaa, bbb) +AS SELECT a, b FROM tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CREATE TABLE view_SPARK_30302 AS SERDE +-- !query schema +struct +-- !query output +CREATE VIEW `view_SPARK_30302`( + `aaa`, + `bbb`) +AS SELECT a, b FROM tbl + + +-- !query +DROP VIEW view_SPARK_30302 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW view_SPARK_30302 (aaa COMMENT 'comment with \'quoted text\' for aaa', bbb) +COMMENT 'This is a comment with \'quoted text\' for view' +AS SELECT a, b FROM 
tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CREATE TABLE view_SPARK_30302 AS SERDE +-- !query schema +struct +-- !query output +CREATE VIEW `view_SPARK_30302`( + `aaa` COMMENT 'comment with \'quoted text\' for aaa', + `bbb`) +COMMENT 'This is a comment with \'quoted text\' for view' +AS SELECT a, b FROM tbl + + +-- !query +DROP VIEW view_SPARK_30302 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW view_SPARK_30302 (aaa, bbb) +TBLPROPERTIES ('a' = '1', 'b' = '2') +AS SELECT a, b FROM tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CREATE TABLE view_SPARK_30302 AS SERDE +-- !query schema +struct +-- !query output +CREATE VIEW `view_SPARK_30302`( + `aaa`, + `bbb`) +TBLPROPERTIES ( + 'a' = '1', + 'b' = '2') +AS SELECT a, b FROM tbl + + +-- !query +DROP VIEW view_SPARK_30302 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW view_SPARK_30302 (aaa, bbb) +AS SELECT a, b FROM tbl +-- !query schema +struct<> +-- !query output + + + +-- !query +SHOW CREATE TABLE view_SPARK_30302 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Hive view isn't supported by SHOW CREATE TABLE; + + +-- !query +DROP VIEW view_SPARK_30302 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE tbl +-- !query schema +struct<> +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index f22cb7e200e6c..501e185b07f7a 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -2,67 +2,67 @@ -- Number of queries: 26 --- !query 0 +-- !query CREATE DATABASE showdb --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query USE showdb --- !query 1 schema +-- !query schema struct<> --- !query 1 
output +-- !query output --- !query 2 +-- !query CREATE TABLE show_t1(a String, b Int, c String, d String) USING parquet PARTITIONED BY (c, d) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query ALTER TABLE show_t1 ADD PARTITION (c='Us', d=1) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE TABLE show_t2(b String, d Int) USING parquet --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TEMPORARY VIEW show_t3(e int) USING parquet --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query CREATE GLOBAL TEMP VIEW show_t4 AS SELECT 1 as col1 --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query SHOW TABLES --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output aggtest arraydata mapdata @@ -74,11 +74,11 @@ tenk1 testdata --- !query 8 +-- !query SHOW TABLES IN showdb --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output aggtest arraydata mapdata @@ -90,40 +90,40 @@ tenk1 testdata --- !query 9 +-- !query SHOW TABLES 'show_t*' --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output show_t1 show_t2 show_t3 --- !query 10 +-- !query SHOW TABLES LIKE 'show_t1*|show_t2*' --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output show_t1 show_t2 --- !query 11 +-- !query SHOW TABLES IN showdb 'show_t*' --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output show_t1 show_t2 show_t3 --- !query 12 +-- !query SHOW TABLE EXTENDED LIKE 'show_t*' --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output show_t3 true Table: show_t3 Created Time [not included in comparison] Last Access [not included in comparison] @@ -140,7 +140,7 @@ Last 
Access [not included in comparison] Created By [not included in comparison] Type: MANAGED Provider: parquet -Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t1 +Location [not included in comparison]/{warehouse_dir}/showdb.db/show_t1 Partition Provider: Catalog Partition Columns: [`c`, `d`] Schema: root @@ -157,17 +157,17 @@ Last Access [not included in comparison] Created By [not included in comparison] Type: MANAGED Provider: parquet -Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t2 +Location [not included in comparison]/{warehouse_dir}/showdb.db/show_t2 Schema: root |-- b: string (nullable = true) |-- d: integer (nullable = true) --- !query 13 +-- !query SHOW TABLE EXTENDED --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException mismatched input '' expecting {'FROM', 'IN', 'LIKE'}(line 1, pos 19) @@ -177,22 +177,22 @@ SHOW TABLE EXTENDED -------------------^^^ --- !query 14 +-- !query SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(c='Us', d=1) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output showdb show_t1 false Partition Values: [c=Us, d=1] -Location [not included in comparison]sql/core/spark-warehouse/showdb.db/show_t1/c=Us/d=1 +Location [not included in comparison]/{warehouse_dir}/showdb.db/show_t1/c=Us/d=1 Created Time [not included in comparison] Last Access [not included in comparison] --- !query 15 +-- !query SHOW TABLE EXTENDED PARTITION(c='Us', d=1) --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException mismatched input 'PARTITION' expecting {'FROM', 'IN', 'LIKE'}(line 1, pos 20) @@ -202,87 +202,87 @@ SHOW TABLE EXTENDED PARTITION(c='Us', d=1) --------------------^^^ --- !query 16 +-- !query SHOW TABLE EXTENDED LIKE 'show_t*' PARTITION(c='Us', d=1) --- !query 16 schema +-- !query schema struct<> 
--- !query 16 output +-- !query output org.apache.spark.sql.catalyst.analysis.NoSuchTableException Table or view 'show_t*' not found in database 'showdb'; --- !query 17 +-- !query SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(c='Us') --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException Partition spec is invalid. The spec (c) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`'; --- !query 18 +-- !query SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1) --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output org.apache.spark.sql.AnalysisException Partition spec is invalid. The spec (a, d) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`'; --- !query 19 +-- !query SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(c='Ch', d=1) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException Partition not found in table 'show_t1' database 'showdb': c -> Ch d -> 1; --- !query 20 +-- !query DROP TABLE show_t1 --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query DROP TABLE show_t2 --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output --- !query 22 +-- !query DROP VIEW show_t3 --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output --- !query 23 +-- !query DROP VIEW global_temp.show_t4 --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query USE default --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output --- !query 25 +-- !query DROP DATABASE showdb --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out 
b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 71d6e120e8943..4f5db7f6c6b2f 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -2,216 +2,216 @@ -- Number of queries: 25 --- !query 0 +-- !query CREATE DATABASE showdb --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query USE showdb --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TABLE showcolumn1 (col1 int, `col 2` int) USING json --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE TABLE showcolumn2 (price int, qty int, year int, month int) USING parquet partitioned by (year, month) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE TEMPORARY VIEW showColumn3 (col3 int, `col 4` int) USING json --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE GLOBAL TEMP VIEW showColumn4 AS SELECT 1 as col1, 'abc' as `col 5` --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SHOW COLUMNS IN showcolumn1 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output col 2 col1 --- !query 7 +-- !query SHOW COLUMNS IN showdb.showcolumn1 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output col 2 col1 --- !query 8 +-- !query SHOW COLUMNS IN showcolumn1 FROM showdb --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output col 2 col1 --- !query 9 +-- !query SHOW COLUMNS IN showcolumn2 IN showdb --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output month price qty year --- !query 10 +-- !query SHOW COLUMNS IN badtable FROM showdb --- !query 
10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.catalyst.analysis.NoSuchTableException Table or view 'badtable' not found in database 'showdb'; --- !query 11 +-- !query SHOW COLUMNS IN showdb.showcolumn1 from SHOWDB --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output col 2 col1 --- !query 12 +-- !query SHOW COLUMNS IN showdb.showcolumn1 FROM baddb --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException SHOW COLUMNS with conflicting databases: 'baddb' != 'showdb'; --- !query 13 +-- !query SHOW COLUMNS IN showcolumn3 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output col 4 col3 --- !query 14 +-- !query SHOW COLUMNS IN showdb.showcolumn3 --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.catalyst.analysis.NoSuchTableException Table or view 'showcolumn3' not found in database 'showdb'; --- !query 15 +-- !query SHOW COLUMNS IN showcolumn3 FROM showdb --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.catalyst.analysis.NoSuchTableException Table or view 'showcolumn3' not found in database 'showdb'; --- !query 16 +-- !query SHOW COLUMNS IN showcolumn4 --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.catalyst.analysis.NoSuchTableException Table or view 'showcolumn4' not found in database 'showdb'; --- !query 17 +-- !query SHOW COLUMNS IN global_temp.showcolumn4 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output col 5 col1 --- !query 18 +-- !query SHOW COLUMNS IN showcolumn4 FROM global_temp --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output col 5 col1 --- !query 19 +-- !query DROP TABLE showcolumn1 --- !query 19 schema +-- !query schema struct<> --- 
!query 19 output +-- !query output --- !query 20 +-- !query DROP TABLE showColumn2 --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query DROP VIEW showcolumn3 --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output --- !query 22 +-- !query DROP VIEW global_temp.showcolumn4 --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output --- !query 23 +-- !query use default --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query DROP DATABASE showdb --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out index 69a8e958000db..6f1bbd03bc223 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out @@ -2,114 +2,114 @@ -- Number of queries: 14 --- !query 0 +-- !query SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output x y NULL --- !query 1 +-- !query SELECT nullif('x', 'x'), nullif('x', 'y') --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output NULL x --- !query 2 +-- !query SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output x y NULL --- !query 3 +-- !query SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output y x NULL --- !query 4 +-- !query SELECT ifnull(1, 2.1d), ifnull(null, 2.1d) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1.0 
2.1 --- !query 5 +-- !query SELECT nullif(1, 2.1d), nullif(1, 1.0d) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 NULL --- !query 6 +-- !query SELECT nvl(1, 2.1d), nvl(null, 2.1d) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1.0 2.1 --- !query 7 +-- !query SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 2.1 1.0 --- !query 8 +-- !query SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output true 1 1 1 1 --- !query 9 +-- !query SELECT float(1), double(1), decimal(1) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1.0 1.0 1 --- !query 10 +-- !query SELECT date("2014-04-04"), timestamp(date("2014-04-04")) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 2014-04-04 2014-04-04 00:00:00 --- !query 11 +-- !query SELECT string(1, 2) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException Function string accepts only one argument; line 1 pos 7 --- !query 12 +-- !query CREATE TEMPORARY VIEW tempView1 AS VALUES (1, NAMED_STRUCT('col1', 'gamma', 'col2', 'delta')) AS T(id, st) --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query SELECT nvl(st.col1, "value"), count(*) FROM from tempView1 GROUP BY nvl(st.col1, "value") --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output gamma 1 diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 87c3e04017643..33d1b25aee483 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -2,267 +2,267 @@ -- Number of queries: 33 --- !query 0 +-- !query select concat_ws() --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output org.apache.spark.sql.AnalysisException requirement failed: concat_ws requires at least one argument.; line 1 pos 7 --- !query 1 +-- !query select format_string() --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output org.apache.spark.sql.AnalysisException requirement failed: format_string() should take at least 1 argument; line 1 pos 7 --- !query 2 +-- !query select 'a' || 'b' || 'c' --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output abc --- !query 3 +-- !query select replace('abc', 'b', '123') --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output a123c --- !query 4 +-- !query select replace('abc', 'b') --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output ac --- !query 5 +-- !query select length(uuid()), (uuid() <> uuid()) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 36 true --- !query 6 +-- !query select position('bar' in 'foobarbar'), position(null, 'foobarbar'), position('aaads', null) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 4 NULL NULL --- !query 7 +-- !query select left("abcd", 2), left("abcd", 5), left("abcd", '2'), left("abcd", null) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output ab abcd ab NULL --- !query 8 +-- !query select left(null, -2), left("abcd", -2), left("abcd", 0), left("abcd", 'a') --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output NULL NULL --- !query 9 +-- !query select right("abcd", 2), right("abcd", 5), right("abcd", '2'), right("abcd", null) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output cd abcd cd 
NULL --- !query 10 +-- !query select right(null, -2), right("abcd", -2), right("abcd", 0), right("abcd", 'a') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NULL NULL --- !query 11 +-- !query SELECT split('aa1cc2ee3', '[1-9]+') --- !query 11 schema +-- !query schema struct> --- !query 11 output +-- !query output ["aa","cc","ee",""] --- !query 12 +-- !query SELECT split('aa1cc2ee3', '[1-9]+', 2) --- !query 12 schema +-- !query schema struct> --- !query 12 output +-- !query output ["aa","cc2ee3"] --- !query 13 +-- !query SELECT substr('Spark SQL', 5) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output k SQL --- !query 14 +-- !query SELECT substr('Spark SQL', -3) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output SQL --- !query 15 +-- !query SELECT substr('Spark SQL', 5, 1) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output k --- !query 16 +-- !query SELECT substr('Spark SQL' from 5) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output k SQL --- !query 17 +-- !query SELECT substr('Spark SQL' from -3) --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output SQL --- !query 18 +-- !query SELECT substr('Spark SQL' from 5 for 1) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output k --- !query 19 +-- !query SELECT substring('Spark SQL', 5) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output k SQL --- !query 20 +-- !query SELECT substring('Spark SQL', -3) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output SQL --- !query 21 +-- !query SELECT substring('Spark SQL', 5, 1) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output k --- !query 22 +-- !query SELECT substring('Spark SQL' from 5) --- !query 22 schema +-- !query schema struct --- !query 22 output 
+-- !query output k SQL --- !query 23 +-- !query SELECT substring('Spark SQL' from -3) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output SQL --- !query 24 +-- !query SELECT substring('Spark SQL' from 5 for 1) --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output k --- !query 25 +-- !query SELECT trim('yxTomxx', 'xyz'), trim(BOTH 'xyz' FROM 'yxTomxx'), trim('xyz' FROM 'yxTomxx') --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output Tom Tom Tom --- !query 26 +-- !query SELECT trim('xxxbarxxx', 'x'), trim(BOTH 'x' FROM 'xxxbarxxx'), trim('x' FROM 'xxxbarxxx') --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output bar bar bar --- !query 27 +-- !query SELECT ltrim('zzzytest', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytest') --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output test test --- !query 28 +-- !query SELECT ltrim('zzzytestxyz', 'xyz'), trim(LEADING 'xyz' FROM 'zzzytestxyz') --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output testxyz testxyz --- !query 29 +-- !query SELECT ltrim('xyxXxyLAST WORD', 'xy'), trim(LEADING 'xy' FROM 'xyxXxyLAST WORD') --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output XxyLAST WORD XxyLAST WORD --- !query 30 +-- !query SELECT rtrim('testxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'testxxzx') --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output test test --- !query 31 +-- !query SELECT rtrim('xyztestxxzx', 'xyz'), trim(TRAILING 'xyz' FROM 'xyztestxxzx') --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output xyztest xyztest --- !query 32 +-- !query SELECT rtrim('TURNERyxXxy', 'xy'), trim(TRAILING 'xy' FROM 'TURNERyxXxy') --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output TURNERyxX TURNERyxX diff --git 
a/sql/core/src/test/resources/sql-tests/results/struct.sql.out b/sql/core/src/test/resources/sql-tests/results/struct.sql.out index 1da33bc736f0b..f294c5213d319 100644 --- a/sql/core/src/test/resources/sql-tests/results/struct.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/struct.sql.out @@ -2,89 +2,89 @@ -- Number of queries: 9 --- !query 0 +-- !query CREATE TEMPORARY VIEW tbl_x AS VALUES (1, NAMED_STRUCT('C', 'gamma', 'D', 'delta')), (2, NAMED_STRUCT('C', 'epsilon', 'D', 'eta')), (3, NAMED_STRUCT('C', 'theta', 'D', 'iota')) AS T(ID, ST) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT STRUCT('alpha', 'beta') ST --- !query 1 schema +-- !query schema struct> --- !query 1 output +-- !query output {"col1":"alpha","col2":"beta"} --- !query 2 +-- !query SELECT STRUCT('alpha' AS A, 'beta' AS B) ST --- !query 2 schema +-- !query schema struct> --- !query 2 output +-- !query output {"A":"alpha","B":"beta"} --- !query 3 +-- !query SELECT ID, STRUCT(ST.*) NST FROM tbl_x --- !query 3 schema +-- !query schema struct> --- !query 3 output +-- !query output 1 {"C":"gamma","D":"delta"} 2 {"C":"epsilon","D":"eta"} 3 {"C":"theta","D":"iota"} --- !query 4 +-- !query SELECT ID, STRUCT(ST.*,CAST(ID AS STRING) AS E) NST FROM tbl_x --- !query 4 schema +-- !query schema struct> --- !query 4 output +-- !query output 1 {"C":"gamma","D":"delta","E":"1"} 2 {"C":"epsilon","D":"eta","E":"2"} 3 {"C":"theta","D":"iota","E":"3"} --- !query 5 +-- !query SELECT ID, STRUCT(CAST(ID AS STRING) AS AA, ST.*) NST FROM tbl_x --- !query 5 schema +-- !query schema struct> --- !query 5 output +-- !query output 1 {"AA":"1","C":"gamma","D":"delta"} 2 {"AA":"2","C":"epsilon","D":"eta"} 3 {"AA":"3","C":"theta","D":"iota"} --- !query 6 +-- !query SELECT ID, STRUCT(ST.*).C NST FROM tbl_x --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 gamma 2 epsilon 3 theta --- !query 7 +-- !query SELECT ID, 
STRUCT(ST.C, ST.D).D NST FROM tbl_x --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 delta 2 eta 3 iota --- !query 8 +-- !query SELECT ID, STRUCT(ST.C as STC, ST.D as STD).STD FROM tbl_x --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 delta 2 eta 3 iota diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-aggregate.sql.out index 97f494cc05063..9f11b46d4088b 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-aggregate.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 11 --- !query 0 +-- !query CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), @@ -14,13 +14,13 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) AS EMP(id, emp_name, hiredate, salary, dept_id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (10, "dept 1", "CA"), (20, "dept 2", "NY"), @@ -29,13 +29,13 @@ CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") AS DEPT(dept_id, dept_name, state) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 1", 10.00D), ("emp 1", 20.00D), @@ -46,13 +46,13 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) AS BONUS(emp_name, bonus_amt) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query 
output --- !query 3 +-- !query SELECT emp.dept_id, avg(salary), sum(salary) @@ -61,25 +61,25 @@ WHERE EXISTS (SELECT state FROM dept WHERE dept.dept_id = emp.dept_id) GROUP BY dept_id --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 10 133.33333333333334 400.0 20 300.0 300.0 30 400.0 400.0 70 150.0 150.0 --- !query 4 +-- !query SELECT emp_name FROM emp WHERE EXISTS (SELECT max(dept.dept_id) a FROM dept WHERE dept.dept_id = emp.dept_id GROUP BY dept.dept_id) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output emp 1 emp 1 emp 2 @@ -88,20 +88,20 @@ emp 4 emp 8 --- !query 5 +-- !query SELECT count(*) FROM emp WHERE EXISTS (SELECT max(dept.dept_id) a FROM dept WHERE dept.dept_id = emp.dept_id GROUP BY dept.dept_id) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 6 --- !query 6 +-- !query SELECT * FROM bonus WHERE EXISTS (SELECT 1 @@ -111,9 +111,9 @@ WHERE EXISTS (SELECT 1 FROM dept WHERE emp.dept_id = dept.dept_id GROUP BY dept.dept_id)) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 @@ -122,7 +122,7 @@ emp 3 300.0 emp 4 100.0 --- !query 7 +-- !query SELECT emp.dept_id, Avg(salary), Sum(salary) @@ -131,42 +131,42 @@ WHERE NOT EXISTS (SELECT state FROM dept WHERE dept.dept_id = emp.dept_id) GROUP BY dept_id --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 100 400.0 800.0 NULL 400.0 400.0 --- !query 8 +-- !query SELECT emp_name FROM emp WHERE NOT EXISTS (SELECT max(dept.dept_id) a FROM dept WHERE dept.dept_id = emp.dept_id GROUP BY dept.dept_id) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output emp 5 emp 6 - no dept emp 7 --- !query 9 +-- !query SELECT count(*) FROM emp WHERE NOT EXISTS (SELECT max(dept.dept_id) a FROM dept WHERE dept.dept_id = emp.dept_id GROUP BY dept.dept_id) --- !query 9 schema +-- !query schema struct --- !query 9 
output +-- !query output 3 --- !query 10 +-- !query SELECT * FROM bonus WHERE NOT EXISTS (SELECT 1 @@ -176,8 +176,8 @@ WHERE NOT EXISTS (SELECT 1 FROM dept WHERE emp.dept_id = dept.dept_id GROUP BY dept.dept_id)) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output emp 5 1000.0 emp 6 - no dept 500.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-basic.sql.out index 900e4d573bef1..a54fb47fe34f8 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-basic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-basic.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 13 --- !query 0 +-- !query CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), @@ -14,13 +14,13 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) AS EMP(id, emp_name, hiredate, salary, dept_id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (10, "dept 1", "CA"), (20, "dept 2", "NY"), @@ -29,13 +29,13 @@ CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") AS DEPT(dept_id, dept_name, state) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 1", 10.00D), ("emp 1", 20.00D), @@ -46,22 +46,22 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) AS BONUS(emp_name, bonus_amt) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output 
--- !query 3 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT 1 FROM dept WHERE dept.dept_id > 10 AND dept.dept_id < 30) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -73,15 +73,15 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 4 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT dept.dept_name FROM dept WHERE emp.dept_id = dept.dept_id) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -90,16 +90,16 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 5 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT dept.dept_name FROM dept WHERE emp.dept_id = dept.dept_id OR emp.dept_id IS NULL) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -109,92 +109,92 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 6 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT dept.dept_name FROM dept WHERE emp.dept_id = dept.dept_id) AND emp.id > 200 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 300 emp 3 2002-01-01 300.0 20 400 emp 4 2005-01-01 400.0 30 800 emp 8 2016-01-01 150.0 70 --- !query 7 +-- !query SELECT emp.emp_name FROM emp WHERE EXISTS (SELECT dept.state FROM dept WHERE emp.dept_id = dept.dept_id) AND emp.id > 200 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output emp 3 emp 4 emp 8 --- !query 8 +-- !query SELECT * FROM dept WHERE NOT EXISTS (SELECT emp_name FROM emp WHERE emp.dept_id = dept.dept_id) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 40 dept 4 - unassigned OR 50 dept 5 - unassigned NJ --- !query 9 +-- !query SELECT * FROM dept WHERE NOT EXISTS (SELECT emp_name FROM emp WHERE emp.dept_id 
= dept.dept_id OR state = 'NJ') --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 40 dept 4 - unassigned OR --- !query 10 +-- !query SELECT * FROM bonus WHERE NOT EXISTS (SELECT * FROM emp WHERE emp.emp_name = emp_name AND bonus_amt > emp.salary) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 emp 4 100.0 --- !query 11 +-- !query SELECT emp.* FROM emp WHERE NOT EXISTS (SELECT NULL FROM bonus WHERE bonus.emp_name = emp.emp_name) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 700 emp 7 2010-01-01 400.0 100 800 emp 8 2016-01-01 150.0 70 --- !query 12 +-- !query SELECT * FROM bonus WHERE EXISTS (SELECT emp_name @@ -203,9 +203,9 @@ WHERE EXISTS (SELECT emp_name AND EXISTS (SELECT state FROM dept WHERE dept.dept_id = emp.dept_id)) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-cte.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-cte.sql.out index c6c1c04e1c73d..3c8a19998a786 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-cte.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 8 --- !query 0 +-- !query CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), @@ -14,13 +14,13 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) AS EMP(id, emp_name, hiredate, salary, dept_id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW DEPT AS SELECT * 
FROM VALUES (10, "dept 1", "CA"), (20, "dept 2", "NY"), @@ -29,13 +29,13 @@ CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") AS DEPT(dept_id, dept_name, state) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 1", 10.00D), ("emp 1", 20.00D), @@ -46,13 +46,13 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) AS BONUS(emp_name, bonus_amt) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query WITH bonus_cte AS (SELECT * FROM bonus @@ -73,16 +73,16 @@ WHERE a.bonus_amt > 30 AND EXISTS (SELECT 1 FROM bonus_cte b WHERE a.emp_name = b.emp_name) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output emp 2 100.0 emp 2 300.0 emp 3 300.0 emp 4 100.0 --- !query 4 +-- !query WITH emp_cte AS (SELECT * FROM emp @@ -99,16 +99,16 @@ WHERE EXISTS (SELECT * JOIN dept_cte b ON a.dept_id = b.dept_id WHERE bonus.emp_name = a.emp_name) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 emp 2 300.0 --- !query 5 +-- !query WITH emp_cte AS (SELECT * FROM emp @@ -130,9 +130,9 @@ WHERE e.dept_id = d.dept_id LEFT JOIN dept_cte b ON a.dept_id = b.dept_id WHERE e.emp_name = a.emp_name) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 @@ -140,7 +140,7 @@ emp 2 300.0 emp 3 300.0 --- !query 6 +-- !query WITH empdept AS (SELECT id, salary, @@ -159,9 +159,9 @@ WHERE EXISTS (SELECT dept_id, GROUP BY dept_id HAVING count(*) > 1) GROUP BY emp_name --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output emp 1 30.0 emp 2 400.0 emp 3 300.0 @@ -170,7 +170,7 @@ emp 5 1000.0 emp 6 - no dept 500.0 --- !query 7 +-- !query WITH empdept AS (SELECT 
id, salary, @@ -189,9 +189,9 @@ WHERE NOT EXISTS (SELECT dept_id, GROUP BY dept_id HAVING count(*) < 1) GROUP BY emp_name --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output emp 1 30.0 emp 2 400.0 emp 3 300.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out index de90f5e260e1b..aa4d2ab7e4133 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-having.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 8 --- !query 0 +-- !query CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), @@ -14,13 +14,13 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) AS EMP(id, emp_name, hiredate, salary, dept_id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (10, "dept 1", "CA"), (20, "dept 2", "NY"), @@ -29,13 +29,13 @@ CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") AS DEPT(dept_id, dept_name, state) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 1", 10.00D), ("emp 1", 20.00D), @@ -46,22 +46,22 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) AS BONUS(emp_name, bonus_amt) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT dept_id, count(*) FROM emp GROUP BY dept_id HAVING EXISTS 
(SELECT 1 FROM bonus WHERE bonus_amt < min(emp.salary)) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 10 3 100 2 20 1 @@ -70,7 +70,7 @@ struct NULL 1 --- !query 4 +-- !query SELECT * FROM dept WHERE EXISTS (SELECT dept_id, @@ -80,9 +80,9 @@ WHERE EXISTS (SELECT dept_id, HAVING EXISTS (SELECT 1 FROM bonus WHERE bonus_amt < Min(emp.salary))) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 10 dept 1 CA 20 dept 2 NY 30 dept 3 TX @@ -91,7 +91,7 @@ struct 70 dept 7 FL --- !query 5 +-- !query SELECT dept_id, Max(salary) FROM emp gp @@ -103,9 +103,9 @@ WHERE EXISTS (SELECT dept_id, FROM bonus WHERE bonus_amt < Min(p.salary))) GROUP BY gp.dept_id --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 10 200.0 100 400.0 20 300.0 @@ -114,7 +114,7 @@ struct NULL 400.0 --- !query 6 +-- !query SELECT * FROM dept WHERE EXISTS (SELECT dept_id, @@ -124,9 +124,9 @@ WHERE EXISTS (SELECT dept_id, HAVING EXISTS (SELECT 1 FROM bonus WHERE bonus_amt > Min(emp.salary))) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 10 dept 1 CA 20 dept 2 NY 30 dept 3 TX @@ -135,7 +135,7 @@ struct 70 dept 7 FL --- !query 7 +-- !query SELECT * FROM dept WHERE EXISTS (SELECT dept_id, @@ -147,7 +147,7 @@ WHERE EXISTS (SELECT dept_id, FROM bonus WHERE ( bonus_amt > min(emp.salary) AND count(emp.dept_id) > 1 ))) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 10 dept 1 CA diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out index c488cba01d4d0..1a5294930422a 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 17 --- !query 0 +-- !query CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), @@ -14,13 +14,13 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) AS EMP(id, emp_name, hiredate, salary, dept_id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (10, "dept 1", "CA"), (20, "dept 2", "NY"), @@ -29,13 +29,13 @@ CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") AS DEPT(dept_id, dept_name, state) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 1", 10.00D), ("emp 1", 20.00D), @@ -46,13 +46,13 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) AS BONUS(emp_name, bonus_amt) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM emp, dept @@ -60,9 +60,9 @@ WHERE emp.dept_id = dept.dept_id AND EXISTS (SELECT * FROM bonus WHERE bonus.emp_name = emp.emp_name) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 100 emp 1 2005-01-01 100.0 10 10 dept 1 CA 100 emp 1 2005-01-01 100.0 10 10 dept 1 CA 200 emp 2 2003-01-01 200.0 10 10 dept 1 CA @@ -70,7 +70,7 @@ struct --- !query 4 output +-- !query output 100 emp 1 2005-01-01 100.0 10 10 dept 1 CA 100 emp 1 2005-01-01 100.0 10 10 dept 1 CA 200 emp 2 2003-01-01 200.0 10 10 dept 1 CA @@ -88,7 +88,7 @@ struct --- !query 5 output +-- !query output 100 emp 1 2005-01-01 100.0 10 
10 dept 1 CA 100 emp 1 2005-01-01 100.0 10 10 dept 1 CA 200 emp 2 2003-01-01 200.0 10 10 dept 1 CA @@ -108,7 +108,7 @@ struct --- !query 6 output +-- !query output 800 emp 8 2016-01-01 150.0 70 70 dept 7 FL --- !query 7 +-- !query SELECT * FROM bonus WHERE EXISTS (SELECT * @@ -130,9 +130,9 @@ WHERE EXISTS (SELECT * JOIN dept ON dept.dept_id = emp.dept_id WHERE bonus.emp_name = emp.emp_name) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 @@ -141,7 +141,7 @@ emp 3 300.0 emp 4 100.0 --- !query 8 +-- !query SELECT * FROM bonus WHERE EXISTS (SELECT * @@ -149,9 +149,9 @@ WHERE EXISTS (SELECT * RIGHT JOIN dept ON dept.dept_id = emp.dept_id WHERE bonus.emp_name = emp.emp_name) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 @@ -160,7 +160,7 @@ emp 3 300.0 emp 4 100.0 --- !query 9 +-- !query SELECT * FROM bonus WHERE EXISTS (SELECT dept.dept_id, @@ -174,9 +174,9 @@ WHERE EXISTS (SELECT dept.dept_id, GROUP BY dept.dept_id, emp.emp_name ORDER BY emp.emp_name) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 @@ -185,7 +185,7 @@ emp 3 300.0 emp 4 100.0 --- !query 10 +-- !query SELECT emp_name, Sum(bonus_amt) FROM bonus @@ -199,13 +199,13 @@ WHERE EXISTS (SELECT emp_name, HAVING Count(*) > 1 ORDER BY emp_name) GROUP BY emp_name --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output emp 1 30.0 --- !query 11 +-- !query SELECT emp_name, Sum(bonus_amt) FROM bonus @@ -219,9 +219,9 @@ WHERE NOT EXISTS (SELECT emp_name, HAVING Count(*) > 1 ORDER BY emp_name) GROUP BY emp_name --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output emp 2 400.0 emp 3 300.0 emp 4 100.0 @@ -229,7 +229,7 @@ emp 5 1000.0 emp 6 - no dept 500.0 --- !query 12 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT * @@ -240,9 +240,9 @@ WHERE EXISTS (SELECT 
* FROM dept WHERE dept_id >= 30 AND dept_id <= 50) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -254,7 +254,7 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 13 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT * @@ -265,13 +265,13 @@ WHERE EXISTS (SELECT * FROM dept WHERE dept_id >= 30 AND dept_id <= 50) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output --- !query 14 +-- !query SELECT * FROM emp WHERE NOT EXISTS (SELECT * @@ -282,9 +282,9 @@ WHERE NOT EXISTS (SELECT * FROM dept WHERE dept_id >= 30 AND dept_id <= 50) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -296,7 +296,7 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 15 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT * @@ -316,9 +316,9 @@ WHERE EXISTS (SELECT * FROM dept WHERE dept_id >= 30 AND dept_id <= 50) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -330,7 +330,7 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 16 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT * @@ -350,9 +350,9 @@ WHERE EXISTS (SELECT * FROM dept WHERE dept_id >= 30 AND dept_id <= 50) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 300 emp 3 2002-01-01 300.0 20 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-orderby-limit.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-orderby-limit.sql.out index ee13ff2c4f38d..ebd4da6ccbd5d 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-orderby-limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-orderby-limit.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 12 --- !query 0 +-- !query CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), @@ -14,13 +14,13 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) AS EMP(id, emp_name, hiredate, salary, dept_id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (10, "dept 1", "CA"), (20, "dept 2", "NY"), @@ -29,13 +29,13 @@ CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") AS DEPT(dept_id, dept_name, state) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 1", 10.00D), ("emp 1", 20.00D), @@ -46,13 +46,13 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) AS BONUS(emp_name, bonus_amt) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT dept.dept_id @@ -60,9 +60,9 @@ WHERE EXISTS (SELECT dept.dept_id WHERE emp.dept_id = dept.dept_id ORDER BY state) ORDER BY hiredate --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 300 emp 3 2002-01-01 300.0 20 200 emp 2 2003-01-01 200.0 10 100 emp 1 2005-01-01 100.0 10 @@ -71,7 +71,7 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 4 +-- !query SELECT id, hiredate FROM emp @@ -80,9 +80,9 @@ WHERE EXISTS (SELECT dept.dept_id WHERE emp.dept_id = dept.dept_id 
ORDER BY state) ORDER BY hiredate DESC --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 800 2016-01-01 100 2005-01-01 100 2005-01-01 @@ -91,7 +91,7 @@ struct 300 2002-01-01 --- !query 5 +-- !query SELECT * FROM emp WHERE NOT EXISTS (SELECT dept.dept_id @@ -99,15 +99,15 @@ WHERE NOT EXISTS (SELECT dept.dept_id WHERE emp.dept_id = dept.dept_id ORDER BY state) ORDER BY hiredate --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 500 emp 5 2001-01-01 400.0 NULL 600 emp 6 - no dept 2001-01-01 400.0 100 700 emp 7 2010-01-01 400.0 100 --- !query 6 +-- !query SELECT emp_name FROM emp WHERE NOT EXISTS (SELECT max(dept.dept_id) a @@ -115,15 +115,15 @@ WHERE NOT EXISTS (SELECT max(dept.dept_id) a WHERE dept.dept_id = emp.dept_id GROUP BY state ORDER BY state) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output emp 5 emp 6 - no dept emp 7 --- !query 7 +-- !query SELECT count(*) FROM emp WHERE NOT EXISTS (SELECT max(dept.dept_id) a @@ -131,22 +131,22 @@ WHERE NOT EXISTS (SELECT max(dept.dept_id) a WHERE dept.dept_id = emp.dept_id GROUP BY dept_id ORDER BY dept_id) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 3 --- !query 8 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT dept.dept_name FROM dept WHERE dept.dept_id > 10 LIMIT 1) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -158,16 +158,16 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 9 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT max(dept.dept_id) FROM dept GROUP BY state LIMIT 1) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -179,16 +179,16 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 10 +-- !query SELECT * FROM emp WHERE 
NOT EXISTS (SELECT dept.dept_name FROM dept WHERE dept.dept_id > 100 LIMIT 1) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -200,7 +200,7 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 11 +-- !query SELECT * FROM emp WHERE NOT EXISTS (SELECT max(dept.dept_id) @@ -208,9 +208,9 @@ WHERE NOT EXISTS (SELECT max(dept.dept_id) WHERE dept.dept_id > 100 GROUP BY state LIMIT 1) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-within-and-or.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-within-and-or.sql.out index 865e4ed14e4ab..6a17c2fc86d40 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-within-and-or.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-within-and-or.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 8 --- !query 0 +-- !query CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (100, "emp 1", date "2005-01-01", 100.00D, 10), (100, "emp 1", date "2005-01-01", 100.00D, 10), @@ -14,13 +14,13 @@ CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) AS EMP(id, emp_name, hiredate, salary, dept_id) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (10, "dept 1", "CA"), (20, "dept 2", "NY"), @@ -29,13 +29,13 @@ CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") AS DEPT(dept_id, dept_name, state) --- !query 1 schema +-- !query schema struct<> --- !query 
1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 1", 10.00D), ("emp 1", 20.00D), @@ -46,22 +46,22 @@ CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) AS BONUS(emp_name, bonus_amt) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT emp.emp_name FROM emp WHERE EXISTS (SELECT dept.state FROM dept WHERE emp.dept_id = dept.dept_id) OR emp.id > 200 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output emp 1 emp 1 emp 2 @@ -73,16 +73,16 @@ emp 7 emp 8 --- !query 4 +-- !query SELECT * FROM emp WHERE EXISTS (SELECT dept.dept_name FROM dept WHERE emp.dept_id = dept.dept_id) OR emp.dept_id IS NULL --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 100 emp 1 2005-01-01 100.0 10 100 emp 1 2005-01-01 100.0 10 200 emp 2 2003-01-01 200.0 10 @@ -92,7 +92,7 @@ struct 800 emp 8 2016-01-01 150.0 70 --- !query 5 +-- !query SELECT emp.emp_name FROM emp WHERE EXISTS (SELECT dept.state @@ -103,14 +103,14 @@ WHERE EXISTS (SELECT dept.state FROM dept WHERE emp.dept_id = dept.dept_id AND dept.dept_id = 30) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output emp 3 emp 4 --- !query 6 +-- !query SELECT * FROM bonus WHERE ( NOT EXISTS (SELECT * @@ -121,9 +121,9 @@ WHERE ( NOT EXISTS (SELECT * FROM emp WHERE emp.emp_name = emp_name OR bonus_amt < emp.salary) ) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 @@ -134,7 +134,7 @@ emp 5 1000.0 emp 6 - no dept 500.0 --- !query 7 +-- !query SELECT * FROM bonus WHERE NOT EXISTS ( SELECT * @@ -147,9 +147,9 @@ emp_name IN SELECT emp_name FROM emp WHERE bonus_amt < emp.salary) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output emp 1 10.0 emp 1 20.0 emp 2 100.0 diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out index 686fe4975379b..a33f78abf27f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out @@ -2,44 +2,44 @@ -- Number of queries: 7 --- !query 0 +-- !query create temporary view tab_a as select * from values (1, 1) as tab_a(a1, b1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view tab_b as select * from values (1, 1) as tab_b(a2, b2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view struct_tab as select struct(col1 as a, col2 as b) as record from values (1, 1), (1, 2), (2, 1), (2, 2) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query select 1 from tab_a where (a1, b1) not in (select a2, b2 from tab_b) --- !query 3 schema +-- !query schema struct<1:int> --- !query 3 output +-- !query output --- !query 4 +-- !query select 1 from tab_a where (a1, b1) not in (select (a2, b2) from tab_b) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery()))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the @@ -52,19 +52,19 @@ Right side columns: [`named_struct(a2, a2, b2, b2)`].; --- !query 5 +-- !query select count(*) from struct_tab where record in (select (a2 as a, b2 as b) from tab_b) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 --- !query 6 +-- !query select count(*) from struct_tab where record not in (select (a2 as a, 
b2 as b) from tab_b) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 3 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out index a159aa81eff1c..f378664014fdb 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-group-by.sql.out @@ -2,86 +2,86 @@ -- Number of queries: 19 --- !query 0 +-- !query create temporary view t1 as select * from values - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("t1b", 8S, 16, 
19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("t1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("t1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values - ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1f", 19S, null, 519L, float(17), 25D, 
26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("t2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("t1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("t1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("t2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("t1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("t1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("t1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("t1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 
01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values - ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("t3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("t3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("t1b", 8S, 16, 19L, 
float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("t3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("t3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("t1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("t3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT t1a, Avg(t1b) FROM t1 WHERE t1a IN (SELECT t2a FROM t2) GROUP BY t1a --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output t1b 8.0 t1c 8.0 t1e 10.0 --- !query 4 +-- !query SELECT t1a, Max(t1b) FROM t1 @@ -90,13 +90,13 @@ WHERE t1b IN (SELECT t2b WHERE t1a = t2a) GROUP BY t1a, t1d --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output t1b 8 --- !query 5 +-- !query SELECT t1a, t1b FROM t1 @@ -105,14 +105,14 @@ WHERE t1c IN (SELECT t2c WHERE t1a = t2a) GROUP BY t1a, t1b --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output t1b 8 t1c 8 --- !query 6 +-- !query SELECT t1a, Sum(DISTINCT( t1b )) FROM t1 @@ -124,14 +124,14 @@ WHERE t1c IN (SELECT t2c WHERE t1a = t3a) GROUP BY t1a, t1c --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output t1b 8 t1c 8 --- !query 7 +-- !query SELECT t1a, Sum(DISTINCT( t1b )) FROM t1 @@ -143,13 +143,13 @@ WHERE t1c IN (SELECT t2c WHERE t1a = t3a) GROUP BY t1a, t1c --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output t1b 8 --- !query 8 +-- !query SELECT t1a, 
Count(DISTINCT( t1b )) FROM t1 @@ -159,21 +159,21 @@ WHERE t1c IN (SELECT t2c GROUP BY t1a, t1c HAVING t1a = "t1b" --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output t1b 1 --- !query 9 +-- !query SELECT * FROM t1 WHERE t1b IN (SELECT Max(t2b) FROM t2 GROUP BY t2a) --- !query 9 schema -struct --- !query 9 output +-- !query schema +struct +-- !query output t1a 6 8 10 15.0 20.0 2000 2014-04-04 01:00:00 2014-04-04 t1a 6 8 10 15.0 20.0 2000 2014-04-04 01:02:00.001 2014-04-04 t1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 @@ -184,7 +184,7 @@ t1e 10 NULL 19 17.0 25.0 2600 2014-09-04 01:02:00.001 2014-09-04 t1e 10 NULL 25 17.0 25.0 2600 2014-08-04 01:01:00 2014-08-04 --- !query 10 +-- !query SELECT * FROM (SELECT t2a, t2b @@ -194,13 +194,13 @@ FROM (SELECT t2a, WHERE t1b = t2b) GROUP BY t2a, t2b) t2 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output t1b 8 --- !query 11 +-- !query SELECT Count(DISTINCT( * )) FROM t1 WHERE t1b IN (SELECT Min(t2b) @@ -208,13 +208,13 @@ WHERE t1b IN (SELECT Min(t2b) WHERE t1a = t2a AND t1c = t2c GROUP BY t2a) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 --- !query 12 +-- !query SELECT t1a, t1b FROM t1 @@ -224,14 +224,14 @@ WHERE t1c IN (SELECT Max(t2c) GROUP BY t2a, t2c HAVING t2c > 8) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output t1b 8 t1c 8 --- !query 13 +-- !query SELECT t1a, t1b FROM t1 @@ -242,9 +242,9 @@ WHERE t1c IN (SELECT t2c WHERE t3a = t2a GROUP BY t3b) GROUP BY t2c) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output t1a 16 t1a 16 t1b 8 @@ -253,7 +253,7 @@ t1d NULL t1d NULL --- !query 14 +-- !query SELECT t1a, Min(t1b) FROM t1 @@ -262,14 +262,14 @@ WHERE t1c IN (SELECT Min(t2c) WHERE t2b = t1b GROUP BY t2a) GROUP BY t1a --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output t1b 8 t1c 8 --- !query 15 +-- !query 
SELECT t1a, Min(t1b) FROM t1 @@ -282,16 +282,16 @@ WHERE t1c IN (SELECT Min(t2c) GROUP BY t2c) GROUP BY t1a, t1d --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output t1b 8 t1c 8 t1d NULL t1d NULL --- !query 16 +-- !query SELECT t1a, Min(t1b) FROM t1 @@ -304,14 +304,14 @@ WHERE t1c IN (SELECT Min(t2c) WHERE t1c = t3c GROUP BY t3d) GROUP BY t1a --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output t1b 8 t1c 8 --- !query 17 +-- !query SELECT t1a, Min(t1b) FROM t1 @@ -324,16 +324,16 @@ WHERE t1c IN (SELECT Min(t2c) WHERE t1c = t3c GROUP BY t3d) GROUP BY t1a --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output t1a 16 t1b 8 t1c 8 t1d NULL --- !query 18 +-- !query SELECT t1a, Min(t1b) FROM t1 @@ -349,9 +349,9 @@ WHERE t1c IN (SELECT Min(t2c) HAVING t3d = t1d) GROUP BY t1a HAVING Min(t1b) IS NOT NULL --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output t1a 16 t1b 8 t1c 8 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-having.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-having.sql.out index b90ebf57e739b..09b6adbe62b36 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-having.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 12 --- !query 0 +-- !query create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -17,13 +17,13 @@ create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date 
'2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -39,13 +39,13 @@ create temporary view t2 as select * from values ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), @@ -60,13 +60,13 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT t1a, t1b, t1h @@ -75,16 +75,16 @@ WHERE t1b IN (SELECT t2b FROM t2 GROUP BY t2b HAVING t2b < 10) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output val1a 6 2014-04-04 01:00:00 val1a 6 2014-04-04 01:02:00.001 val1b 8 2014-05-04 01:01:00 val1c 8 2014-05-04 01:02:00.001 --- !query 4 +-- !query SELECT t1a, t1b, t1c @@ -94,13 +94,13 @@ WHERE t1b IN (SELECT Min(t2b) WHERE t1a = t2a GROUP BY t2b HAVING t2b > 1) --- 
!query 4 schema +-- !query schema struct --- !query 4 output +-- !query output val1b 8 16 --- !query 5 +-- !query SELECT t1a, t1b, t1c FROM t1 WHERE t1b IN (SELECT t2b @@ -108,13 +108,13 @@ WHERE t1b IN (SELECT t2b WHERE t1c < t2c) GROUP BY t1a, t1b, t1c HAVING t1b < 10 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output val1a 6 8 --- !query 6 +-- !query SELECT t1a, t1b, t1c FROM t1 WHERE t1b IN (SELECT t2b @@ -122,14 +122,14 @@ WHERE t1b IN (SELECT t2b WHERE t1c = t2c) GROUP BY t1a, t1b, t1c HAVING COUNT (DISTINCT t1b) < 10 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output val1b 8 16 val1c 8 16 --- !query 7 +-- !query SELECT Count(DISTINCT( t1a )), t1b FROM t1 @@ -140,13 +140,13 @@ WHERE t1c IN (SELECT t2c HAVING t2c > 10) GROUP BY t1b HAVING t1b >= 8 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 2 8 --- !query 8 +-- !query SELECT t1a, Max(t1b) FROM t1 @@ -158,13 +158,13 @@ HAVING t1a IN (SELECT t2a FROM t3 WHERE t2c = t3c) ) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output val1b 8 --- !query 9 +-- !query SELECT t1a, t1c, Min(t1d) @@ -175,16 +175,16 @@ WHERE t1a NOT IN (SELECT t2a HAVING t2a > 'val2a') GROUP BY t1a, t1c HAVING Min(t1d) > t1c --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output val1a 8 10 val1b 16 19 val1c 16 19 val1d 16 19 --- !query 10 +-- !query SELECT t1a, t1b FROM t1 @@ -195,13 +195,13 @@ WHERE t1d NOT IN (SELECT t2d HAVING t2c > 8) GROUP BY t1a, t1b HAVING t1b < 10 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output val1a 6 --- !query 11 +-- !query SELECT t1a, Max(t1b) FROM t1 @@ -210,8 +210,8 @@ GROUP BY t1a HAVING t1a NOT IN (SELECT t2a FROM t2 WHERE t2b > 3) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output val1a 16 val1d 10 diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out index ab6a11a2b7efa..615b67f629e55 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-joins.sql.out @@ -1,8 +1,8 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 14 +-- Number of queries: 34 --- !query 0 +-- !query create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -17,13 +17,13 @@ create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -39,13 +39,13 @@ create temporary view t2 as select * from values ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values ("val3a", 6S, 12, 110L, float(15), 20D, 
20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), @@ -60,13 +60,43 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query +create temporary view s1 as select * from values + (1), (3), (5), (7), (9) + as s1(id) +-- !query schema +struct<> +-- !query output + + + +-- !query +create temporary view s2 as select * from values + (1), (3), (4), (6), (9) + as s2(id) +-- !query schema +struct<> +-- !query output + + + +-- !query +create temporary view s3 as select * from values + (3), (4), (6), (9) + as s3(id) +-- !query schema +struct<> +-- !query output + + + +-- !query SELECT t1a, t1b, t1c, t3a, t3b, t3c FROM t1 natural JOIN t3 WHERE t1a IN (SELECT t2a @@ -77,14 +107,14 @@ WHERE t1a IN (SELECT t2a ORDER BY t1a, t1b, t1c DESC nulls first --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output val1b 8 16 val1b 8 16 val1b 8 16 val1b 8 16 --- !query 4 +-- !query SELECT Count(DISTINCT(t1a)), t1b, t3a, @@ -102,10 +132,10 @@ GROUP BY t1a, t3a, t3b, t3c -ORDER BY t1a DESC, t3b DESC --- !query 4 schema +ORDER BY t1a DESC, t3b DESC, t3c ASC +-- !query schema struct --- !query 4 output +-- !query output 1 10 val3b 8 NULL 1 10 val1b 8 16 1 10 val3a 6 12 @@ -113,7 +143,7 @@ struct 1 8 val3a 6 12 --- !query 5 +-- !query SELECT Count(DISTINCT(t1a)) FROM t1 natural right JOIN t3 WHERE t1a IN @@ -129,13 +159,13 @@ AND t1d IN AND t1a = t3a GROUP BY t1a ORDER BY t1a --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 --- !query 6 +-- !query SELECT t1a, t1b, 
t1c, @@ -151,9 +181,9 @@ where t1a IN AND t1b != t3b AND t1a = 'val1b' ORDER BY t1a --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output val1b 8 16 val3a 6 12 val1b 8 16 val3a 6 12 val1b 8 16 val1b 10 12 @@ -162,7 +192,7 @@ val1b 8 16 val3c 17 16 val1b 8 16 val3c 17 16 --- !query 7 +-- !query SELECT Count(DISTINCT(t1a)), t1b FROM t1 RIGHT JOIN t3 @@ -181,13 +211,13 @@ GROUP BY t1a, t1b HAVING t1b > 8 ORDER BY t1a --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 10 --- !query 8 +-- !query SELECT Count(DISTINCT(t1a)) FROM t1 LEFT OUTER JOIN t3 @@ -199,15 +229,15 @@ WHERE t1a IN WHERE t1h < t2h ) GROUP BY t1a ORDER BY t1a --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 1 1 --- !query 9 +-- !query SELECT Count(DISTINCT(t1a)), t1b FROM t1 INNER JOIN t2 @@ -224,14 +254,14 @@ OR t1a IN WHERE t2h < t1h) GROUP BY t1b HAVING t1b > 6 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 10 1 8 --- !query 10 +-- !query SELECT Count(DISTINCT(t1a)), t1b FROM t1 @@ -249,13 +279,13 @@ AND t1h IN where t2b = t3b) GROUP BY t1b HAVING t1b > 8 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 10 --- !query 11 +-- !query SELECT Count(DISTINCT(t1a)), t1b FROM t1 @@ -280,13 +310,13 @@ AND t1b IN GROUP BY t1b HAVING t1b > 8 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 10 --- !query 12 +-- !query SELECT Count(DISTINCT(t1a)), t1b FROM t1 @@ -314,13 +344,13 @@ AND t1b IN AND t1a = t2a GROUP BY t1b ORDER BY t1b DESC --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 1 8 --- !query 13 +-- !query SELECT t1a, t1b, t1c, @@ -345,9 +375,222 @@ and t1a = t2a Group By t1a, t1b, t1c, t2a, t2b, t2c HAVING t2c IS NOT NULL ORDER By t2b DESC nulls last --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output val1b 8 16 1 10 12 val1b 8 
16 1 8 16 val1b 8 16 1 NULL 16 + + +-- !query +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id IN (SELECT 9) +-- !query schema +struct +-- !query output +9 + + +-- !query +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT 9) +-- !query schema +struct +-- !query output +1 +3 + + +-- !query +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +3 +9 + + +-- !query +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +3 +9 + + +-- !query +SELECT s1.id as id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 +5 +7 + + +-- !query +SELECT s1.id, s2.id as id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 NULL +3 3 +5 NULL +7 NULL +9 9 + + +-- !query +SELECT s1.id, s2.id as id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +3 3 +9 9 +NULL 1 +NULL 4 +NULL 6 + + +-- !query +SELECT s1.id, s2.id AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 NULL +3 3 +5 NULL +7 NULL +9 9 +NULL 1 +NULL 4 +NULL 6 + + +-- !query +SELECT s1.id FROM s1 +JOIN s2 ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT s1.id AS id2 FROM s1 +LEFT SEMI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT s1.id AS id2 FROM s1 +LEFT ANTI JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +3 +5 +7 +9 + + +-- !query +SELECT s1.id, s2.id AS id2 FROM s1 +LEFT OUTER JOIN s2 +ON s1.id = s2.id +AND 
s1.id NOT IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 1 +3 NULL +5 NULL +7 NULL +9 NULL + + +-- !query +SELECT s1.id, s2.id AS id2 FROM s1 +RIGHT OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 1 +NULL 3 +NULL 4 +NULL 6 +NULL 9 + + +-- !query +SELECT s1.id, s2.id AS id2 FROM s1 +FULL OUTER JOIN s2 +ON s1.id = s2.id +AND s1.id NOT IN (SELECT id FROM s3) +-- !query schema +struct +-- !query output +1 1 +3 NULL +5 NULL +7 NULL +9 NULL +NULL 3 +NULL 4 +NULL 6 +NULL 9 + + +-- !query +DROP VIEW s1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW s2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW s3 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out index 71ca1f8649475..1c335445114c7 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-limit.sql.out @@ -2,85 +2,85 @@ -- Number of queries: 8 --- !query 0 +-- !query create temporary view t1 as select * from values - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, 
timestamp '2014-07-04 01:02:00.001', null), - ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("val1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("val1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query 
output --- !query 1 +-- !query create temporary view t2 as select * from values - ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("val2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("val1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("val2e", 8S, null, 419L, float(17), 25D, 
26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values - ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, 
timestamp '2014-11-04 01:02:00.000', null), - ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("val3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("val3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("val3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("val3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("val1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM t1 WHERE t1a IN (SELECT t2a FROM t2 WHERE t1d = t2d) LIMIT 2 --- !query 3 schema -struct --- !query 3 output +-- !query schema +struct +-- !query output val1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 val1c 8 16 19 17.0 25.0 2600 2014-05-04 01:02:00.001 2014-05-05 --- !query 4 +-- !query 
SELECT * FROM t1 WHERE t1c IN (SELECT t2c @@ -88,16 +88,16 @@ WHERE t1c IN (SELECT t2c WHERE t2b >= 8 LIMIT 2) LIMIT 4 --- !query 4 schema -struct --- !query 4 output +-- !query schema +struct +-- !query output val1a 16 12 10 15.0 20.0 2000 2014-07-04 01:01:00 2014-07-04 val1a 16 12 21 15.0 20.0 2000 2014-06-04 01:02:00.001 2014-06-04 val1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 val1c 8 16 19 17.0 25.0 2600 2014-05-04 01:02:00.001 2014-05-05 --- !query 5 +-- !query SELECT Count(DISTINCT( t1a )), t1b FROM t1 @@ -108,29 +108,29 @@ WHERE t1d IN (SELECT t2d GROUP BY t1b ORDER BY t1b DESC NULLS FIRST LIMIT 1 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 NULL --- !query 6 +-- !query SELECT * FROM t1 WHERE t1b NOT IN (SELECT t2b FROM t2 WHERE t2b > 6 LIMIT 2) --- !query 6 schema -struct --- !query 6 output +-- !query schema +struct +-- !query output val1a 16 12 10 15.0 20.0 2000 2014-07-04 01:01:00 2014-07-04 val1a 16 12 21 15.0 20.0 2000 2014-06-04 01:02:00.001 2014-06-04 val1a 6 8 10 15.0 20.0 2000 2014-04-04 01:00:00 2014-04-04 val1a 6 8 10 15.0 20.0 2000 2014-04-04 01:02:00.001 2014-04-04 --- !query 7 +-- !query SELECT Count(DISTINCT( t1a )), t1b FROM t1 @@ -141,7 +141,7 @@ WHERE t1d NOT IN (SELECT t2d GROUP BY t1b ORDER BY t1b NULLS last LIMIT 1 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 6 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-multiple-columns.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-multiple-columns.sql.out index 7a96c4bc5a30b..c6e13715bd9fa 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-multiple-columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-multiple-columns.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 8 --- !query 0 +-- !query create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, 
float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -17,13 +17,13 @@ create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -39,13 +39,13 @@ create temporary view t2 as select * from values ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), @@ -60,13 +60,13 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT 
t1a, t1b, t1h @@ -77,16 +77,16 @@ WHERE ( t1a, t1h ) NOT IN (SELECT t2a, WHERE t2a = t1a ORDER BY t2a) AND t1a = 'val1a' --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output val1a 16 2014-06-04 01:02:00.001 val1a 16 2014-07-04 01:01:00 val1a 6 2014-04-04 01:00:00 val1a 6 2014-04-04 01:02:00.001 --- !query 4 +-- !query SELECT t1a, t1b, t1d @@ -97,14 +97,14 @@ WHERE ( t1b, t1d ) IN (SELECT t2b, WHERE t2i IN (SELECT t3i FROM t3 WHERE t2b > t3b)) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output val1e 10 19 val1e 10 19 --- !query 5 +-- !query SELECT t1a, t1b, t1d @@ -116,16 +116,16 @@ WHERE ( t1b, t1d ) NOT IN (SELECT t2b, FROM t3 WHERE t2b > t3b)) AND t1a = 'val1a' --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output val1a 16 10 val1a 16 21 val1a 6 10 val1a 6 10 --- !query 6 +-- !query SELECT t2a FROM (SELECT t2a FROM t2 @@ -144,13 +144,13 @@ FROM (SELECT t2a WHERE ( t2a, t2b ) IN (SELECT t3a, t3b FROM t3)) AS t4 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output val1b --- !query 7 +-- !query WITH cte1 AS ( SELECT t1a, @@ -169,9 +169,9 @@ FROM ( FROM cte1 JOIN cte1 cte2 on cte1.t1b = cte2.t1b) s --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output val1b 8 val1b 8 val1b 8 val1c 8 val1c 8 val1b 8 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out index 4bebd9622c3c5..96b418c54bf5b 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-order-by.sql.out @@ -2,79 +2,79 @@ -- Number of queries: 18 --- !query 0 +-- !query create temporary view t1 as select * from values - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date 
'2014-04-04'), - ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("val1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("val1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', 
date '2014-08-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values - ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:01:00.000', null) + ("val2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("val1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("val2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values - ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 
01:02:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("val3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("val3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("val3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("val3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("val1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date 
'2014-05-04'), + ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM t1 WHERE t1a IN (SELECT t2a FROM t2) ORDER BY t1a --- !query 3 schema -struct --- !query 3 output +-- !query schema +struct +-- !query output val1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 val1c 8 16 19 17.0 25.0 2600 2014-05-04 01:02:00.001 2014-05-05 val1e 10 NULL 25 17.0 25.0 2600 2014-08-04 01:01:00 2014-08-04 @@ -82,20 +82,20 @@ val1e 10 NULL 19 17.0 25.0 2600 2014-09-04 01:02:00.001 2014-09-04 val1e 10 NULL 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 --- !query 4 +-- !query SELECT t1a FROM t1 WHERE t1b IN (SELECT t2b FROM t2 WHERE t1a = t2a) ORDER BY t1b DESC --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output val1b --- !query 5 +-- !query SELECT t1a, t1b FROM t1 @@ -103,40 +103,40 @@ WHERE t1c IN (SELECT t2c FROM t2 WHERE t1a = t2a) ORDER BY 2 DESC nulls last --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output val1b 8 val1c 8 --- !query 6 +-- !query SELECT Count(DISTINCT( t1a )) FROM t1 WHERE t1b IN (SELECT t2b FROM t2 WHERE t1a = t2a) ORDER BY Count(DISTINCT( t1a )) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 --- !query 7 +-- !query SELECT * FROM t1 WHERE t1b IN (SELECT t2c FROM t2 ORDER BY t2d) --- !query 7 schema -struct --- !query 7 output +-- !query schema +struct +-- !query output val1a 16 12 10 15.0 20.0 2000 2014-07-04 01:01:00 2014-07-04 val1a 16 12 21 15.0 20.0 2000 2014-06-04 01:02:00.001 2014-06-04 --- !query 8 +-- !query SELECT * FROM t1 WHERE t1b IN (SELECT Min(t2b) @@ -144,9 +144,9 @@ WHERE t1b IN (SELECT Min(t2b) WHERE t1b = t2b ORDER BY Min(t2b)) ORDER BY t1c DESC nulls first --- !query 8 schema -struct --- !query 8 output +-- 
!query schema +struct +-- !query output val1e 10 NULL 25 17.0 25.0 2600 2014-08-04 01:01:00 2014-08-04 val1e 10 NULL 19 17.0 25.0 2600 2014-09-04 01:02:00.001 2014-09-04 val1d 10 NULL 12 17.0 25.0 2600 2015-05-04 01:01:00 2015-05-04 @@ -157,7 +157,7 @@ val1a 6 8 10 15.0 20.0 2000 2014-04-04 01:00:00 2014-04-04 val1a 6 8 10 15.0 20.0 2000 2014-04-04 01:02:00.001 2014-04-04 --- !query 9 +-- !query SELECT t1a, t1b, t1h @@ -170,22 +170,22 @@ WHERE t1c IN (SELECT t2c FROM t2 WHERE t1h > t2h) ORDER BY t1h DESC nulls last --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output val1c 8 2014-05-04 01:02:00.001 val1b 8 2014-05-04 01:01:00 --- !query 10 +-- !query SELECT * FROM t1 WHERE t1a NOT IN (SELECT t2a FROM t2) ORDER BY t1a --- !query 10 schema -struct --- !query 10 output +-- !query schema +struct +-- !query output val1a 6 8 10 15.0 20.0 2000 2014-04-04 01:00:00 2014-04-04 val1a 16 12 21 15.0 20.0 2000 2014-06-04 01:02:00.001 2014-06-04 val1a 16 12 10 15.0 20.0 2000 2014-07-04 01:01:00 2014-07-04 @@ -195,7 +195,7 @@ val1d NULL 16 19 17.0 25.0 2600 2014-07-04 01:02:00.001 NULL val1d 10 NULL 12 17.0 25.0 2600 2015-05-04 01:01:00 2015-05-04 --- !query 11 +-- !query SELECT t1a, t1b FROM t1 @@ -203,9 +203,9 @@ WHERE t1a NOT IN (SELECT t2a FROM t2 WHERE t1a = t2a) ORDER BY t1b DESC nulls last --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output val1a 16 val1a 16 val1d 10 @@ -215,7 +215,7 @@ val1d NULL val1d NULL --- !query 12 +-- !query SELECT * FROM t1 WHERE t1a NOT IN (SELECT t2a @@ -225,32 +225,32 @@ WHERE t1a NOT IN (SELECT t2a FROM t2 ORDER BY t2b DESC nulls last) ORDER BY t1c DESC nulls last --- !query 12 schema -struct --- !query 12 output +-- !query schema +struct +-- !query output val1d NULL 16 22 17.0 25.0 2600 2014-06-04 01:01:00 NULL val1d NULL 16 19 17.0 25.0 2600 2014-07-04 01:02:00.001 NULL val1a 16 12 21 15.0 20.0 2000 2014-06-04 01:02:00.001 2014-06-04 val1a 16 12 10 15.0 20.0 2000 2014-07-04 
01:01:00 2014-07-04 --- !query 13 +-- !query SELECT * FROM t1 WHERE t1b IN (SELECT Min(t2b) FROM t2 GROUP BY t2a ORDER BY t2a DESC) --- !query 13 schema -struct --- !query 13 output +-- !query schema +struct +-- !query output val1a 6 8 10 15.0 20.0 2000 2014-04-04 01:00:00 2014-04-04 val1a 6 8 10 15.0 20.0 2000 2014-04-04 01:02:00.001 2014-04-04 val1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 val1c 8 16 19 17.0 25.0 2600 2014-05-04 01:02:00.001 2014-05-05 --- !query 14 +-- !query SELECT t1a, Count(DISTINCT( t1b )) FROM t1 @@ -262,22 +262,22 @@ WHERE t1b IN (SELECT Min(t2b) GROUP BY t1a, t1h ORDER BY t1a --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output val1b 1 --- !query 15 +-- !query SELECT * FROM t1 WHERE t1b NOT IN (SELECT Min(t2b) FROM t2 GROUP BY t2a ORDER BY t2a) --- !query 15 schema -struct --- !query 15 output +-- !query schema +struct +-- !query output val1a 16 12 10 15.0 20.0 2000 2014-07-04 01:01:00 2014-07-04 val1a 16 12 21 15.0 20.0 2000 2014-06-04 01:02:00.001 2014-06-04 val1d 10 NULL 12 17.0 25.0 2600 2015-05-04 01:01:00 2015-05-04 @@ -286,7 +286,7 @@ val1e 10 NULL 19 17.0 25.0 2600 2014-09-04 01:02:00.001 2014-09-04 val1e 10 NULL 25 17.0 25.0 2600 2014-08-04 01:01:00 2014-08-04 --- !query 16 +-- !query SELECT t1a, Sum(DISTINCT( t1b )) FROM t1 @@ -296,16 +296,16 @@ WHERE t1b NOT IN (SELECT Min(t2b) GROUP BY t2c ORDER BY t2c DESC nulls last) GROUP BY t1a --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output val1a 22 val1c 8 val1d 10 val1e 10 --- !query 17 +-- !query SELECT Count(DISTINCT( t1a )), t1b FROM t1 @@ -317,9 +317,9 @@ WHERE t1h NOT IN (SELECT t2h GROUP BY t1a, t1b ORDER BY t1b DESC nulls last --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 16 1 10 1 10 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out 
b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out index e06f9206d3401..783f4031a452b 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-set-operations.sql.out @@ -2,71 +2,71 @@ -- Number of queries: 16 --- !query 0 +-- !query create temporary view t1 as select * from values - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("val1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("val1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp 
'2014-06-04 01:02:00.001', date '2014-06-04'), + ("val1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("val1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("val1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("val1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values - ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("val1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("val1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("val2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, 
timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("val1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("val2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("val1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("val1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("val2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("val1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- 
!query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values - ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("val1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("val3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ("val3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("val1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("val3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("val3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("val1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 
01:02:00.000', date '2014-07-04'), + ("val3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("val3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("val1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT t2a, t2b, t2c, @@ -84,16 +84,16 @@ FROM (SELECT * WHERE t2i IS NOT NULL AND 2 * t2b = t2c ORDER BY t2c DESC nulls first --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output val1b 8 16 2015-05-04 01:01:00 2015-05-04 val1b 8 16 2014-07-04 01:01:00 2014-07-04 val1b 8 16 2014-06-04 01:02:00 2014-06-04 val1b 8 16 2014-07-04 01:02:00 2014-07-04 --- !query 4 +-- !query SELECT t2a, t2b, t2d, @@ -115,15 +115,15 @@ GROUP BY t2a, t2d, t2i ORDER BY t2d DESC --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output val1b 8 119 1 2015-05-04 val1b 8 19 1 2014-07-04 val1b 8 19 1 2014-05-04 --- !query 5 +-- !query SELECT t2a, t2b, t2c, @@ -163,9 +163,9 @@ WHERE t1a IN (SELECT t3a FROM t3 WHERE t3d = t1d) GROUP BY t1a, t1b, t1c --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output val1b 10 12 19 val1b 8 16 119 val1b 8 16 19 @@ -174,7 +174,7 @@ val1b NULL 16 319 val1c 12 16 219 --- !query 6 +-- !query SELECT DISTINCT( t2a ), t2b, Count(t2c), @@ -209,16 +209,16 @@ GROUP BY t2a, t2h, t2i HAVING t2b IS NOT NULL --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 
val1b 8 1 119 2015-05-04 01:01:00 2015-05-04 val1b 8 1 19 2014-07-04 01:01:00 2014-07-04 val1c 12 1 19 2014-08-04 01:01:00 2014-08-05 val1c 12 1 219 2016-05-04 01:01:00 2016-05-04 --- !query 7 +-- !query SELECT t2a, t2b, Count(t2c), @@ -265,9 +265,9 @@ FROM t2 WHERE t2d IN (SELECT min(t1d) FROM t1 WHERE t2c = t1c) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output val1b 8 1 119 2015-05-04 01:01:00 2015-05-04 val1b 8 1 19 2014-07-04 01:01:00 2014-07-04 val1b 8 16 19 2014-07-04 01:01:00 2014-07-04 @@ -275,7 +275,7 @@ val1b NULL 16 19 2014-05-04 01:01:00 NULL val1c 12 16 19 2014-08-04 01:01:00 2014-08-05 --- !query 8 +-- !query SELECT t2a, t2b, t2c, @@ -312,16 +312,16 @@ FROM t2 WHERE t2c IN (SELECT Max(t1c) FROM t1 WHERE t1d = t2d) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output val1b 8 16 119 val1b 8 16 19 val1b NULL 16 19 val1c 12 16 19 --- !query 9 +-- !query SELECT DISTINCT(t1a), t1b, t1c, @@ -354,9 +354,9 @@ WHERE t1a IN (SELECT t3a GROUP BY t1a, t1b, t1c, t1d HAVING t1c IS NOT NULL AND t1b IS NOT NULL ORDER BY t1c DESC, t1a DESC --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output val1c 8 16 19 val1b 8 16 19 val1a 16 12 21 @@ -364,7 +364,7 @@ val1a 16 12 10 val1a 6 8 10 --- !query 10 +-- !query SELECT t1a, t1b, t1c @@ -378,9 +378,9 @@ WHERE t1b IN (SELECT t2b FROM t1 WHERE t1b > 6) AS t3 WHERE t2b = t1b) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output val1b 8 16 val1c 8 16 val1d 10 NULL @@ -389,7 +389,7 @@ val1e 10 NULL val1e 10 NULL --- !query 11 +-- !query SELECT t1a, t1b, t1c @@ -401,9 +401,9 @@ WHERE t1h IN (SELECT t2h SELECT t3h FROM t3) AS t3) ORDER BY t1b DESC NULLs first, t1c DESC NULLs last --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output val1d NULL 16 val1a 16 12 val1e 10 NULL @@ -412,7 +412,7 @@ val1e 10 NULL val1b 8 16 --- !query 12 +-- !query SELECT t1a, t1b, t1c @@ -446,16 +446,16 
@@ WHERE t1b IN WHERE t1b > 6) AS t4 WHERE t2b = t1b) ORDER BY t1c DESC NULLS last, t1a DESC --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output val1c 8 16 val1b 8 16 val1e 10 NULL val1d 10 NULL --- !query 13 +-- !query SELECT * FROM (SELECT * FROM (SELECT * @@ -497,13 +497,13 @@ FROM (SELECT * WHERE t4.t2b IN (SELECT Min(t3b) FROM t3 WHERE t4.t2a = t3a)) --- !query 13 schema -struct --- !query 13 output +-- !query schema +struct +-- !query output val1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 --- !query 14 +-- !query SELECT t2a, t2b, t2c, @@ -530,14 +530,14 @@ WHERE t3.t2a NOT IN (SELECT t1a FROM t2) AND t2c IS NOT NULL ORDER BY t2a --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output val2a 6 12 2014-04-04 val2a 6 12 2014-04-04 --- !query 15 +-- !query SELECT Count(DISTINCT(t1a)), t1b, t1c, @@ -581,9 +581,9 @@ HAVING t1b NOT IN SELECT t3b FROM t3) ORDER BY t1c DESC NULLS LAST, t1i --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 8 16 2014-05-04 1 8 16 2014-05-05 1 16 12 2014-06-04 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-with-cte.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-with-cte.sql.out index 7d3943e3764c5..b9cc68a339746 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-with-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-with-cte.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 13 --- !query 0 +-- !query create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -17,13 +17,13 @@ create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 
01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -39,13 +39,13 @@ create temporary view t2 as select * from values ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), @@ -60,13 +60,13 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query WITH cte1 AS (SELECT t1a, t1b @@ -81,16 +81,16 @@ FROM t1 WHERE t1b IN (SELECT cte1.t1b FROM cte1 WHERE cte1.t1b > 0) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output val1a 16 12 10 2014-07-04 01:01:00 val1a 16 12 21 2014-06-04 01:02:00.001 val1a 6 8 10 2014-04-04 01:00:00 val1a 6 8 10 
2014-04-04 01:02:00.001 --- !query 4 +-- !query WITH cte1 AS ( SELECT t1a, @@ -118,16 +118,16 @@ WHERE t1b IN FROM cte1 ) GROUP BY t1a, t1b, t1c HAVING t1c IS NOT NULL --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 16 12 1 6 8 1 8 16 1 8 16 --- !query 5 +-- !query WITH cte1 AS ( SELECT t1a, @@ -155,16 +155,16 @@ WHERE t1c IN ON cte1.t1b < cte5.t1b LEFT OUTER JOIN cte1 cte6 ON cte1.t1d > cte6.t1d) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output val1b 8 16 2014-05-04 01:01:00 val1c 8 16 2014-05-04 01:02:00.001 val1d NULL 16 2014-06-04 01:01:00 val1d NULL 16 2014-07-04 01:02:00.001 --- !query 6 +-- !query WITH cte1 AS (SELECT t1a, t1b @@ -186,13 +186,13 @@ FROM (SELECT * ON cte1.t1a = cte3.t1a INNER JOIN cte1 cte4 ON cte1.t1b = cte4.t1b) s --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output val1b 8 val1b 8 val1b 8 val1b 8 --- !query 7 +-- !query WITH cte1 AS ( SELECT t1a, @@ -217,13 +217,13 @@ WHERE t1b IN SELECT t1b FROM t1) GROUP BY t1b --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 2 8 --- !query 8 +-- !query WITH cte1 AS ( SELECT t1a, @@ -244,13 +244,13 @@ FROM ( RIGHT OUTER JOIN cte1 cte3 ON cte1.t1b = cte3.t1b LEFT OUTER JOIN cte1 cte4 ON cte1.t1c = cte4.t1c ) s --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output val1b 8 16 val1b 8 16 val1b 8 16 val1b 8 16 --- !query 9 +-- !query WITH cte1 AS (SELECT t1a, t1b @@ -266,13 +266,13 @@ FROM (SELECT cte1.t1a, RIGHT OUTER JOIN cte1 cte2 ON cte1.t1a = cte2.t1a) s GROUP BY s.t1b --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 2 8 --- !query 10 +-- !query WITH cte1 AS ( SELECT t1a, @@ -295,13 +295,13 @@ WHERE s.t1b IN FROM t1 INNER JOIN cte1 ON t1.t1a = cte1.t1a) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 8 --- !query 11 +-- !query WITH cte1 AS (SELECT t1a, t1b @@ -316,9 +316,9 
@@ WHERE t1b NOT IN (SELECT cte1.t1b FROM cte1 WHERE cte1.t1b < 0) AND t1c > 10 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output val1a 16 12 2014-06-04 01:02:00.001 val1a 16 12 2014-07-04 01:01:00 val1b 8 16 2014-05-04 01:01:00 @@ -327,7 +327,7 @@ val1d NULL 16 2014-06-04 01:01:00 val1d NULL 16 2014-07-04 01:02:00.001 --- !query 12 +-- !query WITH cte1 AS ( SELECT t1a, @@ -357,8 +357,8 @@ WHERE t1b NOT IN JOIN cte1 cte4 ON cte1.t1c = cte4.t1c) AND t1c IS NOT NULL ORDER BY t1c DESC --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output val1b 8 16 19 2014-05-04 01:01:00 val1c 8 16 19 2014-05-04 01:02:00.001 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out index 6b86a9f6a0d00..720db9e8bdb15 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-group-by.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 8 --- !query 0 +-- !query create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -17,13 +17,13 @@ create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 
01:01:00.000', date '2014-04-04'), ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -39,13 +39,13 @@ create temporary view t2 as select * from values ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), @@ -60,27 +60,27 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT t1a, Avg(t1b) FROM t1 WHERE t1a NOT IN (SELECT t2a FROM t2) GROUP BY t1a --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output val1a 11.0 val1d 10.0 --- !query 4 +-- !query SELECT t1a, Sum(DISTINCT( t1b )) FROM t1 @@ -88,15 +88,15 @@ WHERE t1d NOT IN (SELECT t2d FROM t2 WHERE t1h < t2h) GROUP BY t1a --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output val1a 22 val1d 10 val1e 10 --- !query 5 +-- !query SELECT Count(*) FROM (SELECT * FROM t2 @@ -107,13 +107,13 @@ WHERE t2b NOT IN (SELECT Min(t2b) FROM t2 WHERE t2b = t2b GROUP BY t2c) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 4 --- !query 6 +-- !query SELECT t1a, max(t1b) FROM t1 @@ 
-122,16 +122,16 @@ WHERE t1c NOT IN (SELECT Max(t2b) WHERE t1a = t2a GROUP BY t2a) GROUP BY t1a --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output val1a 16 val1b 8 val1c 8 val1d 10 --- !query 7 +-- !query SELECT t1a, t1b FROM t1 @@ -141,9 +141,9 @@ WHERE t1c IN (SELECT t2b FROM t3 WHERE t3a = t2a GROUP BY t3b) order by t2a) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output val1a 16 val1a 16 val1a 6 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-joins.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-joins.sql.out index bae5d00cc8632..4872e3c953ff6 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-joins.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-joins.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 9 --- !query 0 +-- !query create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), ("val1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -17,13 +17,13 @@ create temporary view t1 as select * from values ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("val2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), ("val1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -39,13 +39,13 @@ create temporary view t2 as select * from values ("val1f", 19S, null, 19L, 
float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values ("val3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), ("val3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), @@ -60,13 +60,13 @@ create temporary view t3 as select * from values ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT t1a, t1b, t1c, @@ -78,9 +78,9 @@ FROM t1 WHERE t1a NOT IN (SELECT t2a FROM t2) AND t1b = t3b --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output val1a 6 8 val3a 6 12 val1a 6 8 val3a 6 12 val1a 6 8 val3a 6 12 @@ -89,7 +89,7 @@ val1d 10 NULL val1b 10 12 val1d 10 NULL val1b 10 12 --- !query 4 +-- !query SELECT t1a, t1b, t1c, @@ -113,15 +113,15 @@ AND t1d = t2d GROUP BY t1a, t1b, t1c, t3a, t3b, t3c HAVING count(distinct(t3a)) >= 1 ORDER BY t1a, t3b --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output val1c 8 16 1 6 12 val1c 8 16 1 10 12 val1c 8 16 1 17 16 --- !query 5 +-- !query SELECT t1a, t1b, t1c, @@ -141,9 +141,9 @@ AND t1d NOT IN FROM t2 RIGHT JOIN t1 on t2e = t1e WHERE t1a = t2a) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output val1a 16 12 10 2014-07-04 01:01:00 val1a 16 12 21 2014-06-04 01:02:00.001 val1a 6 8 10 2014-04-04 01:00:00 @@ -153,7 +153,7 @@ val1d NULL 
16 22 2014-06-04 01:01:00 val1e 10 NULL 25 2014-08-04 01:01:00 --- !query 6 +-- !query SELECT Count(DISTINCT( t1a )), t1b, t1c, @@ -169,10 +169,10 @@ GROUP BY t1b, HAVING t1d NOT IN (SELECT t2d FROM t2 WHERE t1d = t2d) -ORDER BY t1b DESC --- !query 6 schema +ORDER BY t1b DESC, t1d ASC +-- !query schema struct --- !query 6 output +-- !query output 1 16 12 10 1 16 12 21 1 10 NULL 12 @@ -180,7 +180,7 @@ struct 1 NULL 16 22 --- !query 7 +-- !query SELECT COUNT(DISTINCT(t1a)), t1b, t1c, @@ -195,13 +195,13 @@ GROUP BY t1b, t1c, t1d HAVING t1b < sum(t1c) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 6 8 10 --- !query 8 +-- !query SELECT COUNT(DISTINCT(t1a)), t1b, t1c, @@ -223,7 +223,7 @@ GROUP BY t1b, t1c, t1d HAVING t1b < sum(t1c) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 6 8 10 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out index f02f760727976..bc9e6f842557e 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out @@ -2,47 +2,47 @@ -- Number of queries: 4 --- !query 0 +-- !query CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, null), (null, 1.0), (2, 3.0), (4, 5.0) AS m(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT * FROM m WHERE b = 1.0 -- Matches (null, 1.0) AND (a, b) NOT IN ((2, 3.0)) --- !query 1 schema +-- !query schema struct --- !query 1 output -NULL 1 +-- !query output +NULL 1.0 --- !query 2 +-- !query SELECT * FROM m WHERE b = 3.0 -- Matches (2, 3.0) AND (a, b) NOT IN ((2, 3.0)) --- !query 2 schema +-- !query schema 
struct --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM m WHERE b = 5.0 -- Matches (4, 5.0) AND (a, b) NOT IN ((2, 3.0)) --- !query 3 schema +-- !query schema struct --- !query 3 output -4 5 +-- !query output +4 5.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out index a27a66e3f27f5..54d6da8d0da83 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out @@ -2,119 +2,119 @@ -- Number of queries: 9 --- !query 0 +-- !query CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, null), (null, 1.0), (2, 3.0), (4, 5.0) AS m(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES (null, null), (0, 1.0), (2, 3.0), (4, null) AS s(c, d) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM m WHERE (a, b) NOT IN (SELECT * FROM s WHERE d > 5.0) -- Matches no rows --- !query 2 schema +-- !query schema struct --- !query 2 output -2 3 -4 5 -NULL 1 +-- !query output +2 3.0 +4 5.0 +NULL 1.0 NULL NULL --- !query 3 +-- !query SELECT * FROM m WHERE (a, b) NOT IN (SELECT * FROM s WHERE c IS NULL AND d IS NULL) -- Matches only (null, null) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output --- !query 4 +-- !query SELECT * FROM m WHERE a IS NULL AND b IS NULL -- Matches only (null, null) AND (a, b) NOT IN (SELECT * FROM s WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT * FROM m WHERE b 
= 1.0 -- Matches (null, 1.0) AND (a, b) NOT IN (SELECT * FROM s WHERE c IS NOT NULL) -- Matches (0, 1.0), (2, 3.0), (4, null) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT * FROM m WHERE b = 1.0 -- Matches (null, 1.0) AND (a, b) NOT IN (SELECT * FROM s WHERE c = 2) -- Matches (2, 3.0) --- !query 6 schema +-- !query schema struct --- !query 6 output -NULL 1 +-- !query output +NULL 1.0 --- !query 7 +-- !query SELECT * FROM m WHERE b = 3.0 -- Matches (2, 3.0) AND (a, b) NOT IN (SELECT * FROM s WHERE c = 2) -- Matches (2, 3.0) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output --- !query 8 +-- !query SELECT * FROM m WHERE b = 5.0 -- Matches (4, 5.0) AND (a, b) NOT IN (SELECT * FROM s WHERE c = 2) -- Matches (2, 3.0) --- !query 8 schema +-- !query schema struct --- !query 8 output -4 5 +-- !query output +4 5.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out index cf8f03eaa9311..0fc9cf289155d 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out @@ -2,56 +2,56 @@ -- Number of queries: 5 --- !query 0 +-- !query CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), (2, 3.0), (4, 5.0) AS m(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT * FROM m WHERE a NOT IN (null) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM m WHERE b = 1.0 -- Only matches (null, 1.0) AND a NOT IN (2) --- !query 2 schema +-- !query schema struct --- !query 2 output 
+-- !query output --- !query 3 +-- !query SELECT * FROM m WHERE b = 3.0 -- Only matches (2, 3.0) AND a NOT IN (2) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output --- !query 4 +-- !query SELECT * FROM m WHERE b = 3.0 -- Only matches (2, 3.0) AND a NOT IN (6) --- !query 4 schema +-- !query schema struct --- !query 4 output -2 3 +-- !query output +2 3.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out index d07981cfd11e5..ef40fd462f883 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out @@ -2,130 +2,130 @@ -- Number of queries: 10 --- !query 0 +-- !query CREATE TEMPORARY VIEW m AS SELECT * FROM VALUES (null, 1.0), (2, 3.0), (4, 5.0) AS m(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW s AS SELECT * FROM VALUES (null, 1.0), (2, 3.0), (6, 7.0) AS s(c, d) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM m WHERE a NOT IN (SELECT c FROM s WHERE d > 10.0) -- (empty subquery) --- !query 2 schema +-- !query schema struct --- !query 2 output -2 3 -4 5 -NULL 1 +-- !query output +2 3.0 +4 5.0 +NULL 1.0 --- !query 3 +-- !query SELECT * FROM m WHERE a NOT IN (SELECT c FROM s WHERE d = 1.0) -- Only matches (null, 1.0) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output --- !query 4 +-- !query SELECT * FROM m WHERE b = 1.0 -- Only matches (null, 1.0) AND a NOT IN (SELECT c FROM s WHERE d = 3.0) -- Matches (2, 3.0) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output --- 
!query 5 +-- !query SELECT * FROM m WHERE b = 3.0 -- Only matches (2, 3.0) AND a NOT IN (SELECT c FROM s WHERE d = 3.0) -- Matches (2, 3.0) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT * FROM m WHERE b = 3.0 -- Only matches (2, 3.0) AND a NOT IN (SELECT c FROM s WHERE d = 7.0) -- Matches (6, 7.0) --- !query 6 schema +-- !query schema struct --- !query 6 output -2 3 +-- !query output +2 3.0 --- !query 7 +-- !query SELECT * FROM m WHERE a NOT IN (SELECT c FROM s WHERE d = b + 10) -- Matches no row --- !query 7 schema +-- !query schema struct --- !query 7 output -2 3 -4 5 -NULL 1 +-- !query output +2 3.0 +4 5.0 +NULL 1.0 --- !query 8 +-- !query SELECT * FROM m WHERE b = 1.0 -- Only matches (null, 1.0) AND a NOT IN (SELECT c FROM s WHERE d = b + 10) -- Matches no row --- !query 8 schema +-- !query schema struct --- !query 8 output -NULL 1 +-- !query output +NULL 1.0 --- !query 9 +-- !query SELECT * FROM m WHERE b = 3.0 -- Only matches (2, 3.0) AND a NOT IN (SELECT c FROM s WHERE d = b + 10) -- Matches no row --- !query 9 schema +-- !query schema struct --- !query 9 output -2 3 +-- !query output +2 3.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out index d69b4bcf185c3..0661e1c9e4d96 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/simple-in.sql.out @@ -2,78 +2,78 @@ -- Number of queries: 14 --- !query 0 +-- !query create temporary view t1 as select * from values - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), - ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 
01:02:00.001', date '2014-06-04'), - ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ("t1d", null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ("t1d", null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'), + ("t1b", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1a", 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ("t1a", 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ("t1d", null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ("t1d", null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.001', null), + ("t1e", 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date 
'2015-05-04'), + ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values - ("t2a", 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ("t1c", 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ("t1b", null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ("t2e", 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1f", 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ("t1c", 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ("t1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ("t1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ("t2a", 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 119L, 
float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ("t1c", 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ("t1b", null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ("t2e", 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("t1f", 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ("t1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ("t1c", 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ("t1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ("t1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values - ("t3a", 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ("t3a", 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t1b", 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ("t1b", 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ("t3c", 17S, 16, 519L, float(17), 25D, 26E2, timestamp 
'2014-08-04 01:02:00.000', date '2014-08-04'), - ("t3c", 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ("t1b", null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ("t1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ("t3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ("t3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ("t3a", 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ("t3a", 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t1b", 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ("t1b", 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ("t3c", 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ("t3c", 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ("t1b", null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), + ("t3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ("t3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM t1 WHERE t1a IN (SELECT t2a FROM t2) --- 
!query 3 schema -struct --- !query 3 output +-- !query schema +struct +-- !query output t1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 t1c 8 16 19 17.0 25.0 2600 2014-05-04 01:02:00.001 2014-05-05 t1e 10 NULL 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 @@ -81,35 +81,35 @@ t1e 10 NULL 19 17.0 25.0 2600 2014-09-04 01:02:00.001 2014-09-04 t1e 10 NULL 25 17.0 25.0 2600 2014-08-04 01:01:00 2014-08-04 --- !query 4 +-- !query SELECT * FROM t1 WHERE t1b IN (SELECT t2b FROM t2 WHERE t1a = t2a) --- !query 4 schema -struct --- !query 4 output +-- !query schema +struct +-- !query output t1b 8 16 19 17.0 25.0 2600 2014-05-04 01:01:00 2014-05-04 --- !query 5 +-- !query SELECT t1a, t1b FROM t1 WHERE t1c IN (SELECT t2b FROM t2 WHERE t1a != t2a) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output t1a 16 t1a 16 t1a 6 t1a 6 --- !query 6 +-- !query SELECT t1a, t1b FROM t1 @@ -117,14 +117,14 @@ WHERE t1c IN (SELECT t2b FROM t2 WHERE t1a = t2a OR t1b > t2b) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output t1a 16 t1a 16 --- !query 7 +-- !query SELECT t1a, t1b FROM t1 @@ -133,14 +133,14 @@ WHERE t1c IN (SELECT t2b WHERE t2i IN (SELECT t3i FROM t3 WHERE t2c = t3c)) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output t1a 6 t1a 6 --- !query 8 +-- !query SELECT t1a, t1b FROM t1 @@ -150,23 +150,23 @@ WHERE t1c IN (SELECT t2b FROM t3 WHERE t2c = t3c AND t2b IS NOT NULL)) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output t1a 6 t1a 6 --- !query 9 +-- !query SELECT DISTINCT( t1a ), t1b, t1h FROM t1 WHERE t1a NOT IN (SELECT t2a FROM t2) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output t1a 16 2014-06-04 01:02:00.001 t1a 16 2014-07-04 01:01:00 t1a 6 2014-04-04 01:00:00 @@ -176,49 +176,49 @@ t1d NULL 2014-06-04 01:01:00 t1d NULL 2014-07-04 01:02:00.001 --- !query 10 +-- !query create temporary view a as select * from values 
(1, 1), (2, 1), (null, 1), (1, 3), (null, 3), (1, null), (null, 2) as a(a1, a2) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query create temporary view b as select * from values (1, 1, 2), (null, 3, 2), (1, null, 2), (1, 2, null) as b(b1, b2, b3) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query SELECT a1, a2 FROM a WHERE a1 NOT IN (SELECT b.b1 FROM b WHERE a.a2 = b.b2) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 1 NULL 2 1 --- !query 13 +-- !query SELECT a1, a2 FROM a WHERE a1 NOT IN (SELECT b.b1 FROM b WHERE a.a2 = b.b2 AND b.b3 > 1) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 NULL 2 1 NULL 2 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index 7b47a6139f60a..ec7ecf28754ef 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -2,37 +2,37 @@ -- Number of queries: 8 --- !query 0 +-- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) AS t1(t1a, t1b, t1c) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1, 0, 1) AS t2(t2a, t2b, t2c) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (3, 1, 2) AS t3(t3a, t3b, t3c) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT t1a, t2b FROM t1, t2 WHERE t1b = t2c @@ -42,14 +42,14 @@ AND t2b = (SELECT max(avg) 
WHERE t2a = t1.t1b ) ) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and 't2.`t2b`' is not an aggregate function. Wrap '(avg(CAST(t2.`t2b` AS BIGINT)) AS `avg`)' in windowing function(s) or wrap 't2.`t2b`' in first() (or first_value) if you don't care which value you get.; --- !query 4 +-- !query SELECT * FROM t1 WHERE t1a IN (SELECT min(t2a) @@ -59,14 +59,14 @@ WHERE t1a IN (SELECT min(t2a) FROM t3 GROUP BY t3b HAVING t3b > t2b )) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]).; --- !query 5 +-- !query SELECT t1a FROM t1 GROUP BY 1 @@ -74,14 +74,14 @@ HAVING EXISTS (SELECT t2a FROM t2 GROUP BY 1 HAVING t2a < min(t1a + t2a)) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t1.`t1a` + t2.`t2a`)), Outer references: t1.`t1a`, Local references: t2.`t2a`.; --- !query 6 +-- !query SELECT t1a FROM t1 WHERE t1a IN (SELECT t2a @@ -90,28 +90,28 @@ WHERE t1a IN (SELECT t2a FROM t3 GROUP BY 1 HAVING min(t2a + t3a) > 1)) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. 
Aggregate expression: min((t2.`t2a` + t3.`t3a`)), Outer references: t2.`t2a`, Local references: t3.`t3a`.; --- !query 7 +-- !query SELECT t1a FROM t1 WHERE t1a IN (SELECT t2a FROM t2 WHERE EXISTS (SELECT min(t2a) FROM t3)) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses: Aggregate [min(outer(t2a#x)) AS min(outer())#x] -+- SubqueryAlias `t3` ++- SubqueryAlias t3 +- Project [t3a#x, t3b#x, t3c#x] - +- SubqueryAlias `t3` + +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x] ; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out index dcd30055bca19..776598127075b 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out @@ -2,57 +2,57 @@ -- Number of queries: 10 --- !query 0 +-- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1, 2, 3) AS t1(t1a, t1b, t1c) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1, 0, 1) AS t2(t2a, t2b, t2c) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (3, 1, 2) AS t3(t3a, t3b, t3c) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES (CAST(1 AS DOUBLE), CAST(2 AS STRING), CAST(3 AS STRING)) AS t1(t4a, t4b, t4c) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE 
TEMPORARY VIEW t5 AS SELECT * FROM VALUES - (CAST(1 AS DECIMAL(18, 0)), CAST(2 AS STRING), CAST(3 AS BIGINT)) + (CAST('2011-01-01 01:01:01' AS TIMESTAMP), CAST(2 AS STRING), CAST(3 AS BIGINT)) AS t1(t5a, t5b, t5c) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT ( SELECT max(t2b), min(t2b) FROM t2 @@ -60,14 +60,14 @@ SELECT GROUP BY t2.t2b ) FROM t1 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException Scalar subquery must return only one column, but got 2; --- !query 6 +-- !query SELECT ( SELECT max(t2b), min(t2b) FROM t2 @@ -75,22 +75,22 @@ SELECT GROUP BY t2.t2b ) FROM t1 --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException Scalar subquery must return only one column, but got 2; --- !query 7 +-- !query SELECT * FROM t1 WHERE t1a IN (SELECT t2a, t2b FROM t2 WHERE t1a = t2a) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(t1.`t1a` IN (listquery(t1.`t1a`)))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the @@ -103,15 +103,15 @@ Right side columns: [t2.`t2a`, t2.`t2b`].; --- !query 8 +-- !query SELECT * FROM T1 WHERE (t1a, t1b) IN (SELECT t2a FROM t2 WHERE t1a = t2a) --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`)))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the @@ -124,23 +124,23 @@ Right side columns: [t2.`t2a`].; --- !query 9 +-- !query SELECT * FROM t4 WHERE (t4a, t4b, t4c) IN (SELECT t5a, t5b, t5c FROM t5) --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query 
output org.apache.spark.sql.AnalysisException cannot resolve '(named_struct('t4a', t4.`t4a`, 't4b', t4.`t4b`, 't4c', t4.`t4c`) IN (listquery()))' due to data type mismatch: The data type of one or more elements in the left hand side of an IN subquery is not compatible with the data type of the output of the subquery Mismatched columns: -[(t4.`t4a`:double, t5.`t5a`:decimal(18,0)), (t4.`t4c`:string, t5.`t5c`:bigint)] +[(t4.`t4a`:double, t5.`t5a`:timestamp), (t4.`t4c`:string, t5.`t5c`:bigint)] Left side: [double, string, string]. Right side: -[decimal(18,0), string, bigint].; +[timestamp, string, bigint].; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out index dd82efba0dde1..b7eef929864fc 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out @@ -2,36 +2,36 @@ -- Number of queries: 27 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT pk, cv FROM p, c WHERE p.pk = c.ck AND c.cv = (SELECT avg(c1.cv) FROM c c1 WHERE c1.ck = p.pk) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 1 --- !query 3 +-- !query SELECT pk, cv FROM p, c WHERE p.pk = c.ck @@ -40,105 +40,105 @@ AND c.cv = (SELECT max(avg) FROM c c1 WHERE c1.ck = p.pk GROUP BY c1.cv)) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 1 --- !query 4 +-- !query create temporary view 
t1 as select * from values - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), - ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), + ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2BD, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), + ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), + ('val1d', null, 16, 22L, float(17.0), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', null), + ('val1d', null, 16, 19L, float(17.0), 25D, 26E2BD, timestamp 
'2014-07-04 01:02:00.001', null), + ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), + ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), + ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query create temporary view t2 as select * from values - ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ('val1f', 19S, null, 19L, float(17), 25D, 26E2, 
timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) + ('val2a', 6S, 12, 14L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 119L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), + ('val1c', 12S, 16, 219L, float(17), 25D, 26E2BD, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), + ('val1b', null, 16, 319L, float(17), 25D, 26E2BD, timestamp '2017-05-04 01:01:00.000', null), + ('val2e', 8S, null, 419L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1f', 19S, null, 519L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), + ('val1c', 12S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), + ('val1e', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), + ('val1f', 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), + ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query create temporary view t3 as select * from values - ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp 
'2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') + ('val3a', 6S, 12, 110L, float(15), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), + ('val3a', 6S, 12, 10L, float(15), 20D, 20E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 219L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 10S, 12, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val1b', 8S, 16, 319L, float(17), 25D, 26E2BD, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), + ('val1b', 8S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), + ('val3c', 17S, 16, 519L, float(17), 25D, 26E2BD, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), + ('val3c', 17S, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), + ('val1b', null, 16, 419L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:02:00.000', null), + ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 
01:02:00.000', null), + ('val3b', 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), + ('val3b', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query SELECT t1a, t1b FROM t1 WHERE t1c = (SELECT max(t2c) FROM t2) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output val1b 8 val1c 8 val1d NULL val1d NULL --- !query 8 +-- !query SELECT t1a, t1d, t1f FROM t1 WHERE t1c = (SELECT max(t2c) FROM t2) AND t1b > (SELECT min(t3b) FROM t3) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output val1b 19 25.0 val1c 19 25.0 --- !query 9 +-- !query SELECT t1a, t1h FROM t1 WHERE t1c = (SELECT max(t2c) @@ -146,69 +146,69 @@ WHERE t1c = (SELECT max(t2c) OR t1b = (SELECT min(t3b) FROM t3 WHERE t3b > 10) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output val1b 2014-05-04 01:01:00 val1c 2014-05-04 01:02:00.001 val1d 2014-06-04 01:01:00 val1d 2014-07-04 01:02:00.001 --- !query 10 +-- !query SELECT t1a, t1b, t2d FROM t1 LEFT JOIN t2 ON t1a = t2a WHERE t1b = (SELECT min(t3b) FROM t3) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output val1a 6 NULL val1a 6 NULL --- !query 11 +-- !query SELECT t1a, t1b, t1g FROM t1 WHERE t1c + 5 = (SELECT max(t2e) FROM t2) --- !query 11 schema -struct --- !query 11 output +-- !query schema +struct +-- !query output val1a 16 2000 val1a 16 2000 --- !query 12 +-- !query SELECT t1a, t1h FROM t1 WHERE date(t1h) = (SELECT min(t2i) FROM t2) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output val1a 2014-04-04 00:00:00 val1a 2014-04-04 01:02:00.001 --- !query 13 +-- !query SELECT t2d, t1a FROM t1, t2 WHERE t1b = t2b AND t2c + 1 = (SELECT max(t2c) + 1 FROM t2, t1 WHERE t2b 
= t1b) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 119 val1b 119 val1c 19 val1b 19 val1c --- !query 14 +-- !query SELECT DISTINCT t2a, max_t1g FROM t2, (SELECT max(t1g) max_t1g, t1a FROM t1 @@ -216,15 +216,15 @@ FROM t2, (SELECT max(t1g) max_t1g, t1a WHERE t2a = t1a AND max_t1g = (SELECT max(t1g) FROM t1) --- !query 14 schema -struct --- !query 14 output +-- !query schema +struct +-- !query output val1b 2600 val1c 2600 val1e 2600 --- !query 15 +-- !query SELECT t3b, t3c FROM t3 WHERE (SELECT max(t3c) @@ -234,40 +234,40 @@ WHERE (SELECT max(t3c) FROM t3 WHERE t3c > 0) AND (t3b is null or t3c is null) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 8 NULL 8 NULL NULL 16 NULL 16 --- !query 16 +-- !query SELECT t1a FROM t1 WHERE t1a < (SELECT max(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output val1a val1a val1b --- !query 17 +-- !query SELECT t1a, t1c FROM t1 WHERE (SELECT max(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) IS NULL --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output val1a 8 val1a 8 val1d NULL @@ -276,7 +276,7 @@ val1e NULL val1e NULL --- !query 18 +-- !query SELECT t1a FROM t1 WHERE t1a = (SELECT max(t2a) @@ -285,14 +285,14 @@ WHERE t1a = (SELECT max(t2a) GROUP BY t2c HAVING count(*) >= 0) OR t1i > '2014-12-31' --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output val1c val1d --- !query 19 +-- !query SELECT t1a FROM t1 WHERE t1a = (SELECT max(t2a) @@ -301,14 +301,14 @@ WHERE t1a = (SELECT max(t2a) GROUP BY t2c HAVING count(*) >= 1) OR t1i > '2014-12-31' --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output val1c val1d --- !query 20 +-- !query SELECT count(t1a) FROM t1 RIGHT JOIN t2 ON t1d = t2d @@ -316,13 +316,13 @@ WHERE t1a < (SELECT max(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 20 schema +-- 
!query schema struct --- !query 20 output +-- !query output 7 --- !query 21 +-- !query SELECT t1a FROM t1 WHERE t1b <= (SELECT max(t2b) @@ -333,14 +333,14 @@ AND t1b >= (SELECT min(t2b) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output val1b val1c --- !query 22 +-- !query SELECT t1a FROM t1 WHERE t1a <= (SELECT max(t2a) @@ -354,14 +354,14 @@ WHERE t1a >= (SELECT min(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output val1b val1c --- !query 23 +-- !query SELECT t1a FROM t1 WHERE t1a <= (SELECT max(t2a) @@ -375,9 +375,9 @@ WHERE t1a >= (SELECT min(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output val1a val1a val1b @@ -388,7 +388,7 @@ val1d val1d --- !query 24 +-- !query SELECT t1a FROM t1 WHERE t1a <= (SELECT max(t2a) @@ -402,16 +402,16 @@ WHERE t1a >= (SELECT min(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output val1a val1b val1c val1d --- !query 25 +-- !query SELECT t1a FROM t1 WHERE t1a <= (SELECT max(t2a) @@ -425,13 +425,13 @@ WHERE t1a >= (SELECT min(t2a) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output val1a --- !query 26 +-- !query SELECT t1a FROM t1 GROUP BY t1a, t1c @@ -439,8 +439,8 @@ HAVING max(t1b) <= (SELECT max(t2b) FROM t2 WHERE t2c = t1c GROUP BY t2c) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output val1b val1c diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out index 807bb47221885..184b8daf9d28e 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 11 --- !query 0 +-- !query create temporary view t1 as select * from values ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -17,13 +17,13 @@ create temporary view t1 as select * from values ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), @@ -39,13 +39,13 @@ create temporary view t2 as select * from values ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view t3 as select * from values ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), @@ -60,24 +60,24 @@ create temporary view t3 as select * from values ('val3b', 8S, null, 719L, 
float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT (SELECT min(t3d) FROM t3) min_t3d, (SELECT max(t2h) FROM t2) max_t2h FROM t1 WHERE t1a = 'val1c' --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 10 2017-05-04 01:01:00 --- !query 4 +-- !query SELECT t1a, count(*) FROM t1 WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) @@ -85,13 +85,13 @@ WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) GROUP BY t2g HAVING count(*) > 1) GROUP BY t1a --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output val1a 2 --- !query 5 +-- !query SELECT (SELECT min(t3d) FROM t3) min_t3d, null FROM t1 @@ -101,14 +101,14 @@ SELECT null, (SELECT max(t2h) FROM t2) max_t2h FROM t1 WHERE t1a = 'val1c' --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 10 NULL NULL 2017-05-04 01:01:00 --- !query 6 +-- !query SELECT (SELECT min(t3c) FROM t3) min_t3d FROM t1 WHERE t1a = 'val1a' @@ -116,13 +116,13 @@ INTERSECT SELECT (SELECT min(t2c) FROM t2) min_t2d FROM t1 WHERE t1a = 'val1d' --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 12 --- !query 7 +-- !query SELECT q1.t1a, q2.t2a, q1.min_t3d, q2.avg_t3d FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d FROM t1 @@ -133,9 +133,9 @@ FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d WHERE t2a IN ('val1c', 'val2a')) q2 ON q1.t1a = q2.t2a AND q1.min_t3d < q2.avg_t3d --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL val2a NULL 200.83333333333334 val1c val1c 10 200.83333333333334 val1c val1c 10 200.83333333333334 @@ -144,18 +144,18 @@ val1e NULL 10 NULL val1e NULL 10 NULL --- !query 8 +-- !query SELECT (SELECT 
min(t3d) FROM t3 WHERE t3.t3a = t1.t1a) min_t3d, (SELECT max(t2h) FROM t2 WHERE t2.t2a = t1.t1a) max_t2h FROM t1 WHERE t1a = 'val1b' --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 19 2017-05-04 01:01:00 --- !query 9 +-- !query SELECT (SELECT min(t3d) FROM t3 WHERE t3a = t1a) min_t3d FROM t1 WHERE t1a = 'val1b' @@ -163,13 +163,13 @@ MINUS SELECT (SELECT min(t3d) FROM t3) abs_min_t3d FROM t1 WHERE t1a = 'val1b' --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 19 --- !query 10 +-- !query SELECT t1a, t1b FROM t1 WHERE NOT EXISTS (SELECT (SELECT max(t2b) @@ -182,9 +182,9 @@ WHERE NOT EXISTS (SELECT (SELECT max(t2b) ON t2a = t1a WHERE t2c = t3c) AND t3a = t1a) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output val1a 16 val1a 16 val1a 6 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out index 50370df349168..11a51dca25341 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/subquery-in-from.sql.out @@ -2,49 +2,49 @@ -- Number of queries: 6 --- !query 0 +-- !query SELECT * FROM (SELECT * FROM testData) AS t WHERE key = 1 --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1 1 --- !query 1 +-- !query FROM (SELECT * FROM testData WHERE key = 1) AS t SELECT * --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 1 --- !query 2 +-- !query SELECT * FROM (SELECT * FROM testData) t WHERE key = 1 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 1 --- !query 3 +-- !query FROM (SELECT * FROM testData WHERE key = 1) t SELECT * --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 1 --- !query 4 +-- !query SELECT * FROM (SELECT * FROM 
testData) WHERE key = 1 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 1 --- !query 5 +-- !query FROM (SELECT * FROM testData WHERE key = 1) SELECT * --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 1 diff --git a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out index 1a2bd5ea91cde..25967a3968f23 100644 --- a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out @@ -2,96 +2,96 @@ -- Number of queries: 11 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT * FROM testData AS t(col1, col2) WHERE col1 = 1 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 1 1 2 --- !query 2 +-- !query SELECT * FROM testData AS t(col1, col2) WHERE col1 = 2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 2 1 --- !query 3 +-- !query SELECT col1 AS k, SUM(col2) FROM testData AS t(col1, col2) GROUP BY k --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 3 2 1 --- !query 4 +-- !query SELECT * FROM testData AS t(col1, col2, col3) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException Number of column aliases does not match number of columns. Number of column aliases: 3; number of columns: 2.; line 1 pos 14 --- !query 5 +-- !query SELECT * FROM testData AS t(col1) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException Number of column aliases does not match number of columns. 
Number of column aliases: 1; number of columns: 2.; line 1 pos 14 --- !query 6 +-- !query SELECT a AS col1, b AS col2 FROM testData AS t(c, d) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`a`' given input columns: [c, d]; line 1 pos 7 --- !query 7 +-- !query SELECT * FROM (SELECT 1 AS a, 1 AS b) t(col1, col2) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 1 --- !query 8 +-- !query CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1) --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query CREATE OR REPLACE TEMPORARY VIEW src2 AS SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2) --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 a 1 8.5 -2 b 2 1 +2 b 2 1.0 3 c 3 3.2 diff --git a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out index fdbea0ee90720..16d483df62fd5 100644 --- a/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/table-valued-functions.sql.out @@ -2,20 +2,20 @@ -- Number of queries: 8 --- !query 0 +-- !query select * from dummy(3) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output org.apache.spark.sql.AnalysisException could not resolve `dummy` to a table-valued function; line 1 pos 14 --- !query 1 +-- !query select * from range(6 + cos(3)) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 0 1 2 @@ -23,11 +23,11 @@ struct 4 --- !query 2 +-- 
!query select * from range(5, 10) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 5 6 7 @@ -35,11 +35,11 @@ struct 9 --- !query 3 +-- !query select * from range(0, 10, 2) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 0 2 4 @@ -47,11 +47,11 @@ struct 8 --- !query 4 +-- !query select * from range(0, 10, 1, 200) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 0 1 2 @@ -64,11 +64,11 @@ struct 9 --- !query 5 +-- !query select * from range(1, 1, 1, 1, 1) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException error: table-valued function range with alternatives: (end: long) @@ -78,11 +78,11 @@ error: table-valued function range with alternatives: cannot be applied to: (integer, integer, integer, integer, integer); line 1 pos 14 --- !query 6 +-- !query select * from range(1, null) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException error: table-valued function range with alternatives: (end: long) @@ -92,10 +92,10 @@ error: table-valued function range with alternatives: cannot be applied to: (integer, null); line 1 pos 14 --- !query 7 +-- !query select * from RaNgE(2) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 0 1 diff --git a/sql/core/src/test/resources/sql-tests/results/tablesample-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/tablesample-negative.sql.out index 35f3931736b83..0188cdd0f8e71 100644 --- a/sql/core/src/test/resources/sql-tests/results/tablesample-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/tablesample-negative.sql.out @@ -2,35 +2,35 @@ -- Number of queries: 6 --- !query 0 +-- !query CREATE DATABASE mydb1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query USE mydb1 --- !query 1 
schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TABLE t1 USING parquet AS SELECT 1 AS i1 --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT mydb1.t1 FROM t1 TABLESAMPLE (-1 PERCENT) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Sampling fraction (-0.01) must be on interval [0, 1](line 1, pos 24) @@ -40,11 +40,11 @@ SELECT mydb1.t1 FROM t1 TABLESAMPLE (-1 PERCENT) ------------------------^^^ --- !query 4 +-- !query SELECT mydb1.t1 FROM t1 TABLESAMPLE (101 PERCENT) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Sampling fraction (1.01) must be on interval [0, 1](line 1, pos 24) @@ -54,9 +54,9 @@ SELECT mydb1.t1 FROM t1 TABLESAMPLE (101 PERCENT) ------------------------^^^ --- !query 5 +-- !query DROP DATABASE mydb1 CASCADE --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/arrayJoin.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/arrayJoin.sql.out index c3d5fad0870bc..b49e6b5f21b16 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/arrayJoin.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/arrayJoin.sql.out @@ -2,89 +2,89 @@ -- Number of queries: 11 --- !query 0 +-- !query SELECT array_join(array(true, false), ', ') --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output true, false --- !query 1 +-- !query SELECT array_join(array(2Y, 1Y), ', ') --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 2, 1 --- !query 2 +-- !query SELECT array_join(array(2S, 1S), ', ') --- !query 2 schema +-- !query schema struct --- !query 2 
output +-- !query output 2, 1 --- !query 3 +-- !query SELECT array_join(array(2, 1), ', ') --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 2, 1 --- !query 4 +-- !query SELECT array_join(array(2L, 1L), ', ') --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 2, 1 --- !query 5 +-- !query SELECT array_join(array(9223372036854775809, 9223372036854775808), ', ') --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 9223372036854775809, 9223372036854775808 --- !query 6 +-- !query SELECT array_join(array(2.0D, 1.0D), ', ') --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 2.0, 1.0 --- !query 7 +-- !query SELECT array_join(array(float(2.0), float(1.0)), ', ') --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 2.0, 1.0 --- !query 8 +-- !query SELECT array_join(array(date '2016-03-14', date '2016-03-13'), ', ') --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 2016-03-14, 2016-03-13 --- !query 9 +-- !query SELECT array_join(array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000'), ', ') --- !query 9 schema -struct --- !query 9 output +-- !query schema +struct +-- !query output 2016-11-15 20:54:00, 2016-11-12 20:54:00 --- !query 10 +-- !query SELECT array_join(array('a', 'b'), ', ') --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output a, b diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out index 55caab8528fa9..0e1a3d0bc4d9e 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out @@ -2,2121 +2,2121 @@ -- Number of queries: 265 --- !query 0 +-- 
!query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT cast(1 as binary) = '1' FROM t --- !query 1 schema +-- !query schema struct<(CAST(1 AS BINARY) = CAST(1 AS BINARY)):boolean> --- !query 1 output +-- !query output false --- !query 2 +-- !query SELECT cast(1 as binary) > '2' FROM t --- !query 2 schema +-- !query schema struct<(CAST(1 AS BINARY) > CAST(2 AS BINARY)):boolean> --- !query 2 output +-- !query output false --- !query 3 +-- !query SELECT cast(1 as binary) >= '2' FROM t --- !query 3 schema +-- !query schema struct<(CAST(1 AS BINARY) >= CAST(2 AS BINARY)):boolean> --- !query 3 output +-- !query output false --- !query 4 +-- !query SELECT cast(1 as binary) < '2' FROM t --- !query 4 schema +-- !query schema struct<(CAST(1 AS BINARY) < CAST(2 AS BINARY)):boolean> --- !query 4 output +-- !query output true --- !query 5 +-- !query SELECT cast(1 as binary) <= '2' FROM t --- !query 5 schema +-- !query schema struct<(CAST(1 AS BINARY) <= CAST(2 AS BINARY)):boolean> --- !query 5 output +-- !query output true --- !query 6 +-- !query SELECT cast(1 as binary) <> '2' FROM t --- !query 6 schema +-- !query schema struct<(NOT (CAST(1 AS BINARY) = CAST(2 AS BINARY))):boolean> --- !query 6 output +-- !query output true --- !query 7 +-- !query SELECT cast(1 as binary) = cast(null as string) FROM t --- !query 7 schema +-- !query schema struct<(CAST(1 AS BINARY) = CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 7 output +-- !query output NULL --- !query 8 +-- !query SELECT cast(1 as binary) > cast(null as string) FROM t --- !query 8 schema +-- !query schema struct<(CAST(1 AS BINARY) > CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 8 output +-- !query output NULL --- !query 9 +-- !query SELECT cast(1 as binary) >= cast(null as string) FROM t --- !query 9 schema +-- !query schema struct<(CAST(1 AS BINARY) >= CAST(CAST(NULL AS STRING) AS BINARY)):boolean> 
--- !query 9 output +-- !query output NULL --- !query 10 +-- !query SELECT cast(1 as binary) < cast(null as string) FROM t --- !query 10 schema +-- !query schema struct<(CAST(1 AS BINARY) < CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 10 output +-- !query output NULL --- !query 11 +-- !query SELECT cast(1 as binary) <= cast(null as string) FROM t --- !query 11 schema +-- !query schema struct<(CAST(1 AS BINARY) <= CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 11 output +-- !query output NULL --- !query 12 +-- !query SELECT cast(1 as binary) <> cast(null as string) FROM t --- !query 12 schema +-- !query schema struct<(NOT (CAST(1 AS BINARY) = CAST(CAST(NULL AS STRING) AS BINARY))):boolean> --- !query 12 output +-- !query output NULL --- !query 13 +-- !query SELECT '1' = cast(1 as binary) FROM t --- !query 13 schema +-- !query schema struct<(CAST(1 AS BINARY) = CAST(1 AS BINARY)):boolean> --- !query 13 output +-- !query output false --- !query 14 +-- !query SELECT '2' > cast(1 as binary) FROM t --- !query 14 schema +-- !query schema struct<(CAST(2 AS BINARY) > CAST(1 AS BINARY)):boolean> --- !query 14 output +-- !query output true --- !query 15 +-- !query SELECT '2' >= cast(1 as binary) FROM t --- !query 15 schema +-- !query schema struct<(CAST(2 AS BINARY) >= CAST(1 AS BINARY)):boolean> --- !query 15 output +-- !query output true --- !query 16 +-- !query SELECT '2' < cast(1 as binary) FROM t --- !query 16 schema +-- !query schema struct<(CAST(2 AS BINARY) < CAST(1 AS BINARY)):boolean> --- !query 16 output +-- !query output false --- !query 17 +-- !query SELECT '2' <= cast(1 as binary) FROM t --- !query 17 schema +-- !query schema struct<(CAST(2 AS BINARY) <= CAST(1 AS BINARY)):boolean> --- !query 17 output +-- !query output false --- !query 18 +-- !query SELECT '2' <> cast(1 as binary) FROM t --- !query 18 schema +-- !query schema struct<(NOT (CAST(2 AS BINARY) = CAST(1 AS BINARY))):boolean> --- !query 18 output +-- !query output true --- 
!query 19 +-- !query SELECT cast(null as string) = cast(1 as binary) FROM t --- !query 19 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) = CAST(1 AS BINARY)):boolean> --- !query 19 output +-- !query output NULL --- !query 20 +-- !query SELECT cast(null as string) > cast(1 as binary) FROM t --- !query 20 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) > CAST(1 AS BINARY)):boolean> --- !query 20 output +-- !query output NULL --- !query 21 +-- !query SELECT cast(null as string) >= cast(1 as binary) FROM t --- !query 21 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) >= CAST(1 AS BINARY)):boolean> --- !query 21 output +-- !query output NULL --- !query 22 +-- !query SELECT cast(null as string) < cast(1 as binary) FROM t --- !query 22 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) < CAST(1 AS BINARY)):boolean> --- !query 22 output +-- !query output NULL --- !query 23 +-- !query SELECT cast(null as string) <= cast(1 as binary) FROM t --- !query 23 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) <= CAST(1 AS BINARY)):boolean> --- !query 23 output +-- !query output NULL --- !query 24 +-- !query SELECT cast(null as string) <> cast(1 as binary) FROM t --- !query 24 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS BINARY) = CAST(1 AS BINARY))):boolean> --- !query 24 output +-- !query output NULL --- !query 25 +-- !query SELECT cast(1 as tinyint) = '1' FROM t --- !query 25 schema +-- !query schema struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> --- !query 25 output +-- !query output true --- !query 26 +-- !query SELECT cast(1 as tinyint) > '2' FROM t --- !query 26 schema +-- !query schema struct<(CAST(1 AS TINYINT) > CAST(2 AS TINYINT)):boolean> --- !query 26 output +-- !query output false --- !query 27 +-- !query SELECT cast(1 as tinyint) >= '2' FROM t --- !query 27 schema +-- !query schema struct<(CAST(1 AS TINYINT) >= CAST(2 AS 
TINYINT)):boolean> --- !query 27 output +-- !query output false --- !query 28 +-- !query SELECT cast(1 as tinyint) < '2' FROM t --- !query 28 schema +-- !query schema struct<(CAST(1 AS TINYINT) < CAST(2 AS TINYINT)):boolean> --- !query 28 output +-- !query output true --- !query 29 +-- !query SELECT cast(1 as tinyint) <= '2' FROM t --- !query 29 schema +-- !query schema struct<(CAST(1 AS TINYINT) <= CAST(2 AS TINYINT)):boolean> --- !query 29 output +-- !query output true --- !query 30 +-- !query SELECT cast(1 as tinyint) <> '2' FROM t --- !query 30 schema +-- !query schema struct<(NOT (CAST(1 AS TINYINT) = CAST(2 AS TINYINT))):boolean> --- !query 30 output +-- !query output true --- !query 31 +-- !query SELECT cast(1 as tinyint) = cast(null as string) FROM t --- !query 31 schema +-- !query schema struct<(CAST(1 AS TINYINT) = CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> --- !query 31 output +-- !query output NULL --- !query 32 +-- !query SELECT cast(1 as tinyint) > cast(null as string) FROM t --- !query 32 schema +-- !query schema struct<(CAST(1 AS TINYINT) > CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> --- !query 32 output +-- !query output NULL --- !query 33 +-- !query SELECT cast(1 as tinyint) >= cast(null as string) FROM t --- !query 33 schema +-- !query schema struct<(CAST(1 AS TINYINT) >= CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> --- !query 33 output +-- !query output NULL --- !query 34 +-- !query SELECT cast(1 as tinyint) < cast(null as string) FROM t --- !query 34 schema +-- !query schema struct<(CAST(1 AS TINYINT) < CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> --- !query 34 output +-- !query output NULL --- !query 35 +-- !query SELECT cast(1 as tinyint) <= cast(null as string) FROM t --- !query 35 schema +-- !query schema struct<(CAST(1 AS TINYINT) <= CAST(CAST(NULL AS STRING) AS TINYINT)):boolean> --- !query 35 output +-- !query output NULL --- !query 36 +-- !query SELECT cast(1 as tinyint) <> cast(null as string) FROM t --- !query 36 schema 
+-- !query schema struct<(NOT (CAST(1 AS TINYINT) = CAST(CAST(NULL AS STRING) AS TINYINT))):boolean> --- !query 36 output +-- !query output NULL --- !query 37 +-- !query SELECT '1' = cast(1 as tinyint) FROM t --- !query 37 schema +-- !query schema struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> --- !query 37 output +-- !query output true --- !query 38 +-- !query SELECT '2' > cast(1 as tinyint) FROM t --- !query 38 schema +-- !query schema struct<(CAST(2 AS TINYINT) > CAST(1 AS TINYINT)):boolean> --- !query 38 output +-- !query output true --- !query 39 +-- !query SELECT '2' >= cast(1 as tinyint) FROM t --- !query 39 schema +-- !query schema struct<(CAST(2 AS TINYINT) >= CAST(1 AS TINYINT)):boolean> --- !query 39 output +-- !query output true --- !query 40 +-- !query SELECT '2' < cast(1 as tinyint) FROM t --- !query 40 schema +-- !query schema struct<(CAST(2 AS TINYINT) < CAST(1 AS TINYINT)):boolean> --- !query 40 output +-- !query output false --- !query 41 +-- !query SELECT '2' <= cast(1 as tinyint) FROM t --- !query 41 schema +-- !query schema struct<(CAST(2 AS TINYINT) <= CAST(1 AS TINYINT)):boolean> --- !query 41 output +-- !query output false --- !query 42 +-- !query SELECT '2' <> cast(1 as tinyint) FROM t --- !query 42 schema +-- !query schema struct<(NOT (CAST(2 AS TINYINT) = CAST(1 AS TINYINT))):boolean> --- !query 42 output +-- !query output true --- !query 43 +-- !query SELECT cast(null as string) = cast(1 as tinyint) FROM t --- !query 43 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TINYINT) = CAST(1 AS TINYINT)):boolean> --- !query 43 output +-- !query output NULL --- !query 44 +-- !query SELECT cast(null as string) > cast(1 as tinyint) FROM t --- !query 44 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TINYINT) > CAST(1 AS TINYINT)):boolean> --- !query 44 output +-- !query output NULL --- !query 45 +-- !query SELECT cast(null as string) >= cast(1 as tinyint) FROM t --- !query 45 schema +-- !query schema 
struct<(CAST(CAST(NULL AS STRING) AS TINYINT) >= CAST(1 AS TINYINT)):boolean> --- !query 45 output +-- !query output NULL --- !query 46 +-- !query SELECT cast(null as string) < cast(1 as tinyint) FROM t --- !query 46 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TINYINT) < CAST(1 AS TINYINT)):boolean> --- !query 46 output +-- !query output NULL --- !query 47 +-- !query SELECT cast(null as string) <= cast(1 as tinyint) FROM t --- !query 47 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TINYINT) <= CAST(1 AS TINYINT)):boolean> --- !query 47 output +-- !query output NULL --- !query 48 +-- !query SELECT cast(null as string) <> cast(1 as tinyint) FROM t --- !query 48 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS TINYINT) = CAST(1 AS TINYINT))):boolean> --- !query 48 output +-- !query output NULL --- !query 49 +-- !query SELECT cast(1 as smallint) = '1' FROM t --- !query 49 schema +-- !query schema struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> --- !query 49 output +-- !query output true --- !query 50 +-- !query SELECT cast(1 as smallint) > '2' FROM t --- !query 50 schema +-- !query schema struct<(CAST(1 AS SMALLINT) > CAST(2 AS SMALLINT)):boolean> --- !query 50 output +-- !query output false --- !query 51 +-- !query SELECT cast(1 as smallint) >= '2' FROM t --- !query 51 schema +-- !query schema struct<(CAST(1 AS SMALLINT) >= CAST(2 AS SMALLINT)):boolean> --- !query 51 output +-- !query output false --- !query 52 +-- !query SELECT cast(1 as smallint) < '2' FROM t --- !query 52 schema +-- !query schema struct<(CAST(1 AS SMALLINT) < CAST(2 AS SMALLINT)):boolean> --- !query 52 output +-- !query output true --- !query 53 +-- !query SELECT cast(1 as smallint) <= '2' FROM t --- !query 53 schema +-- !query schema struct<(CAST(1 AS SMALLINT) <= CAST(2 AS SMALLINT)):boolean> --- !query 53 output +-- !query output true --- !query 54 +-- !query SELECT cast(1 as smallint) <> '2' FROM t --- !query 54 schema +-- !query 
schema struct<(NOT (CAST(1 AS SMALLINT) = CAST(2 AS SMALLINT))):boolean> --- !query 54 output +-- !query output true --- !query 55 +-- !query SELECT cast(1 as smallint) = cast(null as string) FROM t --- !query 55 schema +-- !query schema struct<(CAST(1 AS SMALLINT) = CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> --- !query 55 output +-- !query output NULL --- !query 56 +-- !query SELECT cast(1 as smallint) > cast(null as string) FROM t --- !query 56 schema +-- !query schema struct<(CAST(1 AS SMALLINT) > CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> --- !query 56 output +-- !query output NULL --- !query 57 +-- !query SELECT cast(1 as smallint) >= cast(null as string) FROM t --- !query 57 schema +-- !query schema struct<(CAST(1 AS SMALLINT) >= CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> --- !query 57 output +-- !query output NULL --- !query 58 +-- !query SELECT cast(1 as smallint) < cast(null as string) FROM t --- !query 58 schema +-- !query schema struct<(CAST(1 AS SMALLINT) < CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> --- !query 58 output +-- !query output NULL --- !query 59 +-- !query SELECT cast(1 as smallint) <= cast(null as string) FROM t --- !query 59 schema +-- !query schema struct<(CAST(1 AS SMALLINT) <= CAST(CAST(NULL AS STRING) AS SMALLINT)):boolean> --- !query 59 output +-- !query output NULL --- !query 60 +-- !query SELECT cast(1 as smallint) <> cast(null as string) FROM t --- !query 60 schema +-- !query schema struct<(NOT (CAST(1 AS SMALLINT) = CAST(CAST(NULL AS STRING) AS SMALLINT))):boolean> --- !query 60 output +-- !query output NULL --- !query 61 +-- !query SELECT '1' = cast(1 as smallint) FROM t --- !query 61 schema +-- !query schema struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> --- !query 61 output +-- !query output true --- !query 62 +-- !query SELECT '2' > cast(1 as smallint) FROM t --- !query 62 schema +-- !query schema struct<(CAST(2 AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> --- !query 62 output +-- !query 
output true --- !query 63 +-- !query SELECT '2' >= cast(1 as smallint) FROM t --- !query 63 schema +-- !query schema struct<(CAST(2 AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> --- !query 63 output +-- !query output true --- !query 64 +-- !query SELECT '2' < cast(1 as smallint) FROM t --- !query 64 schema +-- !query schema struct<(CAST(2 AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> --- !query 64 output +-- !query output false --- !query 65 +-- !query SELECT '2' <= cast(1 as smallint) FROM t --- !query 65 schema +-- !query schema struct<(CAST(2 AS SMALLINT) <= CAST(1 AS SMALLINT)):boolean> --- !query 65 output +-- !query output false --- !query 66 +-- !query SELECT '2' <> cast(1 as smallint) FROM t --- !query 66 schema +-- !query schema struct<(NOT (CAST(2 AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> --- !query 66 output +-- !query output true --- !query 67 +-- !query SELECT cast(null as string) = cast(1 as smallint) FROM t --- !query 67 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> --- !query 67 output +-- !query output NULL --- !query 68 +-- !query SELECT cast(null as string) > cast(1 as smallint) FROM t --- !query 68 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> --- !query 68 output +-- !query output NULL --- !query 69 +-- !query SELECT cast(null as string) >= cast(1 as smallint) FROM t --- !query 69 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> --- !query 69 output +-- !query output NULL --- !query 70 +-- !query SELECT cast(null as string) < cast(1 as smallint) FROM t --- !query 70 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> --- !query 70 output +-- !query output NULL --- !query 71 +-- !query SELECT cast(null as string) <= cast(1 as smallint) FROM t --- !query 71 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS 
SMALLINT) <= CAST(1 AS SMALLINT)):boolean> --- !query 71 output +-- !query output NULL --- !query 72 +-- !query SELECT cast(null as string) <> cast(1 as smallint) FROM t --- !query 72 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> --- !query 72 output +-- !query output NULL --- !query 73 +-- !query SELECT cast(1 as int) = '1' FROM t --- !query 73 schema +-- !query schema struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> --- !query 73 output +-- !query output true --- !query 74 +-- !query SELECT cast(1 as int) > '2' FROM t --- !query 74 schema +-- !query schema struct<(CAST(1 AS INT) > CAST(2 AS INT)):boolean> --- !query 74 output +-- !query output false --- !query 75 +-- !query SELECT cast(1 as int) >= '2' FROM t --- !query 75 schema +-- !query schema struct<(CAST(1 AS INT) >= CAST(2 AS INT)):boolean> --- !query 75 output +-- !query output false --- !query 76 +-- !query SELECT cast(1 as int) < '2' FROM t --- !query 76 schema +-- !query schema struct<(CAST(1 AS INT) < CAST(2 AS INT)):boolean> --- !query 76 output +-- !query output true --- !query 77 +-- !query SELECT cast(1 as int) <= '2' FROM t --- !query 77 schema +-- !query schema struct<(CAST(1 AS INT) <= CAST(2 AS INT)):boolean> --- !query 77 output +-- !query output true --- !query 78 +-- !query SELECT cast(1 as int) <> '2' FROM t --- !query 78 schema +-- !query schema struct<(NOT (CAST(1 AS INT) = CAST(2 AS INT))):boolean> --- !query 78 output +-- !query output true --- !query 79 +-- !query SELECT cast(1 as int) = cast(null as string) FROM t --- !query 79 schema +-- !query schema struct<(CAST(1 AS INT) = CAST(CAST(NULL AS STRING) AS INT)):boolean> --- !query 79 output +-- !query output NULL --- !query 80 +-- !query SELECT cast(1 as int) > cast(null as string) FROM t --- !query 80 schema +-- !query schema struct<(CAST(1 AS INT) > CAST(CAST(NULL AS STRING) AS INT)):boolean> --- !query 80 output +-- !query output NULL --- !query 81 +-- !query SELECT 
cast(1 as int) >= cast(null as string) FROM t --- !query 81 schema +-- !query schema struct<(CAST(1 AS INT) >= CAST(CAST(NULL AS STRING) AS INT)):boolean> --- !query 81 output +-- !query output NULL --- !query 82 +-- !query SELECT cast(1 as int) < cast(null as string) FROM t --- !query 82 schema +-- !query schema struct<(CAST(1 AS INT) < CAST(CAST(NULL AS STRING) AS INT)):boolean> --- !query 82 output +-- !query output NULL --- !query 83 +-- !query SELECT cast(1 as int) <= cast(null as string) FROM t --- !query 83 schema +-- !query schema struct<(CAST(1 AS INT) <= CAST(CAST(NULL AS STRING) AS INT)):boolean> --- !query 83 output +-- !query output NULL --- !query 84 +-- !query SELECT cast(1 as int) <> cast(null as string) FROM t --- !query 84 schema +-- !query schema struct<(NOT (CAST(1 AS INT) = CAST(CAST(NULL AS STRING) AS INT))):boolean> --- !query 84 output +-- !query output NULL --- !query 85 +-- !query SELECT '1' = cast(1 as int) FROM t --- !query 85 schema +-- !query schema struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> --- !query 85 output +-- !query output true --- !query 86 +-- !query SELECT '2' > cast(1 as int) FROM t --- !query 86 schema +-- !query schema struct<(CAST(2 AS INT) > CAST(1 AS INT)):boolean> --- !query 86 output +-- !query output true --- !query 87 +-- !query SELECT '2' >= cast(1 as int) FROM t --- !query 87 schema +-- !query schema struct<(CAST(2 AS INT) >= CAST(1 AS INT)):boolean> --- !query 87 output +-- !query output true --- !query 88 +-- !query SELECT '2' < cast(1 as int) FROM t --- !query 88 schema +-- !query schema struct<(CAST(2 AS INT) < CAST(1 AS INT)):boolean> --- !query 88 output +-- !query output false --- !query 89 +-- !query SELECT '2' <> cast(1 as int) FROM t --- !query 89 schema +-- !query schema struct<(NOT (CAST(2 AS INT) = CAST(1 AS INT))):boolean> --- !query 89 output +-- !query output true --- !query 90 +-- !query SELECT '2' <= cast(1 as int) FROM t --- !query 90 schema +-- !query schema struct<(CAST(2 AS INT) <= 
CAST(1 AS INT)):boolean> --- !query 90 output +-- !query output false --- !query 91 +-- !query SELECT cast(null as string) = cast(1 as int) FROM t --- !query 91 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS INT) = CAST(1 AS INT)):boolean> --- !query 91 output +-- !query output NULL --- !query 92 +-- !query SELECT cast(null as string) > cast(1 as int) FROM t --- !query 92 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS INT) > CAST(1 AS INT)):boolean> --- !query 92 output +-- !query output NULL --- !query 93 +-- !query SELECT cast(null as string) >= cast(1 as int) FROM t --- !query 93 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS INT) >= CAST(1 AS INT)):boolean> --- !query 93 output +-- !query output NULL --- !query 94 +-- !query SELECT cast(null as string) < cast(1 as int) FROM t --- !query 94 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS INT) < CAST(1 AS INT)):boolean> --- !query 94 output +-- !query output NULL --- !query 95 +-- !query SELECT cast(null as string) <> cast(1 as int) FROM t --- !query 95 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS INT) = CAST(1 AS INT))):boolean> --- !query 95 output +-- !query output NULL --- !query 96 +-- !query SELECT cast(null as string) <= cast(1 as int) FROM t --- !query 96 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS INT) <= CAST(1 AS INT)):boolean> --- !query 96 output +-- !query output NULL --- !query 97 +-- !query SELECT cast(1 as bigint) = '1' FROM t --- !query 97 schema +-- !query schema struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> --- !query 97 output +-- !query output true --- !query 98 +-- !query SELECT cast(1 as bigint) > '2' FROM t --- !query 98 schema +-- !query schema struct<(CAST(1 AS BIGINT) > CAST(2 AS BIGINT)):boolean> --- !query 98 output +-- !query output false --- !query 99 +-- !query SELECT cast(1 as bigint) >= '2' FROM t --- !query 99 schema +-- !query schema struct<(CAST(1 AS BIGINT) >= CAST(2 
AS BIGINT)):boolean> --- !query 99 output +-- !query output false --- !query 100 +-- !query SELECT cast(1 as bigint) < '2' FROM t --- !query 100 schema +-- !query schema struct<(CAST(1 AS BIGINT) < CAST(2 AS BIGINT)):boolean> --- !query 100 output +-- !query output true --- !query 101 +-- !query SELECT cast(1 as bigint) <= '2' FROM t --- !query 101 schema +-- !query schema struct<(CAST(1 AS BIGINT) <= CAST(2 AS BIGINT)):boolean> --- !query 101 output +-- !query output true --- !query 102 +-- !query SELECT cast(1 as bigint) <> '2' FROM t --- !query 102 schema +-- !query schema struct<(NOT (CAST(1 AS BIGINT) = CAST(2 AS BIGINT))):boolean> --- !query 102 output +-- !query output true --- !query 103 +-- !query SELECT cast(1 as bigint) = cast(null as string) FROM t --- !query 103 schema +-- !query schema struct<(CAST(1 AS BIGINT) = CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> --- !query 103 output +-- !query output NULL --- !query 104 +-- !query SELECT cast(1 as bigint) > cast(null as string) FROM t --- !query 104 schema +-- !query schema struct<(CAST(1 AS BIGINT) > CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> --- !query 104 output +-- !query output NULL --- !query 105 +-- !query SELECT cast(1 as bigint) >= cast(null as string) FROM t --- !query 105 schema +-- !query schema struct<(CAST(1 AS BIGINT) >= CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> --- !query 105 output +-- !query output NULL --- !query 106 +-- !query SELECT cast(1 as bigint) < cast(null as string) FROM t --- !query 106 schema +-- !query schema struct<(CAST(1 AS BIGINT) < CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> --- !query 106 output +-- !query output NULL --- !query 107 +-- !query SELECT cast(1 as bigint) <= cast(null as string) FROM t --- !query 107 schema +-- !query schema struct<(CAST(1 AS BIGINT) <= CAST(CAST(NULL AS STRING) AS BIGINT)):boolean> --- !query 107 output +-- !query output NULL --- !query 108 +-- !query SELECT cast(1 as bigint) <> cast(null as string) FROM t --- !query 108 
schema +-- !query schema struct<(NOT (CAST(1 AS BIGINT) = CAST(CAST(NULL AS STRING) AS BIGINT))):boolean> --- !query 108 output +-- !query output NULL --- !query 109 +-- !query SELECT '1' = cast(1 as bigint) FROM t --- !query 109 schema +-- !query schema struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> --- !query 109 output +-- !query output true --- !query 110 +-- !query SELECT '2' > cast(1 as bigint) FROM t --- !query 110 schema +-- !query schema struct<(CAST(2 AS BIGINT) > CAST(1 AS BIGINT)):boolean> --- !query 110 output +-- !query output true --- !query 111 +-- !query SELECT '2' >= cast(1 as bigint) FROM t --- !query 111 schema +-- !query schema struct<(CAST(2 AS BIGINT) >= CAST(1 AS BIGINT)):boolean> --- !query 111 output +-- !query output true --- !query 112 +-- !query SELECT '2' < cast(1 as bigint) FROM t --- !query 112 schema +-- !query schema struct<(CAST(2 AS BIGINT) < CAST(1 AS BIGINT)):boolean> --- !query 112 output +-- !query output false --- !query 113 +-- !query SELECT '2' <= cast(1 as bigint) FROM t --- !query 113 schema +-- !query schema struct<(CAST(2 AS BIGINT) <= CAST(1 AS BIGINT)):boolean> --- !query 113 output +-- !query output false --- !query 114 +-- !query SELECT '2' <> cast(1 as bigint) FROM t --- !query 114 schema +-- !query schema struct<(NOT (CAST(2 AS BIGINT) = CAST(1 AS BIGINT))):boolean> --- !query 114 output +-- !query output true --- !query 115 +-- !query SELECT cast(null as string) = cast(1 as bigint) FROM t --- !query 115 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BIGINT) = CAST(1 AS BIGINT)):boolean> --- !query 115 output +-- !query output NULL --- !query 116 +-- !query SELECT cast(null as string) > cast(1 as bigint) FROM t --- !query 116 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BIGINT) > CAST(1 AS BIGINT)):boolean> --- !query 116 output +-- !query output NULL --- !query 117 +-- !query SELECT cast(null as string) >= cast(1 as bigint) FROM t --- !query 117 schema +-- !query schema 
struct<(CAST(CAST(NULL AS STRING) AS BIGINT) >= CAST(1 AS BIGINT)):boolean> --- !query 117 output +-- !query output NULL --- !query 118 +-- !query SELECT cast(null as string) < cast(1 as bigint) FROM t --- !query 118 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BIGINT) < CAST(1 AS BIGINT)):boolean> --- !query 118 output +-- !query output NULL --- !query 119 +-- !query SELECT cast(null as string) <= cast(1 as bigint) FROM t --- !query 119 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BIGINT) <= CAST(1 AS BIGINT)):boolean> --- !query 119 output +-- !query output NULL --- !query 120 +-- !query SELECT cast(null as string) <> cast(1 as bigint) FROM t --- !query 120 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS BIGINT) = CAST(1 AS BIGINT))):boolean> --- !query 120 output +-- !query output NULL --- !query 121 +-- !query SELECT cast(1 as decimal(10, 0)) = '1' FROM t --- !query 121 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 121 output +-- !query output true --- !query 122 +-- !query SELECT cast(1 as decimal(10, 0)) > '2' FROM t --- !query 122 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(2 AS DOUBLE)):boolean> --- !query 122 output +-- !query output false --- !query 123 +-- !query SELECT cast(1 as decimal(10, 0)) >= '2' FROM t --- !query 123 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(2 AS DOUBLE)):boolean> --- !query 123 output +-- !query output false --- !query 124 +-- !query SELECT cast(1 as decimal(10, 0)) < '2' FROM t --- !query 124 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(2 AS DOUBLE)):boolean> --- !query 124 output +-- !query output true --- !query 125 +-- !query SELECT cast(1 as decimal(10, 0)) <> '2' FROM t --- !query 125 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(2 AS DOUBLE))):boolean> 
--- !query 125 output +-- !query output true --- !query 126 +-- !query SELECT cast(1 as decimal(10, 0)) <= '2' FROM t --- !query 126 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(2 AS DOUBLE)):boolean> --- !query 126 output +-- !query output true --- !query 127 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(null as string) FROM t --- !query 127 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 127 output +-- !query output NULL --- !query 128 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(null as string) FROM t --- !query 128 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 128 output +-- !query output NULL --- !query 129 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(null as string) FROM t --- !query 129 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 129 output +-- !query output NULL --- !query 130 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(null as string) FROM t --- !query 130 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 130 output +-- !query output NULL --- !query 131 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(null as string) FROM t --- !query 131 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE))):boolean> --- !query 131 output +-- !query output NULL --- !query 132 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(null as string) FROM t --- !query 132 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 132 output +-- !query output NULL --- !query 133 +-- !query SELECT '1' = cast(1 as decimal(10, 0)) FROM t 
--- !query 133 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 133 output +-- !query output true --- !query 134 +-- !query SELECT '2' > cast(1 as decimal(10, 0)) FROM t --- !query 134 schema +-- !query schema struct<(CAST(2 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 134 output +-- !query output true --- !query 135 +-- !query SELECT '2' >= cast(1 as decimal(10, 0)) FROM t --- !query 135 schema +-- !query schema struct<(CAST(2 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 135 output +-- !query output true --- !query 136 +-- !query SELECT '2' < cast(1 as decimal(10, 0)) FROM t --- !query 136 schema +-- !query schema struct<(CAST(2 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 136 output +-- !query output false --- !query 137 +-- !query SELECT '2' <= cast(1 as decimal(10, 0)) FROM t --- !query 137 schema +-- !query schema struct<(CAST(2 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 137 output +-- !query output false --- !query 138 +-- !query SELECT '2' <> cast(1 as decimal(10, 0)) FROM t --- !query 138 schema +-- !query schema struct<(NOT (CAST(2 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 138 output +-- !query output true --- !query 139 +-- !query SELECT cast(null as string) = cast(1 as decimal(10, 0)) FROM t --- !query 139 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 139 output +-- !query output NULL --- !query 140 +-- !query SELECT cast(null as string) > cast(1 as decimal(10, 0)) FROM t --- !query 140 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 140 output +-- !query output NULL --- !query 141 +-- !query SELECT cast(null as string) >= cast(1 as decimal(10, 0)) FROM t --- 
!query 141 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 141 output +-- !query output NULL --- !query 142 +-- !query SELECT cast(null as string) < cast(1 as decimal(10, 0)) FROM t --- !query 142 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 142 output +-- !query output NULL --- !query 143 +-- !query SELECT cast(null as string) <= cast(1 as decimal(10, 0)) FROM t --- !query 143 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 143 output +-- !query output NULL --- !query 144 +-- !query SELECT cast(null as string) <> cast(1 as decimal(10, 0)) FROM t --- !query 144 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 144 output +-- !query output NULL --- !query 145 +-- !query SELECT cast(1 as double) = '1' FROM t --- !query 145 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 145 output +-- !query output true --- !query 146 +-- !query SELECT cast(1 as double) > '2' FROM t --- !query 146 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(2 AS DOUBLE)):boolean> --- !query 146 output +-- !query output false --- !query 147 +-- !query SELECT cast(1 as double) >= '2' FROM t --- !query 147 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(2 AS DOUBLE)):boolean> --- !query 147 output +-- !query output false --- !query 148 +-- !query SELECT cast(1 as double) < '2' FROM t --- !query 148 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(2 AS DOUBLE)):boolean> --- !query 148 output +-- !query output true --- !query 149 +-- !query SELECT cast(1 as double) <= '2' FROM t --- !query 149 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(2 AS DOUBLE)):boolean> --- 
!query 149 output +-- !query output true --- !query 150 +-- !query SELECT cast(1 as double) <> '2' FROM t --- !query 150 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(2 AS DOUBLE))):boolean> --- !query 150 output +-- !query output true --- !query 151 +-- !query SELECT cast(1 as double) = cast(null as string) FROM t --- !query 151 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 151 output +-- !query output NULL --- !query 152 +-- !query SELECT cast(1 as double) > cast(null as string) FROM t --- !query 152 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 152 output +-- !query output NULL --- !query 153 +-- !query SELECT cast(1 as double) >= cast(null as string) FROM t --- !query 153 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 153 output +-- !query output NULL --- !query 154 +-- !query SELECT cast(1 as double) < cast(null as string) FROM t --- !query 154 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 154 output +-- !query output NULL --- !query 155 +-- !query SELECT cast(1 as double) <= cast(null as string) FROM t --- !query 155 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(CAST(NULL AS STRING) AS DOUBLE)):boolean> --- !query 155 output +-- !query output NULL --- !query 156 +-- !query SELECT cast(1 as double) <> cast(null as string) FROM t --- !query 156 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(NULL AS STRING) AS DOUBLE))):boolean> --- !query 156 output +-- !query output NULL --- !query 157 +-- !query SELECT '1' = cast(1 as double) FROM t --- !query 157 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 157 output +-- !query output true --- !query 158 +-- !query SELECT '2' > cast(1 as double) FROM t --- !query 158 
schema +-- !query schema struct<(CAST(2 AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 158 output +-- !query output true --- !query 159 +-- !query SELECT '2' >= cast(1 as double) FROM t --- !query 159 schema +-- !query schema struct<(CAST(2 AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 159 output +-- !query output true --- !query 160 +-- !query SELECT '2' < cast(1 as double) FROM t --- !query 160 schema +-- !query schema struct<(CAST(2 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 160 output +-- !query output false --- !query 161 +-- !query SELECT '2' <= cast(1 as double) FROM t --- !query 161 schema +-- !query schema struct<(CAST(2 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 161 output +-- !query output false --- !query 162 +-- !query SELECT '2' <> cast(1 as double) FROM t --- !query 162 schema +-- !query schema struct<(NOT (CAST(2 AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 162 output +-- !query output true --- !query 163 +-- !query SELECT cast(null as string) = cast(1 as double) FROM t --- !query 163 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 163 output +-- !query output NULL --- !query 164 +-- !query SELECT cast(null as string) > cast(1 as double) FROM t --- !query 164 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 164 output +-- !query output NULL --- !query 165 +-- !query SELECT cast(null as string) >= cast(1 as double) FROM t --- !query 165 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 165 output +-- !query output NULL --- !query 166 +-- !query SELECT cast(null as string) < cast(1 as double) FROM t --- !query 166 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 166 output +-- !query output NULL --- !query 167 +-- !query SELECT cast(null as string) <= cast(1 as double) 
FROM t --- !query 167 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 167 output +-- !query output NULL --- !query 168 +-- !query SELECT cast(null as string) <> cast(1 as double) FROM t --- !query 168 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 168 output +-- !query output NULL --- !query 169 +-- !query SELECT cast(1 as float) = '1' FROM t --- !query 169 schema +-- !query schema struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> --- !query 169 output +-- !query output true --- !query 170 +-- !query SELECT cast(1 as float) > '2' FROM t --- !query 170 schema +-- !query schema struct<(CAST(1 AS FLOAT) > CAST(2 AS FLOAT)):boolean> --- !query 170 output +-- !query output false --- !query 171 +-- !query SELECT cast(1 as float) >= '2' FROM t --- !query 171 schema +-- !query schema struct<(CAST(1 AS FLOAT) >= CAST(2 AS FLOAT)):boolean> --- !query 171 output +-- !query output false --- !query 172 +-- !query SELECT cast(1 as float) < '2' FROM t --- !query 172 schema +-- !query schema struct<(CAST(1 AS FLOAT) < CAST(2 AS FLOAT)):boolean> --- !query 172 output +-- !query output true --- !query 173 +-- !query SELECT cast(1 as float) <= '2' FROM t --- !query 173 schema +-- !query schema struct<(CAST(1 AS FLOAT) <= CAST(2 AS FLOAT)):boolean> --- !query 173 output +-- !query output true --- !query 174 +-- !query SELECT cast(1 as float) <> '2' FROM t --- !query 174 schema +-- !query schema struct<(NOT (CAST(1 AS FLOAT) = CAST(2 AS FLOAT))):boolean> --- !query 174 output +-- !query output true --- !query 175 +-- !query SELECT cast(1 as float) = cast(null as string) FROM t --- !query 175 schema +-- !query schema struct<(CAST(1 AS FLOAT) = CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> --- !query 175 output +-- !query output NULL --- !query 176 +-- !query SELECT cast(1 as float) > cast(null as string) FROM t --- !query 176 schema +-- !query schema 
struct<(CAST(1 AS FLOAT) > CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> --- !query 176 output +-- !query output NULL --- !query 177 +-- !query SELECT cast(1 as float) >= cast(null as string) FROM t --- !query 177 schema +-- !query schema struct<(CAST(1 AS FLOAT) >= CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> --- !query 177 output +-- !query output NULL --- !query 178 +-- !query SELECT cast(1 as float) < cast(null as string) FROM t --- !query 178 schema +-- !query schema struct<(CAST(1 AS FLOAT) < CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> --- !query 178 output +-- !query output NULL --- !query 179 +-- !query SELECT cast(1 as float) <= cast(null as string) FROM t --- !query 179 schema +-- !query schema struct<(CAST(1 AS FLOAT) <= CAST(CAST(NULL AS STRING) AS FLOAT)):boolean> --- !query 179 output +-- !query output NULL --- !query 180 +-- !query SELECT cast(1 as float) <> cast(null as string) FROM t --- !query 180 schema +-- !query schema struct<(NOT (CAST(1 AS FLOAT) = CAST(CAST(NULL AS STRING) AS FLOAT))):boolean> --- !query 180 output +-- !query output NULL --- !query 181 +-- !query SELECT '1' = cast(1 as float) FROM t --- !query 181 schema +-- !query schema struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> --- !query 181 output +-- !query output true --- !query 182 +-- !query SELECT '2' > cast(1 as float) FROM t --- !query 182 schema +-- !query schema struct<(CAST(2 AS FLOAT) > CAST(1 AS FLOAT)):boolean> --- !query 182 output +-- !query output true --- !query 183 +-- !query SELECT '2' >= cast(1 as float) FROM t --- !query 183 schema +-- !query schema struct<(CAST(2 AS FLOAT) >= CAST(1 AS FLOAT)):boolean> --- !query 183 output +-- !query output true --- !query 184 +-- !query SELECT '2' < cast(1 as float) FROM t --- !query 184 schema +-- !query schema struct<(CAST(2 AS FLOAT) < CAST(1 AS FLOAT)):boolean> --- !query 184 output +-- !query output false --- !query 185 +-- !query SELECT '2' <= cast(1 as float) FROM t --- !query 185 schema +-- !query schema 
struct<(CAST(2 AS FLOAT) <= CAST(1 AS FLOAT)):boolean> --- !query 185 output +-- !query output false --- !query 186 +-- !query SELECT '2' <> cast(1 as float) FROM t --- !query 186 schema +-- !query schema struct<(NOT (CAST(2 AS FLOAT) = CAST(1 AS FLOAT))):boolean> --- !query 186 output +-- !query output true --- !query 187 +-- !query SELECT cast(null as string) = cast(1 as float) FROM t --- !query 187 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS FLOAT) = CAST(1 AS FLOAT)):boolean> --- !query 187 output +-- !query output NULL --- !query 188 +-- !query SELECT cast(null as string) > cast(1 as float) FROM t --- !query 188 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS FLOAT) > CAST(1 AS FLOAT)):boolean> --- !query 188 output +-- !query output NULL --- !query 189 +-- !query SELECT cast(null as string) >= cast(1 as float) FROM t --- !query 189 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS FLOAT) >= CAST(1 AS FLOAT)):boolean> --- !query 189 output +-- !query output NULL --- !query 190 +-- !query SELECT cast(null as string) < cast(1 as float) FROM t --- !query 190 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS FLOAT) < CAST(1 AS FLOAT)):boolean> --- !query 190 output +-- !query output NULL --- !query 191 +-- !query SELECT cast(null as string) <= cast(1 as float) FROM t --- !query 191 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS FLOAT) <= CAST(1 AS FLOAT)):boolean> --- !query 191 output +-- !query output NULL --- !query 192 +-- !query SELECT cast(null as string) <> cast(1 as float) FROM t --- !query 192 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS FLOAT) = CAST(1 AS FLOAT))):boolean> --- !query 192 output +-- !query output NULL --- !query 193 +-- !query SELECT '1996-09-09' = date('1996-09-09') FROM t --- !query 193 schema +-- !query schema struct<(CAST(1996-09-09 AS DATE) = CAST(1996-09-09 AS DATE)):boolean> --- !query 193 output +-- !query output true --- !query 194 +-- 
!query SELECT '1996-9-10' > date('1996-09-09') FROM t --- !query 194 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) > CAST(1996-09-09 AS DATE)):boolean> --- !query 194 output +-- !query output true --- !query 195 +-- !query SELECT '1996-9-10' >= date('1996-09-09') FROM t --- !query 195 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) >= CAST(1996-09-09 AS DATE)):boolean> --- !query 195 output +-- !query output true --- !query 196 +-- !query SELECT '1996-9-10' < date('1996-09-09') FROM t --- !query 196 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) < CAST(1996-09-09 AS DATE)):boolean> --- !query 196 output +-- !query output false --- !query 197 +-- !query SELECT '1996-9-10' <= date('1996-09-09') FROM t --- !query 197 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) <= CAST(1996-09-09 AS DATE)):boolean> --- !query 197 output +-- !query output false --- !query 198 +-- !query SELECT '1996-9-10' <> date('1996-09-09') FROM t --- !query 198 schema +-- !query schema struct<(NOT (CAST(1996-9-10 AS DATE) = CAST(1996-09-09 AS DATE))):boolean> --- !query 198 output +-- !query output true --- !query 199 +-- !query SELECT cast(null as string) = date('1996-09-09') FROM t --- !query 199 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DATE) = CAST(1996-09-09 AS DATE)):boolean> --- !query 199 output +-- !query output NULL --- !query 200 +-- !query SELECT cast(null as string)> date('1996-09-09') FROM t --- !query 200 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DATE) > CAST(1996-09-09 AS DATE)):boolean> --- !query 200 output +-- !query output NULL --- !query 201 +-- !query SELECT cast(null as string)>= date('1996-09-09') FROM t --- !query 201 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DATE) >= CAST(1996-09-09 AS DATE)):boolean> --- !query 201 output +-- !query output NULL --- !query 202 +-- !query SELECT cast(null as string)< date('1996-09-09') FROM t --- !query 202 schema +-- !query schema 
struct<(CAST(CAST(NULL AS STRING) AS DATE) < CAST(1996-09-09 AS DATE)):boolean> --- !query 202 output +-- !query output NULL --- !query 203 +-- !query SELECT cast(null as string)<= date('1996-09-09') FROM t --- !query 203 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS DATE) <= CAST(1996-09-09 AS DATE)):boolean> --- !query 203 output +-- !query output NULL --- !query 204 +-- !query SELECT cast(null as string)<> date('1996-09-09') FROM t --- !query 204 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS DATE) = CAST(1996-09-09 AS DATE))):boolean> --- !query 204 output +-- !query output NULL --- !query 205 +-- !query SELECT date('1996-09-09') = '1996-09-09' FROM t --- !query 205 schema +-- !query schema struct<(CAST(1996-09-09 AS DATE) = CAST(1996-09-09 AS DATE)):boolean> --- !query 205 output +-- !query output true --- !query 206 +-- !query SELECT date('1996-9-10') > '1996-09-09' FROM t --- !query 206 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) > CAST(1996-09-09 AS DATE)):boolean> --- !query 206 output +-- !query output true --- !query 207 +-- !query SELECT date('1996-9-10') >= '1996-09-09' FROM t --- !query 207 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) >= CAST(1996-09-09 AS DATE)):boolean> --- !query 207 output +-- !query output true --- !query 208 +-- !query SELECT date('1996-9-10') < '1996-09-09' FROM t --- !query 208 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) < CAST(1996-09-09 AS DATE)):boolean> --- !query 208 output +-- !query output false --- !query 209 +-- !query SELECT date('1996-9-10') <= '1996-09-09' FROM t --- !query 209 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) <= CAST(1996-09-09 AS DATE)):boolean> --- !query 209 output +-- !query output false --- !query 210 +-- !query SELECT date('1996-9-10') <> '1996-09-09' FROM t --- !query 210 schema +-- !query schema struct<(NOT (CAST(1996-9-10 AS DATE) = CAST(1996-09-09 AS DATE))):boolean> --- !query 210 output +-- !query output 
true --- !query 211 +-- !query SELECT date('1996-09-09') = cast(null as string) FROM t --- !query 211 schema +-- !query schema struct<(CAST(1996-09-09 AS DATE) = CAST(CAST(NULL AS STRING) AS DATE)):boolean> --- !query 211 output +-- !query output NULL --- !query 212 +-- !query SELECT date('1996-9-10') > cast(null as string) FROM t --- !query 212 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) > CAST(CAST(NULL AS STRING) AS DATE)):boolean> --- !query 212 output +-- !query output NULL --- !query 213 +-- !query SELECT date('1996-9-10') >= cast(null as string) FROM t --- !query 213 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) >= CAST(CAST(NULL AS STRING) AS DATE)):boolean> --- !query 213 output +-- !query output NULL --- !query 214 +-- !query SELECT date('1996-9-10') < cast(null as string) FROM t --- !query 214 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) < CAST(CAST(NULL AS STRING) AS DATE)):boolean> --- !query 214 output +-- !query output NULL --- !query 215 +-- !query SELECT date('1996-9-10') <= cast(null as string) FROM t --- !query 215 schema +-- !query schema struct<(CAST(1996-9-10 AS DATE) <= CAST(CAST(NULL AS STRING) AS DATE)):boolean> --- !query 215 output +-- !query output NULL --- !query 216 +-- !query SELECT date('1996-9-10') <> cast(null as string) FROM t --- !query 216 schema +-- !query schema struct<(NOT (CAST(1996-9-10 AS DATE) = CAST(CAST(NULL AS STRING) AS DATE))):boolean> --- !query 216 output +-- !query output NULL --- !query 217 +-- !query SELECT '1996-09-09 12:12:12.4' = timestamp('1996-09-09 12:12:12.4') FROM t --- !query 217 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 217 output +-- !query output true --- !query 218 +-- !query SELECT '1996-09-09 12:12:12.5' > timestamp('1996-09-09 12:12:12.4') FROM t --- !query 218 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) > CAST(1996-09-09 12:12:12.4 AS 
TIMESTAMP)):boolean> --- !query 218 output +-- !query output true --- !query 219 +-- !query SELECT '1996-09-09 12:12:12.5' >= timestamp('1996-09-09 12:12:12.4') FROM t --- !query 219 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) >= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 219 output +-- !query output true --- !query 220 +-- !query SELECT '1996-09-09 12:12:12.5' < timestamp('1996-09-09 12:12:12.4') FROM t --- !query 220 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) < CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 220 output +-- !query output false --- !query 221 +-- !query SELECT '1996-09-09 12:12:12.5' <= timestamp('1996-09-09 12:12:12.4') FROM t --- !query 221 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) <= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 221 output +-- !query output false --- !query 222 +-- !query SELECT '1996-09-09 12:12:12.5' <> timestamp('1996-09-09 12:12:12.4') FROM t --- !query 222 schema +-- !query schema struct<(NOT (CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP))):boolean> --- !query 222 output +-- !query output true --- !query 223 +-- !query SELECT cast(null as string) = timestamp('1996-09-09 12:12:12.4') FROM t --- !query 223 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 223 output +-- !query output NULL --- !query 224 +-- !query SELECT cast(null as string) > timestamp('1996-09-09 12:12:12.4') FROM t --- !query 224 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) > CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 224 output +-- !query output NULL --- !query 225 +-- !query SELECT cast(null as string) >= timestamp('1996-09-09 12:12:12.4') FROM t --- !query 225 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) >= 
CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 225 output +-- !query output NULL --- !query 226 +-- !query SELECT cast(null as string) < timestamp('1996-09-09 12:12:12.4') FROM t --- !query 226 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) < CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 226 output +-- !query output NULL --- !query 227 +-- !query SELECT cast(null as string) <= timestamp('1996-09-09 12:12:12.4') FROM t --- !query 227 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) <= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 227 output +-- !query output NULL --- !query 228 +-- !query SELECT cast(null as string) <> timestamp('1996-09-09 12:12:12.4') FROM t --- !query 228 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP))):boolean> --- !query 228 output +-- !query output NULL --- !query 229 +-- !query SELECT timestamp('1996-09-09 12:12:12.4' )= '1996-09-09 12:12:12.4' FROM t --- !query 229 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 229 output +-- !query output true --- !query 230 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )> '1996-09-09 12:12:12.4' FROM t --- !query 230 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) > CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 230 output +-- !query output true --- !query 231 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )>= '1996-09-09 12:12:12.4' FROM t --- !query 231 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) >= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 231 output +-- !query output true --- !query 232 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )< '1996-09-09 12:12:12.4' FROM t --- !query 232 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 
AS TIMESTAMP) < CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 232 output +-- !query output false --- !query 233 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )<= '1996-09-09 12:12:12.4' FROM t --- !query 233 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) <= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> --- !query 233 output +-- !query output false --- !query 234 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )<> '1996-09-09 12:12:12.4' FROM t --- !query 234 schema +-- !query schema struct<(NOT (CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) = CAST(1996-09-09 12:12:12.4 AS TIMESTAMP))):boolean> --- !query 234 output +-- !query output true --- !query 235 +-- !query SELECT timestamp('1996-09-09 12:12:12.4' )= cast(null as string) FROM t --- !query 235 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) = CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> --- !query 235 output +-- !query output NULL --- !query 236 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )> cast(null as string) FROM t --- !query 236 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) > CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> --- !query 236 output +-- !query output NULL --- !query 237 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )>= cast(null as string) FROM t --- !query 237 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) >= CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> --- !query 237 output +-- !query output NULL --- !query 238 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )< cast(null as string) FROM t --- !query 238 schema +-- !query schema struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) < CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> --- !query 238 output +-- !query output NULL --- !query 239 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )<= cast(null as string) FROM t --- !query 239 schema +-- !query schema struct<(CAST(1996-09-09 
12:12:12.5 AS TIMESTAMP) <= CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> --- !query 239 output +-- !query output NULL --- !query 240 +-- !query SELECT timestamp('1996-09-09 12:12:12.5' )<> cast(null as string) FROM t --- !query 240 schema +-- !query schema struct<(NOT (CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) = CAST(CAST(NULL AS STRING) AS TIMESTAMP))):boolean> --- !query 240 output +-- !query output NULL --- !query 241 +-- !query SELECT ' ' = X'0020' FROM t --- !query 241 schema +-- !query schema struct<(CAST( AS BINARY) = X'0020'):boolean> --- !query 241 output +-- !query output false --- !query 242 +-- !query SELECT ' ' > X'001F' FROM t --- !query 242 schema +-- !query schema struct<(CAST( AS BINARY) > X'001F'):boolean> --- !query 242 output +-- !query output true --- !query 243 +-- !query SELECT ' ' >= X'001F' FROM t --- !query 243 schema +-- !query schema struct<(CAST( AS BINARY) >= X'001F'):boolean> --- !query 243 output +-- !query output true --- !query 244 +-- !query SELECT ' ' < X'001F' FROM t --- !query 244 schema +-- !query schema struct<(CAST( AS BINARY) < X'001F'):boolean> --- !query 244 output +-- !query output false --- !query 245 +-- !query SELECT ' ' <= X'001F' FROM t --- !query 245 schema +-- !query schema struct<(CAST( AS BINARY) <= X'001F'):boolean> --- !query 245 output +-- !query output false --- !query 246 +-- !query SELECT ' ' <> X'001F' FROM t --- !query 246 schema +-- !query schema struct<(NOT (CAST( AS BINARY) = X'001F')):boolean> --- !query 246 output +-- !query output true --- !query 247 +-- !query SELECT cast(null as string) = X'0020' FROM t --- !query 247 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) = X'0020'):boolean> --- !query 247 output +-- !query output NULL --- !query 248 +-- !query SELECT cast(null as string) > X'001F' FROM t --- !query 248 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) > X'001F'):boolean> --- !query 248 output +-- !query output NULL --- !query 249 +-- !query 
SELECT cast(null as string) >= X'001F' FROM t --- !query 249 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) >= X'001F'):boolean> --- !query 249 output +-- !query output NULL --- !query 250 +-- !query SELECT cast(null as string) < X'001F' FROM t --- !query 250 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) < X'001F'):boolean> --- !query 250 output +-- !query output NULL --- !query 251 +-- !query SELECT cast(null as string) <= X'001F' FROM t --- !query 251 schema +-- !query schema struct<(CAST(CAST(NULL AS STRING) AS BINARY) <= X'001F'):boolean> --- !query 251 output +-- !query output NULL --- !query 252 +-- !query SELECT cast(null as string) <> X'001F' FROM t --- !query 252 schema +-- !query schema struct<(NOT (CAST(CAST(NULL AS STRING) AS BINARY) = X'001F')):boolean> --- !query 252 output +-- !query output NULL --- !query 253 +-- !query SELECT X'0020' = ' ' FROM t --- !query 253 schema +-- !query schema struct<(X'0020' = CAST( AS BINARY)):boolean> --- !query 253 output +-- !query output false --- !query 254 +-- !query SELECT X'001F' > ' ' FROM t --- !query 254 schema +-- !query schema struct<(X'001F' > CAST( AS BINARY)):boolean> --- !query 254 output +-- !query output false --- !query 255 +-- !query SELECT X'001F' >= ' ' FROM t --- !query 255 schema +-- !query schema struct<(X'001F' >= CAST( AS BINARY)):boolean> --- !query 255 output +-- !query output false --- !query 256 +-- !query SELECT X'001F' < ' ' FROM t --- !query 256 schema +-- !query schema struct<(X'001F' < CAST( AS BINARY)):boolean> --- !query 256 output +-- !query output true --- !query 257 +-- !query SELECT X'001F' <= ' ' FROM t --- !query 257 schema +-- !query schema struct<(X'001F' <= CAST( AS BINARY)):boolean> --- !query 257 output +-- !query output true --- !query 258 +-- !query SELECT X'001F' <> ' ' FROM t --- !query 258 schema +-- !query schema struct<(NOT (X'001F' = CAST( AS BINARY))):boolean> --- !query 258 output +-- !query output true --- !query 
259 +-- !query SELECT X'0020' = cast(null as string) FROM t --- !query 259 schema +-- !query schema struct<(X'0020' = CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 259 output +-- !query output NULL --- !query 260 +-- !query SELECT X'001F' > cast(null as string) FROM t --- !query 260 schema +-- !query schema struct<(X'001F' > CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 260 output +-- !query output NULL --- !query 261 +-- !query SELECT X'001F' >= cast(null as string) FROM t --- !query 261 schema +-- !query schema struct<(X'001F' >= CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 261 output +-- !query output NULL --- !query 262 +-- !query SELECT X'001F' < cast(null as string) FROM t --- !query 262 schema +-- !query schema struct<(X'001F' < CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 262 output +-- !query output NULL --- !query 263 +-- !query SELECT X'001F' <= cast(null as string) FROM t --- !query 263 schema +-- !query schema struct<(X'001F' <= CAST(CAST(NULL AS STRING) AS BINARY)):boolean> --- !query 263 output +-- !query output NULL --- !query 264 +-- !query SELECT X'001F' <> cast(null as string) FROM t --- !query 264 schema +-- !query schema struct<(NOT (X'001F' = CAST(CAST(NULL AS STRING) AS BINARY))):boolean> --- !query 264 output +-- !query output NULL diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/booleanEquality.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/booleanEquality.sql.out index 46775d79ff4a2..dc068e70d66db 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/booleanEquality.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/booleanEquality.sql.out @@ -2,801 +2,801 @@ -- Number of queries: 97 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT true = cast(1 as 
tinyint) FROM t --- !query 1 schema +-- !query schema struct<(CAST(true AS TINYINT) = CAST(1 AS TINYINT)):boolean> --- !query 1 output +-- !query output true --- !query 2 +-- !query SELECT true = cast(1 as smallint) FROM t --- !query 2 schema +-- !query schema struct<(CAST(true AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> --- !query 2 output +-- !query output true --- !query 3 +-- !query SELECT true = cast(1 as int) FROM t --- !query 3 schema +-- !query schema struct<(CAST(true AS INT) = CAST(1 AS INT)):boolean> --- !query 3 output +-- !query output true --- !query 4 +-- !query SELECT true = cast(1 as bigint) FROM t --- !query 4 schema +-- !query schema struct<(CAST(true AS BIGINT) = CAST(1 AS BIGINT)):boolean> --- !query 4 output +-- !query output true --- !query 5 +-- !query SELECT true = cast(1 as float) FROM t --- !query 5 schema +-- !query schema struct<(CAST(true AS FLOAT) = CAST(1 AS FLOAT)):boolean> --- !query 5 output +-- !query output true --- !query 6 +-- !query SELECT true = cast(1 as double) FROM t --- !query 6 schema +-- !query schema struct<(CAST(true AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 6 output +-- !query output true --- !query 7 +-- !query SELECT true = cast(1 as decimal(10, 0)) FROM t --- !query 7 schema +-- !query schema struct<(CAST(true AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> --- !query 7 output +-- !query output true --- !query 8 +-- !query SELECT true = cast(1 as string) FROM t --- !query 8 schema +-- !query schema struct<(true = CAST(CAST(1 AS STRING) AS BOOLEAN)):boolean> --- !query 8 output +-- !query output true --- !query 9 +-- !query SELECT true = cast('1' as binary) FROM t --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(true = CAST('1' AS BINARY))' (boolean and binary).; line 1 pos 7 --- !query 10 +-- !query SELECT true = cast(1 as 
boolean) FROM t --- !query 10 schema +-- !query schema struct<(true = CAST(1 AS BOOLEAN)):boolean> --- !query 10 output +-- !query output true --- !query 11 +-- !query SELECT true = cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(true = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 --- !query 12 +-- !query SELECT true = cast('2017-12-11 09:30:00' as date) FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(true = CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 --- !query 13 +-- !query SELECT true <=> cast(1 as tinyint) FROM t --- !query 13 schema +-- !query schema struct<(CAST(true AS TINYINT) <=> CAST(1 AS TINYINT)):boolean> --- !query 13 output +-- !query output true --- !query 14 +-- !query SELECT true <=> cast(1 as smallint) FROM t --- !query 14 schema +-- !query schema struct<(CAST(true AS SMALLINT) <=> CAST(1 AS SMALLINT)):boolean> --- !query 14 output +-- !query output true --- !query 15 +-- !query SELECT true <=> cast(1 as int) FROM t --- !query 15 schema +-- !query schema struct<(CAST(true AS INT) <=> CAST(1 AS INT)):boolean> --- !query 15 output +-- !query output true --- !query 16 +-- !query SELECT true <=> cast(1 as bigint) FROM t --- !query 16 schema +-- !query schema struct<(CAST(true AS BIGINT) <=> CAST(1 AS BIGINT)):boolean> --- !query 16 output +-- !query output true --- !query 17 +-- !query SELECT true <=> cast(1 as float) FROM t --- !query 17 schema +-- !query schema struct<(CAST(true AS FLOAT) <=> CAST(1 AS FLOAT)):boolean> --- !query 17 output +-- !query 
output true --- !query 18 +-- !query SELECT true <=> cast(1 as double) FROM t --- !query 18 schema +-- !query schema struct<(CAST(true AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 18 output +-- !query output true --- !query 19 +-- !query SELECT true <=> cast(1 as decimal(10, 0)) FROM t --- !query 19 schema +-- !query schema struct<(CAST(true AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> --- !query 19 output +-- !query output true --- !query 20 +-- !query SELECT true <=> cast(1 as string) FROM t --- !query 20 schema +-- !query schema struct<(true <=> CAST(CAST(1 AS STRING) AS BOOLEAN)):boolean> --- !query 20 output +-- !query output true --- !query 21 +-- !query SELECT true <=> cast('1' as binary) FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(true <=> CAST('1' AS BINARY))' (boolean and binary).; line 1 pos 7 --- !query 22 +-- !query SELECT true <=> cast(1 as boolean) FROM t --- !query 22 schema +-- !query schema struct<(true <=> CAST(1 AS BOOLEAN)):boolean> --- !query 22 output +-- !query output true --- !query 23 +-- !query SELECT true <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(true <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 --- !query 24 +-- !query SELECT true <=> cast('2017-12-11 09:30:00' as date) FROM t --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(true <=> CAST('2017-12-11 
09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 --- !query 25 +-- !query SELECT cast(1 as tinyint) = true FROM t --- !query 25 schema +-- !query schema struct<(CAST(1 AS TINYINT) = CAST(true AS TINYINT)):boolean> --- !query 25 output +-- !query output true --- !query 26 +-- !query SELECT cast(1 as smallint) = true FROM t --- !query 26 schema +-- !query schema struct<(CAST(1 AS SMALLINT) = CAST(true AS SMALLINT)):boolean> --- !query 26 output +-- !query output true --- !query 27 +-- !query SELECT cast(1 as int) = true FROM t --- !query 27 schema +-- !query schema struct<(CAST(1 AS INT) = CAST(true AS INT)):boolean> --- !query 27 output +-- !query output true --- !query 28 +-- !query SELECT cast(1 as bigint) = true FROM t --- !query 28 schema +-- !query schema struct<(CAST(1 AS BIGINT) = CAST(true AS BIGINT)):boolean> --- !query 28 output +-- !query output true --- !query 29 +-- !query SELECT cast(1 as float) = true FROM t --- !query 29 schema +-- !query schema struct<(CAST(1 AS FLOAT) = CAST(true AS FLOAT)):boolean> --- !query 29 output +-- !query output true --- !query 30 +-- !query SELECT cast(1 as double) = true FROM t --- !query 30 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(true AS DOUBLE)):boolean> --- !query 30 output +-- !query output true --- !query 31 +-- !query SELECT cast(1 as decimal(10, 0)) = true FROM t --- !query 31 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) = CAST(true AS DECIMAL(10,0))):boolean> --- !query 31 output +-- !query output true --- !query 32 +-- !query SELECT cast(1 as string) = true FROM t --- !query 32 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) = true):boolean> --- !query 32 output +-- !query output true --- !query 33 +-- !query SELECT cast('1' as binary) = true FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = true)' due to data type mismatch: differing 
types in '(CAST('1' AS BINARY) = true)' (binary and boolean).; line 1 pos 7 --- !query 34 +-- !query SELECT cast(1 as boolean) = true FROM t --- !query 34 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) = true):boolean> --- !query 34 output +-- !query output true --- !query 35 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) = true FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = true)' (timestamp and boolean).; line 1 pos 7 --- !query 36 +-- !query SELECT cast('2017-12-11 09:30:00' as date) = true FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = true)' (date and boolean).; line 1 pos 7 --- !query 37 +-- !query SELECT cast(1 as tinyint) <=> true FROM t --- !query 37 schema +-- !query schema struct<(CAST(1 AS TINYINT) <=> CAST(true AS TINYINT)):boolean> --- !query 37 output +-- !query output true --- !query 38 +-- !query SELECT cast(1 as smallint) <=> true FROM t --- !query 38 schema +-- !query schema struct<(CAST(1 AS SMALLINT) <=> CAST(true AS SMALLINT)):boolean> --- !query 38 output +-- !query output true --- !query 39 +-- !query SELECT cast(1 as int) <=> true FROM t --- !query 39 schema +-- !query schema struct<(CAST(1 AS INT) <=> CAST(true AS INT)):boolean> --- !query 39 output +-- !query output true --- !query 40 +-- !query SELECT cast(1 as bigint) <=> true FROM t --- !query 40 schema +-- !query schema struct<(CAST(1 AS BIGINT) <=> CAST(true AS BIGINT)):boolean> --- !query 40 output +-- !query output true --- !query 41 +-- !query SELECT cast(1 as float) <=> true FROM t --- 
!query 41 schema +-- !query schema struct<(CAST(1 AS FLOAT) <=> CAST(true AS FLOAT)):boolean> --- !query 41 output +-- !query output true --- !query 42 +-- !query SELECT cast(1 as double) <=> true FROM t --- !query 42 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(true AS DOUBLE)):boolean> --- !query 42 output +-- !query output true --- !query 43 +-- !query SELECT cast(1 as decimal(10, 0)) <=> true FROM t --- !query 43 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(true AS DECIMAL(10,0))):boolean> --- !query 43 output +-- !query output true --- !query 44 +-- !query SELECT cast(1 as string) <=> true FROM t --- !query 44 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS BOOLEAN) <=> true):boolean> --- !query 44 output +-- !query output true --- !query 45 +-- !query SELECT cast('1' as binary) <=> true FROM t --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <=> true)' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> true)' (binary and boolean).; line 1 pos 7 --- !query 46 +-- !query SELECT cast(1 as boolean) <=> true FROM t --- !query 46 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) <=> true):boolean> --- !query 46 output +-- !query output true --- !query 47 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> true FROM t --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> true)' (timestamp and boolean).; line 1 pos 7 --- !query 48 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <=> true FROM t --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 
'(CAST('2017-12-11 09:30:00' AS DATE) <=> true)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> true)' (date and boolean).; line 1 pos 7 --- !query 49 +-- !query SELECT false = cast(0 as tinyint) FROM t --- !query 49 schema +-- !query schema struct<(CAST(false AS TINYINT) = CAST(0 AS TINYINT)):boolean> --- !query 49 output +-- !query output true --- !query 50 +-- !query SELECT false = cast(0 as smallint) FROM t --- !query 50 schema +-- !query schema struct<(CAST(false AS SMALLINT) = CAST(0 AS SMALLINT)):boolean> --- !query 50 output +-- !query output true --- !query 51 +-- !query SELECT false = cast(0 as int) FROM t --- !query 51 schema +-- !query schema struct<(CAST(false AS INT) = CAST(0 AS INT)):boolean> --- !query 51 output +-- !query output true --- !query 52 +-- !query SELECT false = cast(0 as bigint) FROM t --- !query 52 schema +-- !query schema struct<(CAST(false AS BIGINT) = CAST(0 AS BIGINT)):boolean> --- !query 52 output +-- !query output true --- !query 53 +-- !query SELECT false = cast(0 as float) FROM t --- !query 53 schema +-- !query schema struct<(CAST(false AS FLOAT) = CAST(0 AS FLOAT)):boolean> --- !query 53 output +-- !query output true --- !query 54 +-- !query SELECT false = cast(0 as double) FROM t --- !query 54 schema +-- !query schema struct<(CAST(false AS DOUBLE) = CAST(0 AS DOUBLE)):boolean> --- !query 54 output +-- !query output true --- !query 55 +-- !query SELECT false = cast(0 as decimal(10, 0)) FROM t --- !query 55 schema +-- !query schema struct<(CAST(false AS DECIMAL(10,0)) = CAST(0 AS DECIMAL(10,0))):boolean> --- !query 55 output +-- !query output true --- !query 56 +-- !query SELECT false = cast(0 as string) FROM t --- !query 56 schema +-- !query schema struct<(false = CAST(CAST(0 AS STRING) AS BOOLEAN)):boolean> --- !query 56 output +-- !query output true --- !query 57 +-- !query SELECT false = cast('0' as binary) FROM t --- !query 57 schema +-- !query schema struct<> --- !query 57 output 
+-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(false = CAST('0' AS BINARY))' due to data type mismatch: differing types in '(false = CAST('0' AS BINARY))' (boolean and binary).; line 1 pos 7 --- !query 58 +-- !query SELECT false = cast(0 as boolean) FROM t --- !query 58 schema +-- !query schema struct<(false = CAST(0 AS BOOLEAN)):boolean> --- !query 58 output +-- !query output true --- !query 59 +-- !query SELECT false = cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(false = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(false = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 --- !query 60 +-- !query SELECT false = cast('2017-12-11 09:30:00' as date) FROM t --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(false = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(false = CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 --- !query 61 +-- !query SELECT false <=> cast(0 as tinyint) FROM t --- !query 61 schema +-- !query schema struct<(CAST(false AS TINYINT) <=> CAST(0 AS TINYINT)):boolean> --- !query 61 output +-- !query output true --- !query 62 +-- !query SELECT false <=> cast(0 as smallint) FROM t --- !query 62 schema +-- !query schema struct<(CAST(false AS SMALLINT) <=> CAST(0 AS SMALLINT)):boolean> --- !query 62 output +-- !query output true --- !query 63 +-- !query SELECT false <=> cast(0 as int) FROM t --- !query 63 schema +-- !query schema struct<(CAST(false AS INT) <=> CAST(0 AS INT)):boolean> --- !query 63 output +-- !query output true --- !query 64 +-- !query SELECT false <=> cast(0 as bigint) FROM t --- !query 64 schema +-- !query schema struct<(CAST(false 
AS BIGINT) <=> CAST(0 AS BIGINT)):boolean> --- !query 64 output +-- !query output true --- !query 65 +-- !query SELECT false <=> cast(0 as float) FROM t --- !query 65 schema +-- !query schema struct<(CAST(false AS FLOAT) <=> CAST(0 AS FLOAT)):boolean> --- !query 65 output +-- !query output true --- !query 66 +-- !query SELECT false <=> cast(0 as double) FROM t --- !query 66 schema +-- !query schema struct<(CAST(false AS DOUBLE) <=> CAST(0 AS DOUBLE)):boolean> --- !query 66 output +-- !query output true --- !query 67 +-- !query SELECT false <=> cast(0 as decimal(10, 0)) FROM t --- !query 67 schema +-- !query schema struct<(CAST(false AS DECIMAL(10,0)) <=> CAST(0 AS DECIMAL(10,0))):boolean> --- !query 67 output +-- !query output true --- !query 68 +-- !query SELECT false <=> cast(0 as string) FROM t --- !query 68 schema +-- !query schema struct<(false <=> CAST(CAST(0 AS STRING) AS BOOLEAN)):boolean> --- !query 68 output +-- !query output true --- !query 69 +-- !query SELECT false <=> cast('0' as binary) FROM t --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(false <=> CAST('0' AS BINARY))' due to data type mismatch: differing types in '(false <=> CAST('0' AS BINARY))' (boolean and binary).; line 1 pos 7 --- !query 70 +-- !query SELECT false <=> cast(0 as boolean) FROM t --- !query 70 schema +-- !query schema struct<(false <=> CAST(0 AS BOOLEAN)):boolean> --- !query 70 output +-- !query output true --- !query 71 +-- !query SELECT false <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(false <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(false <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 --- !query 72 +-- !query SELECT false <=> 
cast('2017-12-11 09:30:00' as date) FROM t --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(false <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(false <=> CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 --- !query 73 +-- !query SELECT cast(0 as tinyint) = false FROM t --- !query 73 schema +-- !query schema struct<(CAST(0 AS TINYINT) = CAST(false AS TINYINT)):boolean> --- !query 73 output +-- !query output true --- !query 74 +-- !query SELECT cast(0 as smallint) = false FROM t --- !query 74 schema +-- !query schema struct<(CAST(0 AS SMALLINT) = CAST(false AS SMALLINT)):boolean> --- !query 74 output +-- !query output true --- !query 75 +-- !query SELECT cast(0 as int) = false FROM t --- !query 75 schema +-- !query schema struct<(CAST(0 AS INT) = CAST(false AS INT)):boolean> --- !query 75 output +-- !query output true --- !query 76 +-- !query SELECT cast(0 as bigint) = false FROM t --- !query 76 schema +-- !query schema struct<(CAST(0 AS BIGINT) = CAST(false AS BIGINT)):boolean> --- !query 76 output +-- !query output true --- !query 77 +-- !query SELECT cast(0 as float) = false FROM t --- !query 77 schema +-- !query schema struct<(CAST(0 AS FLOAT) = CAST(false AS FLOAT)):boolean> --- !query 77 output +-- !query output true --- !query 78 +-- !query SELECT cast(0 as double) = false FROM t --- !query 78 schema +-- !query schema struct<(CAST(0 AS DOUBLE) = CAST(false AS DOUBLE)):boolean> --- !query 78 output +-- !query output true --- !query 79 +-- !query SELECT cast(0 as decimal(10, 0)) = false FROM t --- !query 79 schema +-- !query schema struct<(CAST(0 AS DECIMAL(10,0)) = CAST(false AS DECIMAL(10,0))):boolean> --- !query 79 output +-- !query output true --- !query 80 +-- !query SELECT cast(0 as string) = false FROM t --- !query 80 schema +-- !query schema struct<(CAST(CAST(0 AS STRING) AS BOOLEAN) = 
false):boolean> --- !query 80 output +-- !query output true --- !query 81 +-- !query SELECT cast('0' as binary) = false FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('0' AS BINARY) = false)' due to data type mismatch: differing types in '(CAST('0' AS BINARY) = false)' (binary and boolean).; line 1 pos 7 --- !query 82 +-- !query SELECT cast(0 as boolean) = false FROM t --- !query 82 schema +-- !query schema struct<(CAST(0 AS BOOLEAN) = false):boolean> --- !query 82 output +-- !query output true --- !query 83 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) = false FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = false)' (timestamp and boolean).; line 1 pos 7 --- !query 84 +-- !query SELECT cast('2017-12-11 09:30:00' as date) = false FROM t --- !query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = false)' (date and boolean).; line 1 pos 7 --- !query 85 +-- !query SELECT cast(0 as tinyint) <=> false FROM t --- !query 85 schema +-- !query schema struct<(CAST(0 AS TINYINT) <=> CAST(false AS TINYINT)):boolean> --- !query 85 output +-- !query output true --- !query 86 +-- !query SELECT cast(0 as smallint) <=> false FROM t --- !query 86 schema +-- !query schema struct<(CAST(0 AS SMALLINT) <=> CAST(false AS SMALLINT)):boolean> --- !query 86 output +-- !query output true --- !query 87 +-- !query SELECT cast(0 as int) <=> false FROM t --- !query 87 schema +-- !query schema struct<(CAST(0 AS INT) <=> 
CAST(false AS INT)):boolean> --- !query 87 output +-- !query output true --- !query 88 +-- !query SELECT cast(0 as bigint) <=> false FROM t --- !query 88 schema +-- !query schema struct<(CAST(0 AS BIGINT) <=> CAST(false AS BIGINT)):boolean> --- !query 88 output +-- !query output true --- !query 89 +-- !query SELECT cast(0 as float) <=> false FROM t --- !query 89 schema +-- !query schema struct<(CAST(0 AS FLOAT) <=> CAST(false AS FLOAT)):boolean> --- !query 89 output +-- !query output true --- !query 90 +-- !query SELECT cast(0 as double) <=> false FROM t --- !query 90 schema +-- !query schema struct<(CAST(0 AS DOUBLE) <=> CAST(false AS DOUBLE)):boolean> --- !query 90 output +-- !query output true --- !query 91 +-- !query SELECT cast(0 as decimal(10, 0)) <=> false FROM t --- !query 91 schema +-- !query schema struct<(CAST(0 AS DECIMAL(10,0)) <=> CAST(false AS DECIMAL(10,0))):boolean> --- !query 91 output +-- !query output true --- !query 92 +-- !query SELECT cast(0 as string) <=> false FROM t --- !query 92 schema +-- !query schema struct<(CAST(CAST(0 AS STRING) AS BOOLEAN) <=> false):boolean> --- !query 92 output +-- !query output true --- !query 93 +-- !query SELECT cast('0' as binary) <=> false FROM t --- !query 93 schema +-- !query schema struct<> --- !query 93 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('0' AS BINARY) <=> false)' due to data type mismatch: differing types in '(CAST('0' AS BINARY) <=> false)' (binary and boolean).; line 1 pos 7 --- !query 94 +-- !query SELECT cast(0 as boolean) <=> false FROM t --- !query 94 schema +-- !query schema struct<(CAST(0 AS BOOLEAN) <=> false):boolean> --- !query 94 output +-- !query output true --- !query 95 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> false FROM t --- !query 95 schema +-- !query schema struct<> --- !query 95 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> 
false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> false)' (timestamp and boolean).; line 1 pos 7 --- !query 96 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <=> false FROM t --- !query 96 schema +-- !query schema struct<> --- !query 96 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> false)' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> false)' (date and boolean).; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/caseWhenCoercion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/caseWhenCoercion.sql.out index 1e1cbc3304141..18d97c2f1b42a 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/caseWhenCoercion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/caseWhenCoercion.sql.out @@ -2,1231 +2,1231 @@ -- Number of queries: 145 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as tinyint) END FROM t --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 --- !query 2 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as smallint) END FROM t --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 --- !query 3 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as int) END FROM t --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 --- !query 4 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as bigint) END FROM t --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 --- !query 5 +-- !query SELECT CASE WHEN true THEN cast(1 as 
tinyint) ELSE cast(2 as float) END FROM t --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1.0 --- !query 6 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as double) END FROM t --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1.0 --- !query 7 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 --- !query 8 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as string) END FROM t --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 --- !query 9 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2' as binary) END FROM t --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN tinyint ELSE binary END; line 1 pos 7 --- !query 10 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast(2 as boolean) END FROM t --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN tinyint ELSE boolean END; line 1 pos 7 --- !query 11 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN tinyint ELSE timestamp END; line 1 pos 7 --- !query 12 +-- !query SELECT CASE WHEN true THEN cast(1 as tinyint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS TINYINT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN tinyint ELSE date END; line 1 pos 7 --- !query 13 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as tinyint) END FROM t --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 --- !query 14 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as smallint) END FROM t --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 --- !query 15 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as int) END FROM t --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 --- !query 16 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as bigint) END FROM t --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 1 --- !query 17 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as float) END FROM t --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1.0 --- !query 18 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as double) END FROM t --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1.0 --- !query 19 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 1 --- !query 20 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as string) END FROM t --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 1 --- !query 21 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2' as binary) END FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or 
coercible to a common type, got CASE WHEN ... THEN smallint ELSE binary END; line 1 pos 7 --- !query 22 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast(2 as boolean) END FROM t --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN smallint ELSE boolean END; line 1 pos 7 --- !query 23 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN smallint ELSE timestamp END; line 1 pos 7 --- !query 24 +-- !query SELECT CASE WHEN true THEN cast(1 as smallint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS SMALLINT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN smallint ELSE date END; line 1 pos 7 --- !query 25 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as tinyint) END FROM t --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1 --- !query 26 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as smallint) END FROM t --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 1 --- !query 27 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as int) END FROM t --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 1 --- !query 28 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as bigint) END FROM t --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 1 --- !query 29 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as float) END FROM t --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 1.0 --- !query 30 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as double) END FROM t --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1.0 --- !query 31 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 1 --- !query 32 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as string) END FROM t --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 1 --- !query 33 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2' as binary) END FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN int ELSE binary END; line 1 pos 7 --- !query 34 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast(2 as boolean) END FROM t --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN int ELSE boolean END; line 1 pos 7 --- !query 35 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN int ELSE timestamp END; line 1 pos 7 --- !query 36 +-- !query SELECT CASE WHEN true THEN cast(1 as int) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS INT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN int ELSE date END; line 1 pos 7 --- !query 37 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as tinyint) END FROM t --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 1 --- !query 38 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as smallint) END FROM t --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 1 --- !query 39 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as int) END FROM t --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 1 --- !query 40 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as bigint) END FROM t --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output 1 --- !query 41 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as float) END FROM t --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output 1.0 --- !query 42 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as double) END FROM t --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output 1.0 --- !query 43 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 1 --- !query 44 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as string) END FROM t --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output 1 --- !query 45 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2' as binary) END FROM t --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, 
got CASE WHEN ... THEN bigint ELSE binary END; line 1 pos 7 --- !query 46 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast(2 as boolean) END FROM t --- !query 46 schema +-- !query schema struct<> --- !query 46 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN bigint ELSE boolean END; line 1 pos 7 --- !query 47 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN bigint ELSE timestamp END; line 1 pos 7 --- !query 48 +-- !query SELECT CASE WHEN true THEN cast(1 as bigint) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BIGINT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN bigint ELSE date END; line 1 pos 7 --- !query 49 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as tinyint) END FROM t --- !query 49 schema +-- !query schema struct --- !query 49 output +-- !query output 1.0 --- !query 50 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as smallint) END FROM t --- !query 50 schema +-- !query schema struct --- !query 50 output +-- !query output 1.0 --- !query 51 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as int) END FROM t --- !query 51 schema +-- !query schema struct --- !query 51 output +-- !query output 1.0 --- !query 52 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as bigint) END FROM t --- !query 52 schema +-- !query schema struct --- !query 52 output +-- !query output 1.0 --- !query 53 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as float) END FROM t --- !query 53 schema +-- !query schema struct --- !query 53 output +-- !query output 1.0 --- !query 54 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as double) END FROM t --- !query 54 schema +-- !query schema struct --- !query 54 output +-- !query output 1.0 --- !query 55 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 55 schema +-- !query schema struct --- !query 55 output +-- !query output 1.0 --- !query 56 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as string) END FROM t --- !query 56 schema +-- !query schema struct --- !query 56 output +-- !query output 1.0 --- !query 57 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2' as binary) END FROM t --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common 
type, got CASE WHEN ... THEN float ELSE binary END; line 1 pos 7 --- !query 58 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast(2 as boolean) END FROM t --- !query 58 schema +-- !query schema struct<> --- !query 58 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN float ELSE boolean END; line 1 pos 7 --- !query 59 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN float ELSE timestamp END; line 1 pos 7 --- !query 60 +-- !query SELECT CASE WHEN true THEN cast(1 as float) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS FLOAT) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN float ELSE date END; line 1 pos 7 --- !query 61 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as tinyint) END FROM t --- !query 61 schema +-- !query schema struct --- !query 61 output +-- !query output 1.0 --- !query 62 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as smallint) END FROM t --- !query 62 schema +-- !query schema struct --- !query 62 output +-- !query output 1.0 --- !query 63 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as int) END FROM t --- !query 63 schema +-- !query schema struct --- !query 63 output +-- !query output 1.0 --- !query 64 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as bigint) END FROM t --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output 1.0 --- !query 65 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as float) END FROM t --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output 1.0 --- !query 66 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as double) END FROM t --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output 1.0 --- !query 67 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output 1.0 --- !query 68 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as string) END FROM t --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output 1.0 --- !query 69 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2' as binary) END FROM t --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to 
a common type, got CASE WHEN ... THEN double ELSE binary END; line 1 pos 7 --- !query 70 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast(2 as boolean) END FROM t --- !query 70 schema +-- !query schema struct<> --- !query 70 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN double ELSE boolean END; line 1 pos 7 --- !query 71 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN double ELSE timestamp END; line 1 pos 7 --- !query 72 +-- !query SELECT CASE WHEN true THEN cast(1 as double) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DOUBLE) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN double ELSE date END; line 1 pos 7 --- !query 73 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as tinyint) END FROM t --- !query 73 schema +-- !query schema struct --- !query 73 output +-- !query output 1 --- !query 74 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as smallint) END FROM t --- !query 74 schema +-- !query schema struct --- !query 74 output +-- !query output 1 --- !query 75 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as int) END FROM t --- !query 75 schema +-- !query schema struct --- !query 75 output +-- !query output 1 --- !query 76 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as bigint) END FROM t --- !query 76 schema +-- !query schema struct --- !query 76 output +-- !query output 1 --- !query 77 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as float) END FROM t --- !query 77 schema +-- !query schema struct --- !query 77 output +-- !query output 1.0 --- !query 78 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as double) END FROM t --- !query 78 schema +-- !query schema struct --- !query 78 output +-- !query output 1.0 --- !query 79 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 79 schema +-- !query schema struct --- !query 79 output +-- !query output 1 --- !query 80 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as string) END FROM t --- !query 80 schema +-- !query schema struct --- !query 80 output +-- !query output 1 --- !query 81 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2' as binary) END FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST('2' AS BINARY) END' due to data type 
mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN decimal(10,0) ELSE binary END; line 1 pos 7 --- !query 82 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast(2 as boolean) END FROM t --- !query 82 schema +-- !query schema struct<> --- !query 82 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN decimal(10,0) ELSE boolean END; line 1 pos 7 --- !query 83 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN decimal(10,0) ELSE timestamp END; line 1 pos 7 --- !query 84 +-- !query SELECT CASE WHEN true THEN cast(1 as decimal(10, 0)) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS DECIMAL(10,0)) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN decimal(10,0) ELSE date END; line 1 pos 7 --- !query 85 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as tinyint) END FROM t --- !query 85 schema +-- !query schema struct --- !query 85 output +-- !query output 1 --- !query 86 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as smallint) END FROM t --- !query 86 schema +-- !query schema struct --- !query 86 output +-- !query output 1 --- !query 87 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as int) END FROM t --- !query 87 schema +-- !query schema struct --- !query 87 output +-- !query output 1 --- !query 88 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as bigint) END FROM t --- !query 88 schema +-- !query schema struct --- !query 88 output +-- !query output 1 --- !query 89 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as float) END FROM t --- !query 89 schema +-- !query schema struct --- !query 89 output +-- !query output 1 --- !query 90 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as double) END FROM t --- !query 90 schema +-- !query schema struct --- !query 90 output +-- !query output 1 --- !query 91 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 91 schema +-- !query schema struct --- !query 91 output +-- !query output 1 --- !query 92 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as string) END FROM t --- !query 92 schema +-- !query schema struct --- !query 92 output +-- !query output 1 --- !query 93 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2' as binary) END FROM t --- !query 93 schema +-- !query schema struct<> --- !query 93 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS STRING) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common 
type, got CASE WHEN ... THEN string ELSE binary END; line 1 pos 7 --- !query 94 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast(2 as boolean) END FROM t --- !query 94 schema +-- !query schema struct<> --- !query 94 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS STRING) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN string ELSE boolean END; line 1 pos 7 --- !query 95 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 95 schema +-- !query schema struct --- !query 95 output +-- !query output 1 --- !query 96 +-- !query SELECT CASE WHEN true THEN cast(1 as string) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 96 schema +-- !query schema struct --- !query 96 output +-- !query output 1 --- !query 97 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as tinyint) END FROM t --- !query 97 schema +-- !query schema struct<> --- !query 97 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE tinyint END; line 1 pos 7 --- !query 98 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as smallint) END FROM t --- !query 98 schema +-- !query schema struct<> --- !query 98 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN binary ELSE smallint END; line 1 pos 7 --- !query 99 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as int) END FROM t --- !query 99 schema +-- !query schema struct<> --- !query 99 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE int END; line 1 pos 7 --- !query 100 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as bigint) END FROM t --- !query 100 schema +-- !query schema struct<> --- !query 100 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE bigint END; line 1 pos 7 --- !query 101 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as float) END FROM t --- !query 101 schema +-- !query schema struct<> --- !query 101 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE float END; line 1 pos 7 --- !query 102 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as double) END FROM t --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN binary ELSE double END; line 1 pos 7 --- !query 103 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE decimal(10,0) END; line 1 pos 7 --- !query 104 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as string) END FROM t --- !query 104 schema +-- !query schema struct<> --- !query 104 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS STRING) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE string END; line 1 pos 7 --- !query 105 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2' as binary) END FROM t --- !query 105 schema +-- !query schema struct --- !query 105 output +-- !query output 1 --- !query 106 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast(2 as boolean) END FROM t --- !query 106 schema +-- !query schema struct<> --- !query 106 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN binary ELSE boolean END; line 1 pos 7 --- !query 107 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 107 schema +-- !query schema struct<> --- !query 107 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE timestamp END; line 1 pos 7 --- !query 108 +-- !query SELECT CASE WHEN true THEN cast('1' as binary) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 108 schema +-- !query schema struct<> --- !query 108 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('1' AS BINARY) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN binary ELSE date END; line 1 pos 7 --- !query 109 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as tinyint) END FROM t --- !query 109 schema +-- !query schema struct<> --- !query 109 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN boolean ELSE tinyint END; line 1 pos 7 --- !query 110 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as smallint) END FROM t --- !query 110 schema +-- !query schema struct<> --- !query 110 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE smallint END; line 1 pos 7 --- !query 111 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as int) END FROM t --- !query 111 schema +-- !query schema struct<> --- !query 111 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE int END; line 1 pos 7 --- !query 112 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as bigint) END FROM t --- !query 112 schema +-- !query schema struct<> --- !query 112 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE bigint END; line 1 pos 7 --- !query 113 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as float) END FROM t --- !query 113 schema +-- !query schema struct<> --- !query 113 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN boolean ELSE float END; line 1 pos 7 --- !query 114 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as double) END FROM t --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE double END; line 1 pos 7 --- !query 115 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE decimal(10,0) END; line 1 pos 7 --- !query 116 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as string) END FROM t --- !query 116 schema +-- !query schema struct<> --- !query 116 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST(2 AS STRING) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE string END; line 1 pos 7 --- !query 117 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2' as binary) END FROM t --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN boolean ELSE binary END; line 1 pos 7 --- !query 118 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast(2 as boolean) END FROM t --- !query 118 schema +-- !query schema struct --- !query 118 output +-- !query output true --- !query 119 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 119 schema +-- !query schema struct<> --- !query 119 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE timestamp END; line 1 pos 7 --- !query 120 +-- !query SELECT CASE WHEN true THEN cast(1 as boolean) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST(1 AS BOOLEAN) ELSE CAST('2017-12-11 09:30:00' AS DATE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN boolean ELSE date END; line 1 pos 7 --- !query 121 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as tinyint) END FROM t --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN timestamp ELSE tinyint END; line 1 pos 7 --- !query 122 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as smallint) END FROM t --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN timestamp ELSE smallint END; line 1 pos 7 --- !query 123 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as int) END FROM t --- !query 123 schema +-- !query schema struct<> --- !query 123 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN timestamp ELSE int END; line 1 pos 7 --- !query 124 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as bigint) END FROM t --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN timestamp ELSE bigint END; line 1 pos 7 --- !query 125 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as float) END FROM t --- !query 125 schema +-- !query schema struct<> --- !query 125 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN timestamp ELSE float END; line 1 pos 7 --- !query 126 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as double) END FROM t --- !query 126 schema +-- !query schema struct<> --- !query 126 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN timestamp ELSE double END; line 1 pos 7 --- !query 127 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN timestamp ELSE decimal(10,0) END; line 1 pos 7 --- !query 128 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as string) END FROM t --- !query 128 schema +-- !query schema struct --- !query 128 output +-- !query output 2017-12-12 09:30:00 --- !query 129 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2' as binary) END FROM t --- !query 129 schema +-- !query schema struct<> --- !query 129 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN timestamp ELSE binary END; line 1 pos 7 --- !query 130 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast(2 as boolean) END FROM t --- !query 130 schema +-- !query schema struct<> --- !query 130 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN timestamp ELSE boolean END; line 1 pos 7 --- !query 131 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 131 schema +-- !query schema struct --- !query 131 output +-- !query output 2017-12-12 09:30:00 --- !query 132 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00.0' as timestamp) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 132 schema +-- !query schema struct --- !query 132 output +-- !query output 2017-12-12 09:30:00 --- !query 133 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as tinyint) END FROM t --- !query 133 schema +-- !query schema struct<> --- !query 133 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS TINYINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN date ELSE tinyint END; line 1 pos 7 --- !query 134 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as smallint) END FROM t --- !query 134 schema +-- !query schema struct<> --- !query 134 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS SMALLINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN date ELSE smallint END; line 1 pos 7 --- !query 135 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as int) END FROM t --- !query 135 schema +-- !query schema struct<> --- !query 135 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS INT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN date ELSE int END; line 1 pos 7 --- !query 136 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as bigint) END FROM t --- !query 136 schema +-- !query schema struct<> --- !query 136 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS BIGINT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN date ELSE bigint END; line 1 pos 7 --- !query 137 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as float) END FROM t --- !query 137 schema +-- !query schema struct<> --- !query 137 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS FLOAT) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN date ELSE float END; line 1 pos 7 --- !query 138 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as double) END FROM t --- !query 138 schema +-- !query schema struct<> --- !query 138 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS DOUBLE) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN date ELSE double END; line 1 pos 7 --- !query 139 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as decimal(10, 0)) END FROM t --- !query 139 schema +-- !query schema struct<> --- !query 139 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS DECIMAL(10,0)) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN date ELSE decimal(10,0) END; line 1 pos 7 --- !query 140 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as string) END FROM t --- !query 140 schema +-- !query schema struct --- !query 140 output +-- !query output 2017-12-12 --- !query 141 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2' as binary) END FROM t --- !query 141 schema +-- !query schema struct<> --- !query 141 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST('2' AS BINARY) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... 
THEN date ELSE binary END; line 1 pos 7 --- !query 142 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast(2 as boolean) END FROM t --- !query 142 schema +-- !query schema struct<> --- !query 142 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'CASE WHEN true THEN CAST('2017-12-12 09:30:00' AS DATE) ELSE CAST(2 AS BOOLEAN) END' due to data type mismatch: THEN and ELSE expressions should all be same type or coercible to a common type, got CASE WHEN ... THEN date ELSE boolean END; line 1 pos 7 --- !query 143 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2017-12-11 09:30:00.0' as timestamp) END FROM t --- !query 143 schema +-- !query schema struct --- !query 143 output +-- !query output 2017-12-12 00:00:00 --- !query 144 +-- !query SELECT CASE WHEN true THEN cast('2017-12-12 09:30:00' as date) ELSE cast('2017-12-11 09:30:00' as date) END FROM t --- !query 144 schema +-- !query schema struct --- !query 144 output +-- !query output 2017-12-12 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out index 6c6d3110d7d0d..bd157c474d249 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/concat.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 14 --- !query 0 +-- !query SELECT (col1 || col2 || col3) col FROM ( SELECT @@ -11,9 +11,9 @@ FROM ( encode(string(id + 2), 'utf-8') col3 FROM range(10) ) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 012 123 234 @@ -26,7 +26,7 @@ struct 91011 --- !query 1 +-- !query SELECT ((col1 || col2) || (col3 || col4) || col5) col FROM ( SELECT @@ -37,9 +37,9 @@ FROM ( CAST(id AS DOUBLE) col5 FROM range(10) ) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query 
output prefix_0120.0 prefix_1231.0 prefix_2342.0 @@ -52,7 +52,7 @@ prefix_89108.0 prefix_910119.0 --- !query 2 +-- !query SELECT ((col1 || col2) || (col3 || col4)) col FROM ( SELECT @@ -62,9 +62,9 @@ FROM ( encode(string(id + 3), 'utf-8') col4 FROM range(10) ) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 0123 1234 2345 @@ -77,15 +77,15 @@ struct 9101112 --- !query 3 +-- !query set spark.sql.function.concatBinaryAsString=true --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output spark.sql.function.concatBinaryAsString true --- !query 4 +-- !query SELECT (col1 || col2) col FROM ( SELECT @@ -93,9 +93,9 @@ FROM ( encode(string(id + 1), 'utf-8') col2 FROM range(10) ) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 01 12 23 @@ -108,7 +108,7 @@ struct 910 --- !query 5 +-- !query SELECT (col1 || col2 || col3 || col4) col FROM ( SELECT @@ -118,9 +118,9 @@ FROM ( encode(string(id + 3), 'utf-8') col4 FROM range(10) ) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 0123 1234 2345 @@ -133,7 +133,7 @@ struct 9101112 --- !query 6 +-- !query SELECT ((col1 || col2) || (col3 || col4)) col FROM ( SELECT @@ -143,9 +143,9 @@ FROM ( encode(string(id + 3), 'utf-8') col4 FROM range(10) ) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 0123 1234 2345 @@ -158,15 +158,15 @@ struct 9101112 --- !query 7 +-- !query set spark.sql.function.concatBinaryAsString=false --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output spark.sql.function.concatBinaryAsString false --- !query 8 +-- !query SELECT (col1 || col2) col FROM ( SELECT @@ -174,9 +174,9 @@ FROM ( encode(string(id + 1), 'utf-8') col2 FROM range(10) ) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 01 12 23 @@ -189,7 +189,7 @@ struct 910 --- !query 9 +-- !query SELECT (col1 || col2 || col3 || col4) col FROM ( 
SELECT @@ -199,9 +199,9 @@ FROM ( encode(string(id + 3), 'utf-8') col4 FROM range(10) ) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 0123 1234 2345 @@ -214,7 +214,7 @@ struct 9101112 --- !query 10 +-- !query SELECT ((col1 || col2) || (col3 || col4)) col FROM ( SELECT @@ -224,9 +224,9 @@ FROM ( encode(string(id + 3), 'utf-8') col4 FROM range(10) ) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 0123 1234 2345 @@ -239,7 +239,7 @@ struct 9101112 --- !query 11 +-- !query CREATE TEMPORARY VIEW various_arrays AS SELECT * FROM VALUES ( array(true, false), array(true), array(2Y, 1Y), array(3Y, 4Y), @@ -272,13 +272,13 @@ CREATE TEMPORARY VIEW various_arrays AS SELECT * FROM VALUES ( struct_array1, struct_array2, map_array1, map_array2 ) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query SELECT (boolean_array1 || boolean_array2) boolean_array, (tinyint_array1 || tinyint_array2) tinyint_array, @@ -295,13 +295,13 @@ SELECT (struct_array1 || struct_array2) struct_array, (map_array1 || map_array2) map_array FROM various_arrays --- !query 12 schema +-- !query schema struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,data_array:array,timestamp_array:array,string_array:array,array_array:array>,struct_array:array>,map_array:array>> --- !query 12 output -[true,false,true] [2,1,3,4] [2,1,3,4] [2,1,3,4] [2,1,3,4] [9223372036854775809,9223372036854775808,9223372036854775808,9223372036854775809] [2.0,1.0,3.0,4.0] [2.0,1.0,3.0,4.0] [2016-03-14,2016-03-13,2016-03-12,2016-03-11] [2016-11-15 20:54:00.0,2016-11-12 20:54:00.0,2016-11-11 20:54:00.0] ["a","b","c","d"] [["a","b"],["c","d"],["e"],["f"]] [{"col1":"a","col2":1},{"col1":"b","col2":2},{"col1":"c","col2":3},{"col1":"d","col2":4}] [{"a":1},{"b":2},{"c":3},{"d":4}] +-- !query output +[true,false,true] [2,1,3,4] 
[2,1,3,4] [2,1,3,4] [2,1,3,4] [9223372036854775809,9223372036854775808,9223372036854775808,9223372036854775809] [2.0,1.0,3.0,4.0] [2.0,1.0,3.0,4.0] [2016-03-14,2016-03-13,2016-03-12,2016-03-11] [2016-11-15 20:54:00,2016-11-12 20:54:00,2016-11-11 20:54:00] ["a","b","c","d"] [["a","b"],["c","d"],["e"],["f"]] [{"col1":"a","col2":1},{"col1":"b","col2":2},{"col1":"c","col2":3},{"col1":"d","col2":4}] [{"a":1},{"b":2},{"c":3},{"d":4}] --- !query 13 +-- !query SELECT (tinyint_array1 || smallint_array2) ts_array, (smallint_array1 || int_array2) si_array, @@ -313,7 +313,7 @@ SELECT (timestamp_array1 || string_array2) tst_array, (string_array1 || int_array2) sti_array FROM various_arrays --- !query 13 schema +-- !query schema struct,si_array:array,ib_array:array,bd_array:array,dd_array:array,df_array:array,std_array:array,tst_array:array,sti_array:array> --- !query 13 output +-- !query output [2,1,3,4] [2,1,3,4] [2,1,3,4] [2,1,9223372036854775808,9223372036854775809] [9.223372036854776E18,9.223372036854776E18,3.0,4.0] [2.0,1.0,3.0,4.0] ["a","b","2016-03-12","2016-03-11"] ["2016-11-15 20:54:00","2016-11-12 20:54:00","c","d"] ["a","b","3","4"] diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/dateTimeOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/dateTimeOperations.sql.out index a4cd408c04bf8..d5c27ade8e152 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/dateTimeOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/dateTimeOperations.sql.out @@ -2,348 +2,348 @@ -- Number of queries: 40 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query select cast(1 as tinyint) + interval 2 day --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS 
TINYINT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) + interval 2 days)' (tinyint and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS TINYINT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS TINYINT)' is of tinyint type.; line 1 pos 7 --- !query 2 +-- !query select cast(1 as smallint) + interval 2 day --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS SMALLINT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) + interval 2 days)' (smallint and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS SMALLINT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS SMALLINT)' is of smallint type.; line 1 pos 7 --- !query 3 +-- !query select cast(1 as int) + interval 2 day --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS INT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS INT) + interval 2 days)' (int and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS INT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS INT)' is of int type.; line 1 pos 7 --- !query 4 +-- !query select cast(1 as bigint) + interval 2 day --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS BIGINT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) + interval 2 days)' (bigint and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS BIGINT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS BIGINT)' is of bigint type.; line 1 pos 7 --- !query 5 +-- 
!query select cast(1 as float) + interval 2 day --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS FLOAT) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) + interval 2 days)' (float and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS FLOAT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS FLOAT)' is of float type.; line 1 pos 7 --- !query 6 +-- !query select cast(1 as double) + interval 2 day --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DOUBLE) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) + interval 2 days)' (double and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS DOUBLE) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DOUBLE)' is of double type.; line 1 pos 7 --- !query 7 +-- !query select cast(1 as decimal(10, 0)) + interval 2 day --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(10,0)) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + interval 2 days)' (decimal(10,0) and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS DECIMAL(10,0)) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 8 +-- !query select cast('2017-12-11' as string) + interval 2 day --- !query 8 schema -struct --- !query 8 output +-- !query schema +struct +-- !query output 2017-12-13 00:00:00 --- !query 9 +-- !query select cast('2017-12-11 09:30:00' as string) + interval 2 day --- !query 9 schema -struct --- !query 9 output 
+-- !query schema +struct +-- !query output 2017-12-13 09:30:00 --- !query 10 +-- !query select cast('1' as binary) + interval 2 day --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS BINARY) + interval 2 days)' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + interval 2 days)' (binary and interval).; line 1 pos 7 +cannot resolve 'CAST('1' AS BINARY) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST('1' AS BINARY)' is of binary type.; line 1 pos 7 --- !query 11 +-- !query select cast(1 as boolean) + interval 2 day --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS BOOLEAN) + interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) + interval 2 days)' (boolean and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS BOOLEAN) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS BOOLEAN)' is of boolean type.; line 1 pos 7 --- !query 12 +-- !query select cast('2017-12-11 09:30:00.0' as timestamp) + interval 2 day --- !query 12 schema -struct --- !query 12 output +-- !query schema +struct +-- !query output 2017-12-13 09:30:00 --- !query 13 +-- !query select cast('2017-12-11 09:30:00' as date) + interval 2 day --- !query 13 schema -struct --- !query 13 output +-- !query schema +struct +-- !query output 2017-12-13 --- !query 14 +-- !query select interval 2 day + cast(1 as tinyint) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS TINYINT))' (interval and tinyint).; line 1 pos 7 +cannot resolve 'CAST(1 AS TINYINT) + 
INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS TINYINT)' is of tinyint type.; line 1 pos 7 --- !query 15 +-- !query select interval 2 day + cast(1 as smallint) --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS SMALLINT))' (interval and smallint).; line 1 pos 7 +cannot resolve 'CAST(1 AS SMALLINT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS SMALLINT)' is of smallint type.; line 1 pos 7 --- !query 16 +-- !query select interval 2 day + cast(1 as int) --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST(1 AS INT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS INT))' (interval and int).; line 1 pos 7 +cannot resolve 'CAST(1 AS INT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS INT)' is of int type.; line 1 pos 7 --- !query 17 +-- !query select interval 2 day + cast(1 as bigint) --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS BIGINT))' (interval and bigint).; line 1 pos 7 +cannot resolve 'CAST(1 AS BIGINT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS BIGINT)' is of bigint type.; line 1 pos 7 --- !query 18 +-- !query select interval 2 day + cast(1 as float) --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output org.apache.spark.sql.AnalysisException -cannot 
resolve '(interval 2 days + CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS FLOAT))' (interval and float).; line 1 pos 7 +cannot resolve 'CAST(1 AS FLOAT) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS FLOAT)' is of float type.; line 1 pos 7 --- !query 19 +-- !query select interval 2 day + cast(1 as double) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS DOUBLE))' (interval and double).; line 1 pos 7 +cannot resolve 'CAST(1 AS DOUBLE) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DOUBLE)' is of double type.; line 1 pos 7 --- !query 20 +-- !query select interval 2 day + cast(1 as decimal(10, 0)) --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS DECIMAL(10,0)))' (interval and decimal(10,0)).; line 1 pos 7 +cannot resolve 'CAST(1 AS DECIMAL(10,0)) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 21 +-- !query select interval 2 day + cast('2017-12-11' as string) --- !query 21 schema -struct --- !query 21 output +-- !query schema +struct +-- !query output 2017-12-13 00:00:00 --- !query 22 +-- !query select interval 2 day + cast('2017-12-11 09:30:00' as string) --- !query 22 schema -struct --- !query 22 output +-- !query schema +struct +-- !query output 2017-12-13 09:30:00 --- !query 23 +-- !query select interval 2 day + cast('1' as binary) --- !query 23 schema +-- !query 
schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(interval 2 days + CAST('1' AS BINARY))' (interval and binary).; line 1 pos 7 +cannot resolve 'CAST('1' AS BINARY) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST('1' AS BINARY)' is of binary type.; line 1 pos 7 --- !query 24 +-- !query select interval 2 day + cast(1 as boolean) --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(interval 2 days + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(interval 2 days + CAST(1 AS BOOLEAN))' (interval and boolean).; line 1 pos 7 +cannot resolve 'CAST(1 AS BOOLEAN) + INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS BOOLEAN)' is of boolean type.; line 1 pos 7 --- !query 25 +-- !query select interval 2 day + cast('2017-12-11 09:30:00.0' as timestamp) --- !query 25 schema -struct --- !query 25 output +-- !query schema +struct +-- !query output 2017-12-13 09:30:00 --- !query 26 +-- !query select interval 2 day + cast('2017-12-11 09:30:00' as date) --- !query 26 schema -struct --- !query 26 output +-- !query schema +struct +-- !query output 2017-12-13 --- !query 27 +-- !query select cast(1 as tinyint) - interval 2 day --- !query 27 schema +-- !query schema struct<> --- !query 27 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS TINYINT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) - interval 2 days)' (tinyint and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS TINYINT) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS TINYINT)' is of tinyint type.; line 1 pos 7 --- !query 28 +-- 
!query select cast(1 as smallint) - interval 2 day --- !query 28 schema +-- !query schema struct<> --- !query 28 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS SMALLINT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) - interval 2 days)' (smallint and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS SMALLINT) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS SMALLINT)' is of smallint type.; line 1 pos 7 --- !query 29 +-- !query select cast(1 as int) - interval 2 day --- !query 29 schema +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS INT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS INT) - interval 2 days)' (int and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS INT) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS INT)' is of int type.; line 1 pos 7 --- !query 30 +-- !query select cast(1 as bigint) - interval 2 day --- !query 30 schema +-- !query schema struct<> --- !query 30 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS BIGINT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) - interval 2 days)' (bigint and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS BIGINT) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS BIGINT)' is of bigint type.; line 1 pos 7 --- !query 31 +-- !query select cast(1 as float) - interval 2 day --- !query 31 schema +-- !query schema struct<> --- !query 31 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS FLOAT) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) - interval 2 days)' (float and interval).; line 1 pos 
7 +cannot resolve 'CAST(1 AS FLOAT) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS FLOAT)' is of float type.; line 1 pos 7 --- !query 32 +-- !query select cast(1 as double) - interval 2 day --- !query 32 schema +-- !query schema struct<> --- !query 32 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DOUBLE) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) - interval 2 days)' (double and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS DOUBLE) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DOUBLE)' is of double type.; line 1 pos 7 --- !query 33 +-- !query select cast(1 as decimal(10, 0)) - interval 2 day --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(10,0)) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - interval 2 days)' (decimal(10,0) and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS DECIMAL(10,0)) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 34 +-- !query select cast('2017-12-11' as string) - interval 2 day --- !query 34 schema -struct --- !query 34 output +-- !query schema +struct +-- !query output 2017-12-09 00:00:00 --- !query 35 +-- !query select cast('2017-12-11 09:30:00' as string) - interval 2 day --- !query 35 schema -struct --- !query 35 output +-- !query schema +struct +-- !query output 2017-12-09 09:30:00 --- !query 36 +-- !query select cast('1' as binary) - interval 2 day --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS BINARY) - interval 2 days)' due to data type 
mismatch: differing types in '(CAST('1' AS BINARY) - interval 2 days)' (binary and interval).; line 1 pos 7 +cannot resolve 'CAST('1' AS BINARY) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST('1' AS BINARY)' is of binary type.; line 1 pos 7 --- !query 37 +-- !query select cast(1 as boolean) - interval 2 day --- !query 37 schema +-- !query schema struct<> --- !query 37 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS BOOLEAN) - interval 2 days)' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) - interval 2 days)' (boolean and interval).; line 1 pos 7 +cannot resolve 'CAST(1 AS BOOLEAN) - INTERVAL '2 days'' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS BOOLEAN)' is of boolean type.; line 1 pos 7 --- !query 38 +-- !query select cast('2017-12-11 09:30:00.0' as timestamp) - interval 2 day --- !query 38 schema -struct --- !query 38 output +-- !query schema +struct +-- !query output 2017-12-09 09:30:00 --- !query 39 +-- !query select cast('2017-12-11 09:30:00' as date) - interval 2 day --- !query 39 schema -struct --- !query 39 output +-- !query schema +struct +-- !query output 2017-12-09 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out index 6ee7f59d69877..33bd3850732f0 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out @@ -2,9513 +2,9513 @@ -- Number of queries: 1145 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT cast(1 as tinyint) + cast(1 as decimal(3, 0)) FROM t --- !query 1 schema +-- !query schema 
struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) + CAST(1 AS DECIMAL(3,0))):decimal(4,0)> --- !query 1 output +-- !query output 2 --- !query 2 +-- !query SELECT cast(1 as tinyint) + cast(1 as decimal(5, 0)) FROM t --- !query 2 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 2 output +-- !query output 2 --- !query 3 +-- !query SELECT cast(1 as tinyint) + cast(1 as decimal(10, 0)) FROM t --- !query 3 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 3 output +-- !query output 2 --- !query 4 +-- !query SELECT cast(1 as tinyint) + cast(1 as decimal(20, 0)) FROM t --- !query 4 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 4 output +-- !query output 2 --- !query 5 +-- !query SELECT cast(1 as smallint) + cast(1 as decimal(3, 0)) FROM t --- !query 5 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 5 output +-- !query output 2 --- !query 6 +-- !query SELECT cast(1 as smallint) + cast(1 as decimal(5, 0)) FROM t --- !query 6 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) + CAST(1 AS DECIMAL(5,0))):decimal(6,0)> --- !query 6 output +-- !query output 2 --- !query 7 +-- !query SELECT cast(1 as smallint) + cast(1 as decimal(10, 0)) FROM t --- !query 7 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 7 output +-- !query output 2 --- !query 8 +-- !query SELECT cast(1 as smallint) + cast(1 as decimal(20, 0)) FROM t --- !query 8 schema +-- !query 
schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 8 output +-- !query output 2 --- !query 9 +-- !query SELECT cast(1 as int) + cast(1 as decimal(3, 0)) FROM t --- !query 9 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 9 output +-- !query output 2 --- !query 10 +-- !query SELECT cast(1 as int) + cast(1 as decimal(5, 0)) FROM t --- !query 10 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 10 output +-- !query output 2 --- !query 11 +-- !query SELECT cast(1 as int) + cast(1 as decimal(10, 0)) FROM t --- !query 11 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) + CAST(1 AS DECIMAL(10,0))):decimal(11,0)> --- !query 11 output +-- !query output 2 --- !query 12 +-- !query SELECT cast(1 as int) + cast(1 as decimal(20, 0)) FROM t --- !query 12 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 12 output +-- !query output 2 --- !query 13 +-- !query SELECT cast(1 as bigint) + cast(1 as decimal(3, 0)) FROM t --- !query 13 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 13 output +-- !query output 2 --- !query 14 +-- !query SELECT cast(1 as bigint) + cast(1 as decimal(5, 0)) FROM t --- !query 14 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 14 output +-- !query output 2 --- !query 15 +-- !query SELECT cast(1 as bigint) + cast(1 as decimal(10, 
0)) FROM t --- !query 15 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 15 output +-- !query output 2 --- !query 16 +-- !query SELECT cast(1 as bigint) + cast(1 as decimal(20, 0)) FROM t --- !query 16 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) + CAST(1 AS DECIMAL(20,0))):decimal(21,0)> --- !query 16 output +-- !query output 2 --- !query 17 +-- !query SELECT cast(1 as float) + cast(1 as decimal(3, 0)) FROM t --- !query 17 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 17 output +-- !query output 2.0 --- !query 18 +-- !query SELECT cast(1 as float) + cast(1 as decimal(5, 0)) FROM t --- !query 18 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 18 output +-- !query output 2.0 --- !query 19 +-- !query SELECT cast(1 as float) + cast(1 as decimal(10, 0)) FROM t --- !query 19 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 19 output +-- !query output 2.0 --- !query 20 +-- !query SELECT cast(1 as float) + cast(1 as decimal(20, 0)) FROM t --- !query 20 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 20 output +-- !query output 2.0 --- !query 21 +-- !query SELECT cast(1 as double) + cast(1 as decimal(3, 0)) FROM t --- !query 21 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 21 output +-- !query output 2.0 --- !query 22 +-- !query SELECT cast(1 as double) + cast(1 as decimal(5, 0)) FROM t --- !query 22 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 22 output +-- !query output 
2.0 --- !query 23 +-- !query SELECT cast(1 as double) + cast(1 as decimal(10, 0)) FROM t --- !query 23 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 23 output +-- !query output 2.0 --- !query 24 +-- !query SELECT cast(1 as double) + cast(1 as decimal(20, 0)) FROM t --- !query 24 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 24 output +-- !query output 2.0 --- !query 25 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(3, 0)) FROM t --- !query 25 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 25 output +-- !query output 2 --- !query 26 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(5, 0)) FROM t --- !query 26 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 26 output +-- !query output 2 --- !query 27 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(10, 0)) FROM t --- !query 27 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS DECIMAL(10,0))):decimal(11,0)> --- !query 27 output +-- !query output 2 --- !query 28 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(20, 0)) FROM t --- !query 28 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 28 output +-- !query output 2 --- !query 29 +-- !query SELECT cast('1' as binary) + cast(1 as decimal(3, 0)) FROM t --- !query 29 schema +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(3,0)))' 
(binary and decimal(3,0)).; line 1 pos 7 --- !query 30 +-- !query SELECT cast('1' as binary) + cast(1 as decimal(5, 0)) FROM t --- !query 30 schema +-- !query schema struct<> --- !query 30 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 31 +-- !query SELECT cast('1' as binary) + cast(1 as decimal(10, 0)) FROM t --- !query 31 schema +-- !query schema struct<> --- !query 31 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 32 +-- !query SELECT cast('1' as binary) + cast(1 as decimal(20, 0)) FROM t --- !query 32 schema +-- !query schema struct<> --- !query 32 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 33 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(3, 0)) FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 34 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(5, 0)) FROM t --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 35 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(10, 0)) FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 36 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) + cast(1 as decimal(20, 0)) FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 37 +-- !query SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(3, 0)) FROM t --- !query 37 schema +-- !query schema struct<> --- !query 37 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(3,0))' is of decimal(3,0) type.; line 1 pos 7 --- !query 38 +-- 
!query SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(5, 0)) FROM t --- !query 38 schema +-- !query schema struct<> --- !query 38 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(5,0))' is of decimal(5,0) type.; line 1 pos 7 --- !query 39 +-- !query SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(10, 0)) FROM t --- !query 39 schema +-- !query schema struct<> --- !query 39 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 40 +-- !query SELECT cast('2017-12-11 09:30:00' as date) + cast(1 as decimal(20, 0)) FROM t --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: argument 2 
requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(20,0))' is of decimal(20,0) type.; line 1 pos 7 --- !query 41 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as tinyint) FROM t --- !query 41 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) + CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(4,0)> --- !query 41 output +-- !query output 2 --- !query 42 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as tinyint) FROM t --- !query 42 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0)) + CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 42 output +-- !query output 2 --- !query 43 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as tinyint) FROM t --- !query 43 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 43 output +-- !query output 2 --- !query 44 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as tinyint) FROM t --- !query 44 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 44 output +-- !query output 2 --- !query 45 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as smallint) FROM t --- !query 45 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0)) + CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 45 output +-- !query output 2 --- !query 46 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as smallint) FROM t --- !query 46 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) + CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(6,0)> --- !query 46 output +-- !query output 2 --- !query 47 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as smallint) FROM t --- !query 47 schema +-- !query schema struct<(CAST(CAST(1 AS 
DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 47 output +-- !query output 2 --- !query 48 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as smallint) FROM t --- !query 48 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 48 output +-- !query output 2 --- !query 49 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as int) FROM t --- !query 49 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 49 output +-- !query output 2 --- !query 50 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as int) FROM t --- !query 50 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) + CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 50 output +-- !query output 2 --- !query 51 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as int) FROM t --- !query 51 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) + CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(11,0)> --- !query 51 output +-- !query output 2 --- !query 52 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as int) FROM t --- !query 52 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 52 output +-- !query output 2 --- !query 53 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as bigint) FROM t --- !query 53 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 53 output +-- !query output 2 --- !query 54 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as bigint) FROM t --- !query 54 
schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 54 output +-- !query output 2 --- !query 55 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as bigint) FROM t --- !query 55 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) + CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 55 output +-- !query output 2 --- !query 56 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as bigint) FROM t --- !query 56 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) + CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(21,0)> --- !query 56 output +-- !query output 2 --- !query 57 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as float) FROM t --- !query 57 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 57 output +-- !query output 2.0 --- !query 58 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as float) FROM t --- !query 58 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 58 output +-- !query output 2.0 --- !query 59 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as float) FROM t --- !query 59 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 59 output +-- !query output 2.0 --- !query 60 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as float) FROM t --- !query 60 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 60 output +-- !query output 2.0 --- !query 61 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as double) FROM t --- !query 61 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- 
!query 61 output +-- !query output 2.0 --- !query 62 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as double) FROM t --- !query 62 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 62 output +-- !query output 2.0 --- !query 63 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as double) FROM t --- !query 63 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 63 output +-- !query output 2.0 --- !query 64 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as double) FROM t --- !query 64 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 64 output +-- !query output 2.0 --- !query 65 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as decimal(10, 0)) FROM t --- !query 65 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 65 output +-- !query output 2 --- !query 66 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as decimal(10, 0)) FROM t --- !query 66 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 66 output +-- !query output 2 --- !query 67 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as decimal(10, 0)) FROM t --- !query 67 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS DECIMAL(10,0))):decimal(11,0)> --- !query 67 output +-- !query output 2 --- !query 68 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as decimal(10, 0)) FROM t --- !query 68 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 68 output +-- !query output 2 --- !query 69 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as string) FROM t --- !query 69 
schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 69 output +-- !query output 2.0 --- !query 70 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as string) FROM t --- !query 70 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 70 output +-- !query output 2.0 --- !query 71 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as string) FROM t --- !query 71 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 71 output +-- !query output 2.0 --- !query 72 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as string) FROM t --- !query 72 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) + CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 72 output +-- !query output 2.0 --- !query 73 +-- !query SELECT cast(1 as decimal(3, 0)) + cast('1' as binary) FROM t --- !query 73 schema +-- !query schema struct<> --- !query 73 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 74 +-- !query SELECT cast(1 as decimal(5, 0)) + cast('1' as binary) FROM t --- !query 74 schema +-- !query schema struct<> --- !query 74 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 75 +-- !query SELECT cast(1 as decimal(10, 0)) + cast('1' as binary) FROM t --- !query 75 schema +-- !query schema struct<> --- !query 75 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 
'(CAST(1 AS DECIMAL(10,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 76 +-- !query SELECT cast(1 as decimal(20, 0)) + cast('1' as binary) FROM t --- !query 76 schema +-- !query schema struct<> --- !query 76 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) + CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 77 +-- !query SELECT cast(1 as decimal(3, 0)) + cast(1 as boolean) FROM t --- !query 77 schema +-- !query schema struct<> --- !query 77 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 78 +-- !query SELECT cast(1 as decimal(5, 0)) + cast(1 as boolean) FROM t --- !query 78 schema +-- !query schema struct<> --- !query 78 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 79 +-- !query SELECT cast(1 as decimal(10, 0)) + cast(1 as boolean) FROM t --- !query 79 schema +-- !query schema struct<> --- !query 79 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 80 +-- !query SELECT cast(1 as decimal(20, 0)) + cast(1 as boolean) FROM t --- !query 80 schema +-- !query schema struct<> --- !query 80 
output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 81 +-- !query SELECT cast(1 as decimal(3, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 82 +-- !query SELECT cast(1 as decimal(5, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 82 schema +-- !query schema struct<> --- !query 82 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 83 +-- !query SELECT cast(1 as decimal(10, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 84 +-- !query SELECT cast(1 as decimal(20, 0)) + cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS 
DECIMAL(20,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 85 +-- !query SELECT cast(1 as decimal(3, 0)) + cast('2017-12-11 09:30:00' as date) FROM t --- !query 85 schema +-- !query schema struct<> --- !query 85 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(3,0))' is of decimal(3,0) type.; line 1 pos 7 --- !query 86 +-- !query SELECT cast(1 as decimal(5, 0)) + cast('2017-12-11 09:30:00' as date) FROM t --- !query 86 schema +-- !query schema struct<> --- !query 86 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(5,0))' is of decimal(5,0) type.; line 1 pos 7 --- !query 87 +-- !query SELECT cast(1 as decimal(10, 0)) + cast('2017-12-11 09:30:00' as date) FROM t --- !query 87 schema +-- !query schema struct<> --- !query 87 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 
'(CAST(1 AS DECIMAL(10,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 88 +-- !query SELECT cast(1 as decimal(20, 0)) + cast('2017-12-11 09:30:00' as date) FROM t --- !query 88 schema +-- !query schema struct<> --- !query 88 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(20,0)) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) + CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(20,0))' is of decimal(20,0) type.; line 1 pos 7 --- !query 89 +-- !query SELECT cast(1 as tinyint) - cast(1 as decimal(3, 0)) FROM t --- !query 89 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) - CAST(1 AS DECIMAL(3,0))):decimal(4,0)> --- !query 89 output +-- !query output 0 --- !query 90 +-- !query SELECT cast(1 as tinyint) - cast(1 as decimal(5, 0)) FROM t --- !query 90 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 90 output +-- !query output 0 --- !query 91 +-- !query SELECT cast(1 as tinyint) - cast(1 as decimal(10, 0)) FROM t --- !query 91 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 91 output +-- !query output 0 --- !query 92 +-- !query SELECT cast(1 as tinyint) - cast(1 as 
decimal(20, 0)) FROM t --- !query 92 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 92 output +-- !query output 0 --- !query 93 +-- !query SELECT cast(1 as smallint) - cast(1 as decimal(3, 0)) FROM t --- !query 93 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 93 output +-- !query output 0 --- !query 94 +-- !query SELECT cast(1 as smallint) - cast(1 as decimal(5, 0)) FROM t --- !query 94 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) - CAST(1 AS DECIMAL(5,0))):decimal(6,0)> --- !query 94 output +-- !query output 0 --- !query 95 +-- !query SELECT cast(1 as smallint) - cast(1 as decimal(10, 0)) FROM t --- !query 95 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 95 output +-- !query output 0 --- !query 96 +-- !query SELECT cast(1 as smallint) - cast(1 as decimal(20, 0)) FROM t --- !query 96 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 96 output +-- !query output 0 --- !query 97 +-- !query SELECT cast(1 as int) - cast(1 as decimal(3, 0)) FROM t --- !query 97 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 97 output +-- !query output 0 --- !query 98 +-- !query SELECT cast(1 as int) - cast(1 as decimal(5, 0)) FROM t --- !query 98 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 98 output +-- !query output 
0 --- !query 99 +-- !query SELECT cast(1 as int) - cast(1 as decimal(10, 0)) FROM t --- !query 99 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) - CAST(1 AS DECIMAL(10,0))):decimal(11,0)> --- !query 99 output +-- !query output 0 --- !query 100 +-- !query SELECT cast(1 as int) - cast(1 as decimal(20, 0)) FROM t --- !query 100 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 100 output +-- !query output 0 --- !query 101 +-- !query SELECT cast(1 as bigint) - cast(1 as decimal(3, 0)) FROM t --- !query 101 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 101 output +-- !query output 0 --- !query 102 +-- !query SELECT cast(1 as bigint) - cast(1 as decimal(5, 0)) FROM t --- !query 102 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 102 output +-- !query output 0 --- !query 103 +-- !query SELECT cast(1 as bigint) - cast(1 as decimal(10, 0)) FROM t --- !query 103 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 103 output +-- !query output 0 --- !query 104 +-- !query SELECT cast(1 as bigint) - cast(1 as decimal(20, 0)) FROM t --- !query 104 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) - CAST(1 AS DECIMAL(20,0))):decimal(21,0)> --- !query 104 output +-- !query output 0 --- !query 105 +-- !query SELECT cast(1 as float) - cast(1 as decimal(3, 0)) FROM t --- !query 105 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 105 output +-- !query output 0.0 --- !query 
106 +-- !query SELECT cast(1 as float) - cast(1 as decimal(5, 0)) FROM t --- !query 106 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 106 output +-- !query output 0.0 --- !query 107 +-- !query SELECT cast(1 as float) - cast(1 as decimal(10, 0)) FROM t --- !query 107 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 107 output +-- !query output 0.0 --- !query 108 +-- !query SELECT cast(1 as float) - cast(1 as decimal(20, 0)) FROM t --- !query 108 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 108 output +-- !query output 0.0 --- !query 109 +-- !query SELECT cast(1 as double) - cast(1 as decimal(3, 0)) FROM t --- !query 109 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 109 output +-- !query output 0.0 --- !query 110 +-- !query SELECT cast(1 as double) - cast(1 as decimal(5, 0)) FROM t --- !query 110 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 110 output +-- !query output 0.0 --- !query 111 +-- !query SELECT cast(1 as double) - cast(1 as decimal(10, 0)) FROM t --- !query 111 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 111 output +-- !query output 0.0 --- !query 112 +-- !query SELECT cast(1 as double) - cast(1 as decimal(20, 0)) FROM t --- !query 112 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 112 output +-- !query output 0.0 --- !query 113 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(3, 0)) FROM t --- !query 113 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(3,0)) AS 
DECIMAL(11,0))):decimal(11,0)> --- !query 113 output +-- !query output 0 --- !query 114 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(5, 0)) FROM t --- !query 114 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 114 output +-- !query output 0 --- !query 115 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(10, 0)) FROM t --- !query 115 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS DECIMAL(10,0))):decimal(11,0)> --- !query 115 output +-- !query output 0 --- !query 116 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(20, 0)) FROM t --- !query 116 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 116 output +-- !query output 0 --- !query 117 +-- !query SELECT cast('1' as binary) - cast(1 as decimal(3, 0)) FROM t --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 118 +-- !query SELECT cast('1' as binary) - cast(1 as decimal(5, 0)) FROM t --- !query 118 schema +-- !query schema struct<> --- !query 118 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 119 +-- !query SELECT cast('1' as binary) - cast(1 as decimal(10, 0)) FROM t --- !query 119 schema +-- !query schema struct<> --- !query 119 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS 
BINARY) - CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 120 +-- !query SELECT cast('1' as binary) - cast(1 as decimal(20, 0)) FROM t --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 121 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(3, 0)) FROM t --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: argument 2 requires timestamp type, however, 'CAST(1 AS DECIMAL(3,0))' is of decimal(3,0) type.; line 1 pos 7 --- !query 122 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(5, 0)) FROM t --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: argument 2 requires timestamp type, however, 
'CAST(1 AS DECIMAL(5,0))' is of decimal(5,0) type.; line 1 pos 7 --- !query 123 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(10, 0)) FROM t --- !query 123 schema +-- !query schema struct<> --- !query 123 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: argument 2 requires timestamp type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 124 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) - cast(1 as decimal(20, 0)) FROM t --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: argument 2 requires timestamp type, however, 'CAST(1 AS DECIMAL(20,0))' is of decimal(20,0) type.; line 1 pos 7 --- !query 125 +-- !query SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(3, 0)) FROM t --- !query 125 schema +-- !query schema struct<> --- !query 125 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; 
line 1 pos 7 +cannot resolve 'date_sub(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(3,0))' is of decimal(3,0) type.; line 1 pos 7 --- !query 126 +-- !query SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(5, 0)) FROM t --- !query 126 schema +-- !query schema struct<> --- !query 126 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 +cannot resolve 'date_sub(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(5,0))' is of decimal(5,0) type.; line 1 pos 7 --- !query 127 +-- !query SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(10, 0)) FROM t --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 +cannot resolve 'date_sub(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 128 +-- !query SELECT cast('2017-12-11 09:30:00' as date) - cast(1 as decimal(20, 0)) FROM t --- !query 128 schema +-- !query schema struct<> --- !query 128 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(20,0)))' 
due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 +cannot resolve 'date_sub(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, 'CAST(1 AS DECIMAL(20,0))' is of decimal(20,0) type.; line 1 pos 7 --- !query 129 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as tinyint) FROM t --- !query 129 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) - CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(4,0)> --- !query 129 output +-- !query output 0 --- !query 130 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as tinyint) FROM t --- !query 130 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0)) - CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 130 output +-- !query output 0 --- !query 131 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as tinyint) FROM t --- !query 131 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 131 output +-- !query output 0 --- !query 132 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as tinyint) FROM t --- !query 132 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 132 output +-- !query output 0 --- !query 133 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as smallint) FROM t --- !query 133 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0)) - CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> --- !query 133 output +-- !query output 0 --- !query 134 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as smallint) FROM t --- !query 134 schema +-- !query 
schema struct<(CAST(1 AS DECIMAL(5,0)) - CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(6,0)> --- !query 134 output +-- !query output 0 --- !query 135 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as smallint) FROM t --- !query 135 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 135 output +-- !query output 0 --- !query 136 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as smallint) FROM t --- !query 136 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 136 output +-- !query output 0 --- !query 137 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as int) FROM t --- !query 137 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 137 output +-- !query output 0 --- !query 138 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as int) FROM t --- !query 138 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) - CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 138 output +-- !query output 0 --- !query 139 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as int) FROM t --- !query 139 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) - CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(11,0)> --- !query 139 output +-- !query output 0 --- !query 140 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as int) FROM t --- !query 140 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 140 output +-- !query output 0 --- !query 141 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as bigint) FROM t --- 
!query 141 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 141 output +-- !query output 0 --- !query 142 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as bigint) FROM t --- !query 142 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 142 output +-- !query output 0 --- !query 143 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as bigint) FROM t --- !query 143 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0)) - CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 143 output +-- !query output 0 --- !query 144 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as bigint) FROM t --- !query 144 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) - CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(21,0)> --- !query 144 output +-- !query output 0 --- !query 145 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as float) FROM t --- !query 145 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 145 output +-- !query output 0.0 --- !query 146 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as float) FROM t --- !query 146 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 146 output +-- !query output 0.0 --- !query 147 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as float) FROM t --- !query 147 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 147 output +-- !query output 0.0 --- !query 148 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as float) FROM t --- !query 148 schema +-- !query schema 
struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 148 output +-- !query output 0.0 --- !query 149 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as double) FROM t --- !query 149 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 149 output +-- !query output 0.0 --- !query 150 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as double) FROM t --- !query 150 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 150 output +-- !query output 0.0 --- !query 151 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as double) FROM t --- !query 151 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 151 output +-- !query output 0.0 --- !query 152 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as double) FROM t --- !query 152 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 152 output +-- !query output 0.0 --- !query 153 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as decimal(10, 0)) FROM t --- !query 153 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 153 output +-- !query output 0 --- !query 154 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as decimal(10, 0)) FROM t --- !query 154 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> --- !query 154 output +-- !query output 0 --- !query 155 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as decimal(10, 0)) FROM t --- !query 155 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS DECIMAL(10,0))):decimal(11,0)> --- !query 155 output +-- !query output 0 --- !query 156 +-- !query 
SELECT cast(1 as decimal(20, 0)) - cast(1 as decimal(10, 0)) FROM t --- !query 156 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0)) - CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> --- !query 156 output +-- !query output 0 --- !query 157 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as string) FROM t --- !query 157 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 157 output +-- !query output 0.0 --- !query 158 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as string) FROM t --- !query 158 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 158 output +-- !query output 0.0 --- !query 159 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as string) FROM t --- !query 159 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 159 output +-- !query output 0.0 --- !query 160 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as string) FROM t --- !query 160 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) - CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 160 output +-- !query output 0.0 --- !query 161 +-- !query SELECT cast(1 as decimal(3, 0)) - cast('1' as binary) FROM t --- !query 161 schema +-- !query schema struct<> --- !query 161 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) - CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 162 +-- !query SELECT cast(1 as decimal(5, 0)) - cast('1' as binary) FROM t --- !query 162 schema +-- !query schema struct<> --- !query 162 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) - 
CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 163 +-- !query SELECT cast(1 as decimal(10, 0)) - cast('1' as binary) FROM t --- !query 163 schema +-- !query schema struct<> --- !query 163 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 164 +-- !query SELECT cast(1 as decimal(20, 0)) - cast('1' as binary) FROM t --- !query 164 schema +-- !query schema struct<> --- !query 164 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 165 +-- !query SELECT cast(1 as decimal(3, 0)) - cast(1 as boolean) FROM t --- !query 165 schema +-- !query schema struct<> --- !query 165 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) - CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 166 +-- !query SELECT cast(1 as decimal(5, 0)) - cast(1 as boolean) FROM t --- !query 166 schema +-- !query schema struct<> --- !query 166 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 167 +-- !query SELECT cast(1 as decimal(10, 0)) - cast(1 as boolean) FROM t --- !query 167 schema +-- !query schema struct<> --- !query 167 output +-- !query 
output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 168 +-- !query SELECT cast(1 as decimal(20, 0)) - cast(1 as boolean) FROM t --- !query 168 schema +-- !query schema struct<> --- !query 168 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 169 +-- !query SELECT cast(1 as decimal(3, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 169 schema +-- !query schema struct<> --- !query 169 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DECIMAL(3,0))' is of decimal(3,0) type.; line 1 pos 7 --- !query 170 +-- !query SELECT cast(1 as decimal(5, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 170 schema +-- !query schema struct<> --- !query 170 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' 
due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DECIMAL(5,0))' is of decimal(5,0) type.; line 1 pos 7 --- !query 171 +-- !query SELECT cast(1 as decimal(10, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 171 schema +-- !query schema struct<> --- !query 171 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 172 +-- !query SELECT cast(1 as decimal(20, 0)) - cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 172 schema +-- !query schema struct<> --- !query 172 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: argument 1 requires timestamp type, however, 'CAST(1 AS DECIMAL(20,0))' is of decimal(20,0) type.; line 1 pos 7 --- !query 173 +-- !query SELECT cast(1 as decimal(3, 0)) - cast('2017-12-11 09:30:00' as date) FROM t --- !query 173 schema +-- !query schema struct<> --- !query 173 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(3,0)) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS 
DECIMAL(3,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 +cannot resolve 'subtractdates(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: argument 1 requires date type, however, 'CAST(1 AS DECIMAL(3,0))' is of decimal(3,0) type.; line 1 pos 7 --- !query 174 +-- !query SELECT cast(1 as decimal(5, 0)) - cast('2017-12-11 09:30:00' as date) FROM t --- !query 174 schema +-- !query schema struct<> --- !query 174 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 +cannot resolve 'subtractdates(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: argument 1 requires date type, however, 'CAST(1 AS DECIMAL(5,0))' is of decimal(5,0) type.; line 1 pos 7 --- !query 175 +-- !query SELECT cast(1 as decimal(10, 0)) - cast('2017-12-11 09:30:00' as date) FROM t --- !query 175 schema +-- !query schema struct<> --- !query 175 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 +cannot resolve 'subtractdates(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: argument 1 requires date type, however, 'CAST(1 AS DECIMAL(10,0))' is of decimal(10,0) type.; line 1 pos 7 --- !query 176 +-- !query SELECT cast(1 as decimal(20, 0)) - cast('2017-12-11 09:30:00' as date) FROM t --- !query 176 schema +-- !query schema struct<> --- !query 176 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 
09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) - CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 +cannot resolve 'subtractdates(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: argument 1 requires date type, however, 'CAST(1 AS DECIMAL(20,0))' is of decimal(20,0) type.; line 1 pos 7 --- !query 177 +-- !query SELECT cast(1 as tinyint) * cast(1 as decimal(3, 0)) FROM t --- !query 177 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) * CAST(1 AS DECIMAL(3,0))):decimal(7,0)> --- !query 177 output +-- !query output 1 --- !query 178 +-- !query SELECT cast(1 as tinyint) * cast(1 as decimal(5, 0)) FROM t --- !query 178 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,0)> --- !query 178 output +-- !query output 1 --- !query 179 +-- !query SELECT cast(1 as tinyint) * cast(1 as decimal(10, 0)) FROM t --- !query 179 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,0)> --- !query 179 output +-- !query output 1 --- !query 180 +-- !query SELECT cast(1 as tinyint) * cast(1 as decimal(20, 0)) FROM t --- !query 180 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,0)> --- !query 180 output +-- !query output 1 --- !query 181 +-- !query SELECT cast(1 as smallint) * cast(1 as decimal(3, 0)) FROM t --- !query 181 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(9,0)> --- !query 181 output +-- !query output 1 --- !query 182 +-- !query SELECT cast(1 as smallint) * cast(1 as decimal(5, 0)) FROM t --- !query 182 schema +-- 
!query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) * CAST(1 AS DECIMAL(5,0))):decimal(11,0)> --- !query 182 output +-- !query output 1 --- !query 183 +-- !query SELECT cast(1 as smallint) * cast(1 as decimal(10, 0)) FROM t --- !query 183 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,0)> --- !query 183 output +-- !query output 1 --- !query 184 +-- !query SELECT cast(1 as smallint) * cast(1 as decimal(20, 0)) FROM t --- !query 184 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,0)> --- !query 184 output +-- !query output 1 --- !query 185 +-- !query SELECT cast(1 as int) * cast(1 as decimal(3, 0)) FROM t --- !query 185 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(14,0)> --- !query 185 output +-- !query output 1 --- !query 186 +-- !query SELECT cast(1 as int) * cast(1 as decimal(5, 0)) FROM t --- !query 186 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,0)> --- !query 186 output +-- !query output 1 --- !query 187 +-- !query SELECT cast(1 as int) * cast(1 as decimal(10, 0)) FROM t --- !query 187 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) * CAST(1 AS DECIMAL(10,0))):decimal(21,0)> --- !query 187 output +-- !query output 1 --- !query 188 +-- !query SELECT cast(1 as int) * cast(1 as decimal(20, 0)) FROM t --- !query 188 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,0)> --- !query 188 output +-- !query output 1 --- !query 189 +-- !query SELECT cast(1 as bigint) * cast(1 as decimal(3, 0)) FROM 
t --- !query 189 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(24,0)> --- !query 189 output +-- !query output 1 --- !query 190 +-- !query SELECT cast(1 as bigint) * cast(1 as decimal(5, 0)) FROM t --- !query 190 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,0)> --- !query 190 output +-- !query output 1 --- !query 191 +-- !query SELECT cast(1 as bigint) * cast(1 as decimal(10, 0)) FROM t --- !query 191 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,0)> --- !query 191 output +-- !query output 1 --- !query 192 +-- !query SELECT cast(1 as bigint) * cast(1 as decimal(20, 0)) FROM t --- !query 192 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) * CAST(1 AS DECIMAL(20,0))):decimal(38,0)> --- !query 192 output +-- !query output 1 --- !query 193 +-- !query SELECT cast(1 as float) * cast(1 as decimal(3, 0)) FROM t --- !query 193 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 193 output +-- !query output 1.0 --- !query 194 +-- !query SELECT cast(1 as float) * cast(1 as decimal(5, 0)) FROM t --- !query 194 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 194 output +-- !query output 1.0 --- !query 195 +-- !query SELECT cast(1 as float) * cast(1 as decimal(10, 0)) FROM t --- !query 195 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 195 output +-- !query output 1.0 --- !query 196 +-- !query SELECT cast(1 as float) * cast(1 as decimal(20, 0)) FROM t --- !query 196 schema +-- !query schema 
struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 196 output +-- !query output 1.0 --- !query 197 +-- !query SELECT cast(1 as double) * cast(1 as decimal(3, 0)) FROM t --- !query 197 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 197 output +-- !query output 1.0 --- !query 198 +-- !query SELECT cast(1 as double) * cast(1 as decimal(5, 0)) FROM t --- !query 198 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 198 output +-- !query output 1.0 --- !query 199 +-- !query SELECT cast(1 as double) * cast(1 as decimal(10, 0)) FROM t --- !query 199 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 199 output +-- !query output 1.0 --- !query 200 +-- !query SELECT cast(1 as double) * cast(1 as decimal(20, 0)) FROM t --- !query 200 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 200 output +-- !query output 1.0 --- !query 201 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(3, 0)) FROM t --- !query 201 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(14,0)> --- !query 201 output +-- !query output 1 --- !query 202 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(5, 0)) FROM t --- !query 202 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,0)> --- !query 202 output +-- !query output 1 --- !query 203 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(10, 0)) FROM t --- !query 203 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS DECIMAL(10,0))):decimal(21,0)> --- !query 203 output +-- !query output 1 --- !query 204 +-- !query 
SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(20, 0)) FROM t --- !query 204 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,0)> --- !query 204 output +-- !query output 1 --- !query 205 +-- !query SELECT cast('1' as binary) * cast(1 as decimal(3, 0)) FROM t --- !query 205 schema +-- !query schema struct<> --- !query 205 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 206 +-- !query SELECT cast('1' as binary) * cast(1 as decimal(5, 0)) FROM t --- !query 206 schema +-- !query schema struct<> --- !query 206 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 207 +-- !query SELECT cast('1' as binary) * cast(1 as decimal(10, 0)) FROM t --- !query 207 schema +-- !query schema struct<> --- !query 207 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 208 +-- !query SELECT cast('1' as binary) * cast(1 as decimal(20, 0)) FROM t --- !query 208 schema +-- !query schema struct<> --- !query 208 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 209 +-- !query SELECT cast('2017*12*11 
09:30:00.0' as timestamp) * cast(1 as decimal(3, 0)) FROM t --- !query 209 schema +-- !query schema struct<> --- !query 209 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 210 +-- !query SELECT cast('2017*12*11 09:30:00.0' as timestamp) * cast(1 as decimal(5, 0)) FROM t --- !query 210 schema +-- !query schema struct<> --- !query 210 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 211 +-- !query SELECT cast('2017*12*11 09:30:00.0' as timestamp) * cast(1 as decimal(10, 0)) FROM t --- !query 211 schema +-- !query schema struct<> --- !query 211 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 212 +-- !query SELECT cast('2017*12*11 09:30:00.0' as timestamp) * cast(1 as decimal(20, 0)) FROM t --- !query 212 schema +-- !query schema struct<> --- !query 212 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00.0' AS TIMESTAMP) * CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 213 +-- !query SELECT cast('2017*12*11 09:30:00' as date) * cast(1 as 
decimal(3, 0)) FROM t --- !query 213 schema +-- !query schema struct<> --- !query 213 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 214 +-- !query SELECT cast('2017*12*11 09:30:00' as date) * cast(1 as decimal(5, 0)) FROM t --- !query 214 schema +-- !query schema struct<> --- !query 214 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 215 +-- !query SELECT cast('2017*12*11 09:30:00' as date) * cast(1 as decimal(10, 0)) FROM t --- !query 215 schema +-- !query schema struct<> --- !query 215 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 216 +-- !query SELECT cast('2017*12*11 09:30:00' as date) * cast(1 as decimal(20, 0)) FROM t --- !query 216 schema +-- !query schema struct<> --- !query 216 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017*12*11 09:30:00' AS DATE) * CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 217 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as tinyint) FROM t --- !query 217 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) * CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(7,0)> --- 
!query 217 output +-- !query output 1 --- !query 218 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as tinyint) FROM t --- !query 218 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) * CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(9,0)> --- !query 218 output +-- !query output 1 --- !query 219 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as tinyint) FROM t --- !query 219 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(14,0)> --- !query 219 output +-- !query output 1 --- !query 220 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as tinyint) FROM t --- !query 220 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(24,0)> --- !query 220 output +-- !query output 1 --- !query 221 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as smallint) FROM t --- !query 221 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) * CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,0)> --- !query 221 output +-- !query output 1 --- !query 222 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as smallint) FROM t --- !query 222 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) * CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(11,0)> --- !query 222 output +-- !query output 1 --- !query 223 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as smallint) FROM t --- !query 223 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,0)> --- !query 223 output +-- !query output 1 --- !query 224 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as smallint) FROM t --- !query 224 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS 
DECIMAL(20,0)) * CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,0)> --- !query 224 output +-- !query output 1 --- !query 225 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as int) FROM t --- !query 225 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,0)> --- !query 225 output +-- !query output 1 --- !query 226 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as int) FROM t --- !query 226 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) * CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,0)> --- !query 226 output +-- !query output 1 --- !query 227 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as int) FROM t --- !query 227 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) * CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(21,0)> --- !query 227 output +-- !query output 1 --- !query 228 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as int) FROM t --- !query 228 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,0)> --- !query 228 output +-- !query output 1 --- !query 229 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as bigint) FROM t --- !query 229 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,0)> --- !query 229 output +-- !query output 1 --- !query 230 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as bigint) FROM t --- !query 230 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,0)> --- !query 230 output +-- !query output 1 --- !query 231 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as bigint) FROM t --- !query 231 
schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) * CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,0)> --- !query 231 output +-- !query output 1 --- !query 232 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as bigint) FROM t --- !query 232 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) * CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(38,0)> --- !query 232 output +-- !query output 1 --- !query 233 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as float) FROM t --- !query 233 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 233 output +-- !query output 1.0 --- !query 234 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as float) FROM t --- !query 234 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 234 output +-- !query output 1.0 --- !query 235 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as float) FROM t --- !query 235 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 235 output +-- !query output 1.0 --- !query 236 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as float) FROM t --- !query 236 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 236 output +-- !query output 1.0 --- !query 237 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as double) FROM t --- !query 237 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 237 output +-- !query output 1.0 --- !query 238 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as double) FROM t --- !query 238 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 238 output +-- !query output 
1.0 --- !query 239 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as double) FROM t --- !query 239 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 239 output +-- !query output 1.0 --- !query 240 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as double) FROM t --- !query 240 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 240 output +-- !query output 1.0 --- !query 241 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as decimal(10, 0)) FROM t --- !query 241 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,0)> --- !query 241 output +-- !query output 1 --- !query 242 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as decimal(10, 0)) FROM t --- !query 242 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,0)> --- !query 242 output +-- !query output 1 --- !query 243 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as decimal(10, 0)) FROM t --- !query 243 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS DECIMAL(10,0))):decimal(21,0)> --- !query 243 output +-- !query output 1 --- !query 244 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as decimal(10, 0)) FROM t --- !query 244 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) * CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,0)> --- !query 244 output +-- !query output 1 --- !query 245 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as string) FROM t --- !query 245 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 245 output +-- !query output 1.0 --- !query 246 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as string) FROM t --- !query 
246 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 246 output +-- !query output 1.0 --- !query 247 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as string) FROM t --- !query 247 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 247 output +-- !query output 1.0 --- !query 248 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as string) FROM t --- !query 248 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) * CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 248 output +-- !query output 1.0 --- !query 249 +-- !query SELECT cast(1 as decimal(3, 0)) * cast('1' as binary) FROM t --- !query 249 schema +-- !query schema struct<> --- !query 249 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 250 +-- !query SELECT cast(1 as decimal(5, 0)) * cast('1' as binary) FROM t --- !query 250 schema +-- !query schema struct<> --- !query 250 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 251 +-- !query SELECT cast(1 as decimal(10, 0)) * cast('1' as binary) FROM t --- !query 251 schema +-- !query schema struct<> --- !query 251 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 252 +-- !query SELECT cast(1 as 
decimal(20, 0)) * cast('1' as binary) FROM t --- !query 252 schema +-- !query schema struct<> --- !query 252 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 253 +-- !query SELECT cast(1 as decimal(3, 0)) * cast(1 as boolean) FROM t --- !query 253 schema +-- !query schema struct<> --- !query 253 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 254 +-- !query SELECT cast(1 as decimal(5, 0)) * cast(1 as boolean) FROM t --- !query 254 schema +-- !query schema struct<> --- !query 254 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 255 +-- !query SELECT cast(1 as decimal(10, 0)) * cast(1 as boolean) FROM t --- !query 255 schema +-- !query schema struct<> --- !query 255 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 256 +-- !query SELECT cast(1 as decimal(20, 0)) * cast(1 as boolean) FROM t --- !query 256 schema +-- !query schema struct<> --- !query 256 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST(1 AS 
BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 257 +-- !query SELECT cast(1 as decimal(3, 0)) * cast('2017*12*11 09:30:00.0' as timestamp) FROM t --- !query 257 schema +-- !query schema struct<> --- !query 257 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 258 +-- !query SELECT cast(1 as decimal(5, 0)) * cast('2017*12*11 09:30:00.0' as timestamp) FROM t --- !query 258 schema +-- !query schema struct<> --- !query 258 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 259 +-- !query SELECT cast(1 as decimal(10, 0)) * cast('2017*12*11 09:30:00.0' as timestamp) FROM t --- !query 259 schema +-- !query schema struct<> --- !query 259 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 260 +-- !query SELECT cast(1 as decimal(20, 0)) * cast('2017*12*11 09:30:00.0' as timestamp) FROM t --- !query 260 schema +-- !query schema struct<> --- !query 260 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST('2017*12*11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and 
timestamp).; line 1 pos 7 --- !query 261 +-- !query SELECT cast(1 as decimal(3, 0)) * cast('2017*12*11 09:30:00' as date) FROM t --- !query 261 schema +-- !query schema struct<> --- !query 261 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) * CAST('2017*12*11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) * CAST('2017*12*11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 262 +-- !query SELECT cast(1 as decimal(5, 0)) * cast('2017*12*11 09:30:00' as date) FROM t --- !query 262 schema +-- !query schema struct<> --- !query 262 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) * CAST('2017*12*11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) * CAST('2017*12*11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 263 +-- !query SELECT cast(1 as decimal(10, 0)) * cast('2017*12*11 09:30:00' as date) FROM t --- !query 263 schema +-- !query schema struct<> --- !query 263 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) * CAST('2017*12*11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) * CAST('2017*12*11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 264 +-- !query SELECT cast(1 as decimal(20, 0)) * cast('2017*12*11 09:30:00' as date) FROM t --- !query 264 schema +-- !query schema struct<> --- !query 264 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) * CAST('2017*12*11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) * CAST('2017*12*11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 265 +-- !query SELECT cast(1 as tinyint) / cast(1 as decimal(3, 0)) FROM t --- !query 265 schema +-- 
!query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) / CAST(1 AS DECIMAL(3,0))):decimal(9,6)> --- !query 265 output -1 +-- !query output +1.000000 --- !query 266 +-- !query SELECT cast(1 as tinyint) / cast(1 as decimal(5, 0)) FROM t --- !query 266 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,6)> --- !query 266 output -1 +-- !query output +1.000000 --- !query 267 +-- !query SELECT cast(1 as tinyint) / cast(1 as decimal(10, 0)) FROM t --- !query 267 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> --- !query 267 output -1 +-- !query output +1.00000000000 --- !query 268 +-- !query SELECT cast(1 as tinyint) / cast(1 as decimal(20, 0)) FROM t --- !query 268 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,21)> --- !query 268 output -1 +-- !query output +1.000000000000000000000 --- !query 269 +-- !query SELECT cast(1 as smallint) / cast(1 as decimal(3, 0)) FROM t --- !query 269 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(11,6)> --- !query 269 output -1 +-- !query output +1.000000 --- !query 270 +-- !query SELECT cast(1 as smallint) / cast(1 as decimal(5, 0)) FROM t --- !query 270 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) / CAST(1 AS DECIMAL(5,0))):decimal(11,6)> --- !query 270 output -1 +-- !query output +1.000000 --- !query 271 +-- !query SELECT cast(1 as smallint) / cast(1 as decimal(10, 0)) FROM t --- !query 271 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> --- !query 271 
output -1 +-- !query output +1.00000000000 --- !query 272 +-- !query SELECT cast(1 as smallint) / cast(1 as decimal(20, 0)) FROM t --- !query 272 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,21)> --- !query 272 output -1 +-- !query output +1.000000000000000000000 --- !query 273 +-- !query SELECT cast(1 as int) / cast(1 as decimal(3, 0)) FROM t --- !query 273 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 273 output -1 +-- !query output +1.000000 --- !query 274 +-- !query SELECT cast(1 as int) / cast(1 as decimal(5, 0)) FROM t --- !query 274 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 274 output -1 +-- !query output +1.000000 --- !query 275 +-- !query SELECT cast(1 as int) / cast(1 as decimal(10, 0)) FROM t --- !query 275 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> --- !query 275 output -1 +-- !query output +1.00000000000 --- !query 276 +-- !query SELECT cast(1 as int) / cast(1 as decimal(20, 0)) FROM t --- !query 276 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> --- !query 276 output -1 +-- !query output +1.000000000000000000000 --- !query 277 +-- !query SELECT cast(1 as bigint) / cast(1 as decimal(3, 0)) FROM t --- !query 277 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(26,6)> --- !query 277 output -1 +-- !query output +1.000000 --- !query 278 +-- !query SELECT cast(1 as bigint) / cast(1 as decimal(5, 0)) FROM t --- 
!query 278 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,6)> --- !query 278 output -1 +-- !query output +1.000000 --- !query 279 +-- !query SELECT cast(1 as bigint) / cast(1 as decimal(10, 0)) FROM t --- !query 279 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> --- !query 279 output -1 +-- !query output +1.00000000000 --- !query 280 +-- !query SELECT cast(1 as bigint) / cast(1 as decimal(20, 0)) FROM t --- !query 280 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) / CAST(1 AS DECIMAL(20,0))):decimal(38,18)> --- !query 280 output -1 +-- !query output +1.000000000000000000 --- !query 281 +-- !query SELECT cast(1 as float) / cast(1 as decimal(3, 0)) FROM t --- !query 281 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) AS DOUBLE)):double> --- !query 281 output +-- !query output 1.0 --- !query 282 +-- !query SELECT cast(1 as float) / cast(1 as decimal(5, 0)) FROM t --- !query 282 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) AS DOUBLE)):double> --- !query 282 output +-- !query output 1.0 --- !query 283 +-- !query SELECT cast(1 as float) / cast(1 as decimal(10, 0)) FROM t --- !query 283 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) AS DOUBLE)):double> --- !query 283 output +-- !query output 1.0 --- !query 284 +-- !query SELECT cast(1 as float) / cast(1 as decimal(20, 0)) FROM t --- !query 284 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) AS DOUBLE)):double> --- !query 284 output +-- !query output 1.0 --- !query 285 +-- !query SELECT cast(1 as double) / 
cast(1 as decimal(3, 0)) FROM t --- !query 285 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 285 output +-- !query output 1.0 --- !query 286 +-- !query SELECT cast(1 as double) / cast(1 as decimal(5, 0)) FROM t --- !query 286 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 286 output +-- !query output 1.0 --- !query 287 +-- !query SELECT cast(1 as double) / cast(1 as decimal(10, 0)) FROM t --- !query 287 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 287 output +-- !query output 1.0 --- !query 288 +-- !query SELECT cast(1 as double) / cast(1 as decimal(20, 0)) FROM t --- !query 288 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 288 output +-- !query output 1.0 --- !query 289 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(3, 0)) FROM t --- !query 289 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 289 output -1 +-- !query output +1.000000 --- !query 290 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(5, 0)) FROM t --- !query 290 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 290 output -1 +-- !query output +1.000000 --- !query 291 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t --- !query 291 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> --- !query 291 output -1 +-- !query output +1.00000000000 --- !query 292 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(20, 0)) FROM t --- !query 292 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS 
DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> --- !query 292 output -1 +-- !query output +1.000000000000000000000 --- !query 293 +-- !query SELECT cast('1' as binary) / cast(1 as decimal(3, 0)) FROM t --- !query 293 schema +-- !query schema struct<> --- !query 293 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 294 +-- !query SELECT cast('1' as binary) / cast(1 as decimal(5, 0)) FROM t --- !query 294 schema +-- !query schema struct<> --- !query 294 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 295 +-- !query SELECT cast('1' as binary) / cast(1 as decimal(10, 0)) FROM t --- !query 295 schema +-- !query schema struct<> --- !query 295 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 296 +-- !query SELECT cast('1' as binary) / cast(1 as decimal(20, 0)) FROM t --- !query 296 schema +-- !query schema struct<> --- !query 296 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 297 +-- !query SELECT cast('2017/12/11 09:30:00.0' as timestamp) / cast(1 as decimal(3, 0)) FROM t --- !query 297 schema +-- !query schema struct<> --- !query 297 
output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 298 +-- !query SELECT cast('2017/12/11 09:30:00.0' as timestamp) / cast(1 as decimal(5, 0)) FROM t --- !query 298 schema +-- !query schema struct<> --- !query 298 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 299 +-- !query SELECT cast('2017/12/11 09:30:00.0' as timestamp) / cast(1 as decimal(10, 0)) FROM t --- !query 299 schema +-- !query schema struct<> --- !query 299 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 300 +-- !query SELECT cast('2017/12/11 09:30:00.0' as timestamp) / cast(1 as decimal(20, 0)) FROM t --- !query 300 schema +-- !query schema struct<> --- !query 300 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 301 +-- !query SELECT cast('2017/12/11 09:30:00' as date) / cast(1 as decimal(3, 0)) FROM t --- !query 301 schema +-- !query schema struct<> --- !query 301 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 302 +-- !query SELECT cast('2017/12/11 09:30:00' as date) / cast(1 as decimal(5, 0)) FROM t --- !query 302 schema +-- !query schema struct<> --- !query 302 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 303 +-- !query SELECT cast('2017/12/11 09:30:00' as date) / cast(1 as decimal(10, 0)) FROM t --- !query 303 schema +-- !query schema struct<> --- !query 303 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 304 +-- !query SELECT cast('2017/12/11 09:30:00' as date) / cast(1 as decimal(20, 0)) FROM t --- !query 304 schema +-- !query schema struct<> --- !query 304 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017/12/11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 305 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as tinyint) FROM t --- !query 305 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) / CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(9,6)> --- !query 305 output -1 +-- !query output +1.000000 --- !query 306 +-- !query SELECT cast(1 as decimal(5, 0)) / 
cast(1 as tinyint) FROM t --- !query 306 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(11,6)> --- !query 306 output -1 +-- !query output +1.000000 --- !query 307 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as tinyint) FROM t --- !query 307 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 307 output -1 +-- !query output +1.000000 --- !query 308 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as tinyint) FROM t --- !query 308 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(26,6)> --- !query 308 output -1 +-- !query output +1.000000 --- !query 309 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as smallint) FROM t --- !query 309 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) / CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(9,6)> --- !query 309 output -1 +-- !query output +1.000000 --- !query 310 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as smallint) FROM t --- !query 310 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) / CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(11,6)> --- !query 310 output -1 +-- !query output +1.000000 --- !query 311 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as smallint) FROM t --- !query 311 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 311 output -1 +-- !query output +1.000000 --- !query 312 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as smallint) FROM t --- !query 312 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 
AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(26,6)> --- !query 312 output -1 +-- !query output +1.000000 --- !query 313 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as int) FROM t --- !query 313 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> --- !query 313 output -1 +-- !query output +1.00000000000 --- !query 314 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as int) FROM t --- !query 314 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> --- !query 314 output -1 +-- !query output +1.00000000000 --- !query 315 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as int) FROM t --- !query 315 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) / CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(21,11)> --- !query 315 output -1 +-- !query output +1.00000000000 --- !query 316 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as int) FROM t --- !query 316 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> --- !query 316 output -1 +-- !query output +1.00000000000 --- !query 317 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as bigint) FROM t --- !query 317 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(24,21)> --- !query 317 output -1 +-- !query output +1.000000000000000000000 --- !query 318 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as bigint) FROM t --- !query 318 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(26,21)> --- !query 318 output -1 +-- !query output +1.000000000000000000000 
--- !query 319 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as bigint) FROM t --- !query 319 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> --- !query 319 output -1 +-- !query output +1.000000000000000000000 --- !query 320 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as bigint) FROM t --- !query 320 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) / CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(38,18)> --- !query 320 output -1 +-- !query output +1.000000000000000000 --- !query 321 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as float) FROM t --- !query 321 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 321 output +-- !query output 1.0 --- !query 322 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as float) FROM t --- !query 322 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 322 output +-- !query output 1.0 --- !query 323 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as float) FROM t --- !query 323 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 323 output +-- !query output 1.0 --- !query 324 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as float) FROM t --- !query 324 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 324 output +-- !query output 1.0 --- !query 325 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as double) FROM t --- !query 325 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 325 output +-- !query output 1.0 --- !query 326 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as double) FROM t --- 
!query 326 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 326 output +-- !query output 1.0 --- !query 327 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as double) FROM t --- !query 327 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 327 output +-- !query output 1.0 --- !query 328 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as double) FROM t --- !query 328 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 328 output +-- !query output 1.0 --- !query 329 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as decimal(10, 0)) FROM t --- !query 329 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> --- !query 329 output -1 +-- !query output +1.00000000000 --- !query 330 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as decimal(10, 0)) FROM t --- !query 330 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> --- !query 330 output -1 +-- !query output +1.00000000000 --- !query 331 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t --- !query 331 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> --- !query 331 output -1 +-- !query output +1.00000000000 --- !query 332 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as decimal(10, 0)) FROM t --- !query 332 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> --- !query 332 output -1 +-- !query output +1.00000000000 --- !query 333 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as string) FROM t --- !query 333 schema +-- !query schema 
struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 333 output +-- !query output 1.0 --- !query 334 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as string) FROM t --- !query 334 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 334 output +-- !query output 1.0 --- !query 335 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as string) FROM t --- !query 335 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 335 output +-- !query output 1.0 --- !query 336 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as string) FROM t --- !query 336 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 336 output +-- !query output 1.0 --- !query 337 +-- !query SELECT cast(1 as decimal(3, 0)) / cast('1' as binary) FROM t --- !query 337 schema +-- !query schema struct<> --- !query 337 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 338 +-- !query SELECT cast(1 as decimal(5, 0)) / cast('1' as binary) FROM t --- !query 338 schema +-- !query schema struct<> --- !query 338 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 339 +-- !query SELECT cast(1 as decimal(10, 0)) / cast('1' as binary) FROM t --- !query 339 schema +-- !query schema struct<> --- !query 339 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 
'(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 340 +-- !query SELECT cast(1 as decimal(20, 0)) / cast('1' as binary) FROM t --- !query 340 schema +-- !query schema struct<> --- !query 340 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) / CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 341 +-- !query SELECT cast(1 as decimal(3, 0)) / cast(1 as boolean) FROM t --- !query 341 schema +-- !query schema struct<> --- !query 341 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 342 +-- !query SELECT cast(1 as decimal(5, 0)) / cast(1 as boolean) FROM t --- !query 342 schema +-- !query schema struct<> --- !query 342 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 343 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as boolean) FROM t --- !query 343 schema +-- !query schema struct<> --- !query 343 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 344 +-- !query SELECT cast(1 as decimal(20, 0)) / cast(1 as boolean) FROM t --- !query 344 schema +-- !query schema struct<> --- 
!query 344 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) / CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 345 +-- !query SELECT cast(1 as decimal(3, 0)) / cast('2017/12/11 09:30:00.0' as timestamp) FROM t --- !query 345 schema +-- !query schema struct<> --- !query 345 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 346 +-- !query SELECT cast(1 as decimal(5, 0)) / cast('2017/12/11 09:30:00.0' as timestamp) FROM t --- !query 346 schema +-- !query schema struct<> --- !query 346 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 347 +-- !query SELECT cast(1 as decimal(10, 0)) / cast('2017/12/11 09:30:00.0' as timestamp) FROM t --- !query 347 schema +-- !query schema struct<> --- !query 347 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 348 +-- !query SELECT cast(1 as decimal(20, 0)) / cast('2017/12/11 09:30:00.0' as timestamp) FROM t --- !query 348 schema +-- !query schema struct<> --- !query 348 output +-- !query output org.apache.spark.sql.AnalysisException cannot 
resolve '(CAST(1 AS DECIMAL(20,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) / CAST('2017/12/11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 349 +-- !query SELECT cast(1 as decimal(3, 0)) / cast('2017/12/11 09:30:00' as date) FROM t --- !query 349 schema +-- !query schema struct<> --- !query 349 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) / CAST('2017/12/11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) / CAST('2017/12/11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 350 +-- !query SELECT cast(1 as decimal(5, 0)) / cast('2017/12/11 09:30:00' as date) FROM t --- !query 350 schema +-- !query schema struct<> --- !query 350 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) / CAST('2017/12/11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) / CAST('2017/12/11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 351 +-- !query SELECT cast(1 as decimal(10, 0)) / cast('2017/12/11 09:30:00' as date) FROM t --- !query 351 schema +-- !query schema struct<> --- !query 351 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017/12/11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('2017/12/11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 352 +-- !query SELECT cast(1 as decimal(20, 0)) / cast('2017/12/11 09:30:00' as date) FROM t --- !query 352 schema +-- !query schema struct<> --- !query 352 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) / CAST('2017/12/11 09:30:00' AS DATE))' due to data type mismatch: differing types in 
'(CAST(1 AS DECIMAL(20,0)) / CAST('2017/12/11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 353 +-- !query SELECT cast(1 as tinyint) % cast(1 as decimal(3, 0)) FROM t --- !query 353 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) % CAST(1 AS DECIMAL(3,0))):decimal(3,0)> --- !query 353 output +-- !query output 0 --- !query 354 +-- !query SELECT cast(1 as tinyint) % cast(1 as decimal(5, 0)) FROM t --- !query 354 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(3,0)> --- !query 354 output +-- !query output 0 --- !query 355 +-- !query SELECT cast(1 as tinyint) % cast(1 as decimal(10, 0)) FROM t --- !query 355 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(3,0)> --- !query 355 output +-- !query output 0 --- !query 356 +-- !query SELECT cast(1 as tinyint) % cast(1 as decimal(20, 0)) FROM t --- !query 356 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(3,0)> --- !query 356 output +-- !query output 0 --- !query 357 +-- !query SELECT cast(1 as smallint) % cast(1 as decimal(3, 0)) FROM t --- !query 357 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(3,0)> --- !query 357 output +-- !query output 0 --- !query 358 +-- !query SELECT cast(1 as smallint) % cast(1 as decimal(5, 0)) FROM t --- !query 358 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) % CAST(1 AS DECIMAL(5,0))):decimal(5,0)> --- !query 358 output +-- !query output 0 --- !query 359 +-- !query SELECT cast(1 as smallint) % cast(1 as decimal(10, 0)) FROM t --- !query 359 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS 
SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(5,0)> --- !query 359 output +-- !query output 0 --- !query 360 +-- !query SELECT cast(1 as smallint) % cast(1 as decimal(20, 0)) FROM t --- !query 360 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(5,0)> --- !query 360 output +-- !query output 0 --- !query 361 +-- !query SELECT cast(1 as int) % cast(1 as decimal(3, 0)) FROM t --- !query 361 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(3,0)> --- !query 361 output +-- !query output 0 --- !query 362 +-- !query SELECT cast(1 as int) % cast(1 as decimal(5, 0)) FROM t --- !query 362 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(5,0)> --- !query 362 output +-- !query output 0 --- !query 363 +-- !query SELECT cast(1 as int) % cast(1 as decimal(10, 0)) FROM t --- !query 363 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) % CAST(1 AS DECIMAL(10,0))):decimal(10,0)> --- !query 363 output +-- !query output 0 --- !query 364 +-- !query SELECT cast(1 as int) % cast(1 as decimal(20, 0)) FROM t --- !query 364 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(10,0)> --- !query 364 output +-- !query output 0 --- !query 365 +-- !query SELECT cast(1 as bigint) % cast(1 as decimal(3, 0)) FROM t --- !query 365 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(3,0)> --- !query 365 output +-- !query output 0 --- !query 366 +-- !query SELECT cast(1 as bigint) % cast(1 as decimal(5, 0)) FROM t --- 
!query 366 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(5,0)> --- !query 366 output +-- !query output 0 --- !query 367 +-- !query SELECT cast(1 as bigint) % cast(1 as decimal(10, 0)) FROM t --- !query 367 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(10,0)> --- !query 367 output +-- !query output 0 --- !query 368 +-- !query SELECT cast(1 as bigint) % cast(1 as decimal(20, 0)) FROM t --- !query 368 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) % CAST(1 AS DECIMAL(20,0))):decimal(20,0)> --- !query 368 output +-- !query output 0 --- !query 369 +-- !query SELECT cast(1 as float) % cast(1 as decimal(3, 0)) FROM t --- !query 369 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> --- !query 369 output +-- !query output 0.0 --- !query 370 +-- !query SELECT cast(1 as float) % cast(1 as decimal(5, 0)) FROM t --- !query 370 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 370 output +-- !query output 0.0 --- !query 371 +-- !query SELECT cast(1 as float) % cast(1 as decimal(10, 0)) FROM t --- !query 371 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 371 output +-- !query output 0.0 --- !query 372 +-- !query SELECT cast(1 as float) % cast(1 as decimal(20, 0)) FROM t --- !query 372 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 372 output +-- !query output 0.0 --- !query 373 +-- !query SELECT cast(1 as double) % cast(1 as decimal(3, 0)) FROM t --- !query 373 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS 
DECIMAL(3,0)) AS DOUBLE)):double> --- !query 373 output +-- !query output 0.0 --- !query 374 +-- !query SELECT cast(1 as double) % cast(1 as decimal(5, 0)) FROM t --- !query 374 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):double> --- !query 374 output +-- !query output 0.0 --- !query 375 +-- !query SELECT cast(1 as double) % cast(1 as decimal(10, 0)) FROM t --- !query 375 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 375 output +-- !query output 0.0 --- !query 376 +-- !query SELECT cast(1 as double) % cast(1 as decimal(20, 0)) FROM t --- !query 376 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):double> --- !query 376 output +-- !query output 0.0 --- !query 377 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(3, 0)) FROM t --- !query 377 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(3,0)> --- !query 377 output +-- !query output 0 --- !query 378 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(5, 0)) FROM t --- !query 378 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(5,0)> --- !query 378 output +-- !query output 0 --- !query 379 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(10, 0)) FROM t --- !query 379 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS DECIMAL(10,0))):decimal(10,0)> --- !query 379 output +-- !query output 0 --- !query 380 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(20, 0)) FROM t --- !query 380 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(10,0)> --- !query 380 output +-- !query output 0 --- !query 381 +-- !query SELECT 
cast('1' as binary) % cast(1 as decimal(3, 0)) FROM t --- !query 381 schema +-- !query schema struct<> --- !query 381 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 382 +-- !query SELECT cast('1' as binary) % cast(1 as decimal(5, 0)) FROM t --- !query 382 schema +-- !query schema struct<> --- !query 382 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 383 +-- !query SELECT cast('1' as binary) % cast(1 as decimal(10, 0)) FROM t --- !query 383 schema +-- !query schema struct<> --- !query 383 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 384 +-- !query SELECT cast('1' as binary) % cast(1 as decimal(20, 0)) FROM t --- !query 384 schema +-- !query schema struct<> --- !query 384 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 385 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(3, 0)) FROM t --- !query 385 schema +-- !query schema struct<> --- !query 385 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: 
differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 386 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(5, 0)) FROM t --- !query 386 schema +-- !query schema struct<> --- !query 386 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 387 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(10, 0)) FROM t --- !query 387 schema +-- !query schema struct<> --- !query 387 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 388 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) % cast(1 as decimal(20, 0)) FROM t --- !query 388 schema +-- !query schema struct<> --- !query 388 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 389 +-- !query SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(3, 0)) FROM t --- !query 389 schema +-- !query schema struct<> --- !query 389 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' 
AS DATE) % CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 390 +-- !query SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(5, 0)) FROM t --- !query 390 schema +-- !query schema struct<> --- !query 390 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 391 +-- !query SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(10, 0)) FROM t --- !query 391 schema +-- !query schema struct<> --- !query 391 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 392 +-- !query SELECT cast('2017-12-11 09:30:00' as date) % cast(1 as decimal(20, 0)) FROM t --- !query 392 schema +-- !query schema struct<> --- !query 392 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 393 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as tinyint) FROM t --- !query 393 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) % CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):decimal(3,0)> --- !query 393 output +-- !query output 0 --- !query 394 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as tinyint) FROM t --- !query 394 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) % CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):decimal(3,0)> --- 
!query 394 output +-- !query output 0 --- !query 395 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as tinyint) FROM t --- !query 395 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(3,0)> --- !query 395 output +-- !query output 0 --- !query 396 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as tinyint) FROM t --- !query 396 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):decimal(3,0)> --- !query 396 output +-- !query output 0 --- !query 397 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as smallint) FROM t --- !query 397 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) % CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):decimal(3,0)> --- !query 397 output +-- !query output 0 --- !query 398 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as smallint) FROM t --- !query 398 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) % CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):decimal(5,0)> --- !query 398 output +-- !query output 0 --- !query 399 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as smallint) FROM t --- !query 399 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) % CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(5,0)> --- !query 399 output +-- !query output 0 --- !query 400 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as smallint) FROM t --- !query 400 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):decimal(5,0)> --- !query 400 output +-- !query output 0 --- !query 401 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as int) FROM t --- !query 401 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS 
DECIMAL(10,0)) % CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(3,0)> --- !query 401 output +-- !query output 0 --- !query 402 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as int) FROM t --- !query 402 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) % CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(5,0)> --- !query 402 output +-- !query output 0 --- !query 403 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as int) FROM t --- !query 403 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) % CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(10,0)> --- !query 403 output +-- !query output 0 --- !query 404 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as int) FROM t --- !query 404 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(10,0)> --- !query 404 output +-- !query output 0 --- !query 405 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as bigint) FROM t --- !query 405 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(3,0)> --- !query 405 output +-- !query output 0 --- !query 406 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as bigint) FROM t --- !query 406 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(5,0)> --- !query 406 output +-- !query output 0 --- !query 407 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as bigint) FROM t --- !query 407 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) % CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(10,0)> --- !query 407 output +-- !query output 0 --- !query 408 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as bigint) FROM t --- !query 408 
schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) % CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):decimal(20,0)> --- !query 408 output +-- !query output 0 --- !query 409 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as float) FROM t --- !query 409 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 409 output +-- !query output 0.0 --- !query 410 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as float) FROM t --- !query 410 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 410 output +-- !query output 0.0 --- !query 411 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as float) FROM t --- !query 411 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 411 output +-- !query output 0.0 --- !query 412 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as float) FROM t --- !query 412 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 412 output +-- !query output 0.0 --- !query 413 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as double) FROM t --- !query 413 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 413 output +-- !query output 0.0 --- !query 414 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as double) FROM t --- !query 414 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 414 output +-- !query output 0.0 --- !query 415 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as double) FROM t --- !query 415 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 415 output +-- !query output 0.0 --- !query 416 +-- !query SELECT cast(1 as decimal(20, 
0)) % cast(1 as double) FROM t --- !query 416 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 416 output +-- !query output 0.0 --- !query 417 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as decimal(10, 0)) FROM t --- !query 417 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(3,0)> --- !query 417 output +-- !query output 0 --- !query 418 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as decimal(10, 0)) FROM t --- !query 418 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(5,0)> --- !query 418 output +-- !query output 0 --- !query 419 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as decimal(10, 0)) FROM t --- !query 419 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS DECIMAL(10,0))):decimal(10,0)> --- !query 419 output +-- !query output 0 --- !query 420 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as decimal(10, 0)) FROM t --- !query 420 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) % CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(10,0)> --- !query 420 output +-- !query output 0 --- !query 421 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as string) FROM t --- !query 421 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 421 output +-- !query output 0.0 --- !query 422 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as string) FROM t --- !query 422 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 422 output +-- !query output 0.0 --- !query 423 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as string) FROM t --- !query 423 schema +-- !query schema 
struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 423 output +-- !query output 0.0 --- !query 424 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as string) FROM t --- !query 424 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) % CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 424 output +-- !query output 0.0 --- !query 425 +-- !query SELECT cast(1 as decimal(3, 0)) % cast('1' as binary) FROM t --- !query 425 schema +-- !query schema struct<> --- !query 425 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 426 +-- !query SELECT cast(1 as decimal(5, 0)) % cast('1' as binary) FROM t --- !query 426 schema +-- !query schema struct<> --- !query 426 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 427 +-- !query SELECT cast(1 as decimal(10, 0)) % cast('1' as binary) FROM t --- !query 427 schema +-- !query schema struct<> --- !query 427 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 428 +-- !query SELECT cast(1 as decimal(20, 0)) % cast('1' as binary) FROM t --- !query 428 schema +-- !query schema struct<> --- !query 428 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS 
DECIMAL(20,0)) % CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 429 +-- !query SELECT cast(1 as decimal(3, 0)) % cast(1 as boolean) FROM t --- !query 429 schema +-- !query schema struct<> --- !query 429 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 430 +-- !query SELECT cast(1 as decimal(5, 0)) % cast(1 as boolean) FROM t --- !query 430 schema +-- !query schema struct<> --- !query 430 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 431 +-- !query SELECT cast(1 as decimal(10, 0)) % cast(1 as boolean) FROM t --- !query 431 schema +-- !query schema struct<> --- !query 431 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 432 +-- !query SELECT cast(1 as decimal(20, 0)) % cast(1 as boolean) FROM t --- !query 432 schema +-- !query schema struct<> --- !query 432 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) % CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 433 +-- !query SELECT cast(1 as decimal(3, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 433 schema +-- !query schema struct<> --- !query 433 output +-- !query output org.apache.spark.sql.AnalysisException cannot 
resolve '(CAST(1 AS DECIMAL(3,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 434 +-- !query SELECT cast(1 as decimal(5, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 434 schema +-- !query schema struct<> --- !query 434 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 435 +-- !query SELECT cast(1 as decimal(10, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 435 schema +-- !query schema struct<> --- !query 435 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 436 +-- !query SELECT cast(1 as decimal(20, 0)) % cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 436 schema +-- !query schema struct<> --- !query 436 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 437 +-- !query SELECT cast(1 as decimal(3, 0)) % cast('2017-12-11 09:30:00' as date) FROM t --- !query 437 schema +-- !query schema struct<> --- !query 437 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) % 
CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 438 +-- !query SELECT cast(1 as decimal(5, 0)) % cast('2017-12-11 09:30:00' as date) FROM t --- !query 438 schema +-- !query schema struct<> --- !query 438 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 439 +-- !query SELECT cast(1 as decimal(10, 0)) % cast('2017-12-11 09:30:00' as date) FROM t --- !query 439 schema +-- !query schema struct<> --- !query 439 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 440 +-- !query SELECT cast(1 as decimal(20, 0)) % cast('2017-12-11 09:30:00' as date) FROM t --- !query 440 schema +-- !query schema struct<> --- !query 440 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) % CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 441 +-- !query SELECT pmod(cast(1 as tinyint), cast(1 as decimal(3, 0))) FROM t --- !query 441 schema +-- !query schema struct --- !query 441 output +-- !query output 0 --- !query 442 +-- !query SELECT pmod(cast(1 as tinyint), cast(1 as decimal(5, 0))) FROM t --- !query 442 schema +-- !query schema struct --- !query 442 output +-- !query output 0 --- !query 443 +-- !query SELECT pmod(cast(1 as 
tinyint), cast(1 as decimal(10, 0))) FROM t --- !query 443 schema +-- !query schema struct --- !query 443 output +-- !query output 0 --- !query 444 +-- !query SELECT pmod(cast(1 as tinyint), cast(1 as decimal(20, 0))) FROM t --- !query 444 schema +-- !query schema struct --- !query 444 output +-- !query output 0 --- !query 445 +-- !query SELECT pmod(cast(1 as smallint), cast(1 as decimal(3, 0))) FROM t --- !query 445 schema +-- !query schema struct --- !query 445 output +-- !query output 0 --- !query 446 +-- !query SELECT pmod(cast(1 as smallint), cast(1 as decimal(5, 0))) FROM t --- !query 446 schema +-- !query schema struct --- !query 446 output +-- !query output 0 --- !query 447 +-- !query SELECT pmod(cast(1 as smallint), cast(1 as decimal(10, 0))) FROM t --- !query 447 schema +-- !query schema struct --- !query 447 output +-- !query output 0 --- !query 448 +-- !query SELECT pmod(cast(1 as smallint), cast(1 as decimal(20, 0))) FROM t --- !query 448 schema +-- !query schema struct --- !query 448 output +-- !query output 0 --- !query 449 +-- !query SELECT pmod(cast(1 as int), cast(1 as decimal(3, 0))) FROM t --- !query 449 schema +-- !query schema struct --- !query 449 output +-- !query output 0 --- !query 450 +-- !query SELECT pmod(cast(1 as int), cast(1 as decimal(5, 0))) FROM t --- !query 450 schema +-- !query schema struct --- !query 450 output +-- !query output 0 --- !query 451 +-- !query SELECT pmod(cast(1 as int), cast(1 as decimal(10, 0))) FROM t --- !query 451 schema +-- !query schema struct --- !query 451 output +-- !query output 0 --- !query 452 +-- !query SELECT pmod(cast(1 as int), cast(1 as decimal(20, 0))) FROM t --- !query 452 schema +-- !query schema struct --- !query 452 output +-- !query output 0 --- !query 453 +-- !query SELECT pmod(cast(1 as bigint), cast(1 as decimal(3, 0))) FROM t --- !query 453 schema +-- !query schema struct --- !query 453 output +-- !query output 0 --- !query 454 +-- !query SELECT pmod(cast(1 as bigint), cast(1 as 
decimal(5, 0))) FROM t --- !query 454 schema +-- !query schema struct --- !query 454 output +-- !query output 0 --- !query 455 +-- !query SELECT pmod(cast(1 as bigint), cast(1 as decimal(10, 0))) FROM t --- !query 455 schema +-- !query schema struct --- !query 455 output +-- !query output 0 --- !query 456 +-- !query SELECT pmod(cast(1 as bigint), cast(1 as decimal(20, 0))) FROM t --- !query 456 schema +-- !query schema struct --- !query 456 output +-- !query output 0 --- !query 457 +-- !query SELECT pmod(cast(1 as float), cast(1 as decimal(3, 0))) FROM t --- !query 457 schema +-- !query schema struct --- !query 457 output +-- !query output 0.0 --- !query 458 +-- !query SELECT pmod(cast(1 as float), cast(1 as decimal(5, 0))) FROM t --- !query 458 schema +-- !query schema struct --- !query 458 output +-- !query output 0.0 --- !query 459 +-- !query SELECT pmod(cast(1 as float), cast(1 as decimal(10, 0))) FROM t --- !query 459 schema +-- !query schema struct --- !query 459 output +-- !query output 0.0 --- !query 460 +-- !query SELECT pmod(cast(1 as float), cast(1 as decimal(20, 0))) FROM t --- !query 460 schema +-- !query schema struct --- !query 460 output +-- !query output 0.0 --- !query 461 +-- !query SELECT pmod(cast(1 as double), cast(1 as decimal(3, 0))) FROM t --- !query 461 schema +-- !query schema struct --- !query 461 output +-- !query output 0.0 --- !query 462 +-- !query SELECT pmod(cast(1 as double), cast(1 as decimal(5, 0))) FROM t --- !query 462 schema +-- !query schema struct --- !query 462 output +-- !query output 0.0 --- !query 463 +-- !query SELECT pmod(cast(1 as double), cast(1 as decimal(10, 0))) FROM t --- !query 463 schema +-- !query schema struct --- !query 463 output +-- !query output 0.0 --- !query 464 +-- !query SELECT pmod(cast(1 as double), cast(1 as decimal(20, 0))) FROM t --- !query 464 schema +-- !query schema struct --- !query 464 output +-- !query output 0.0 --- !query 465 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as 
decimal(3, 0))) FROM t --- !query 465 schema +-- !query schema struct --- !query 465 output +-- !query output 0 --- !query 466 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(5, 0))) FROM t --- !query 466 schema +-- !query schema struct --- !query 466 output +-- !query output 0 --- !query 467 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t --- !query 467 schema +-- !query schema struct --- !query 467 output +-- !query output 0 --- !query 468 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(20, 0))) FROM t --- !query 468 schema +-- !query schema struct --- !query 468 output +-- !query output 0 --- !query 469 +-- !query SELECT pmod(cast('1' as binary), cast(1 as decimal(3, 0))) FROM t --- !query 469 schema +-- !query schema struct<> --- !query 469 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 470 +-- !query SELECT pmod(cast('1' as binary), cast(1 as decimal(5, 0))) FROM t --- !query 470 schema +-- !query schema struct<> --- !query 470 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 471 +-- !query SELECT pmod(cast('1' as binary), cast(1 as decimal(10, 0))) FROM t --- !query 471 schema +-- !query schema struct<> --- !query 471 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 472 +-- !query SELECT 
pmod(cast('1' as binary), cast(1 as decimal(20, 0))) FROM t --- !query 472 schema +-- !query schema struct<> --- !query 472 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in 'pmod(CAST('1' AS BINARY), CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 473 +-- !query SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(3, 0))) FROM t --- !query 473 schema +-- !query schema struct<> --- !query 473 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 474 +-- !query SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(5, 0))) FROM t --- !query 474 schema +-- !query schema struct<> --- !query 474 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 475 +-- !query SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as decimal(10, 0))) FROM t --- !query 475 schema +-- !query schema struct<> --- !query 475 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 476 +-- !query SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), cast(1 as 
decimal(20, 0))) FROM t --- !query 476 schema +-- !query schema struct<> --- !query 476 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 477 +-- !query SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(3, 0))) FROM t --- !query 477 schema +-- !query schema struct<> --- !query 477 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 478 +-- !query SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(5, 0))) FROM t --- !query 478 schema +-- !query schema struct<> --- !query 478 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 479 +-- !query SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(10, 0))) FROM t --- !query 479 schema +-- !query schema struct<> --- !query 479 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 480 +-- !query SELECT pmod(cast('2017-12-11 09:30:00' as date), cast(1 as decimal(20, 0))) FROM t --- !query 480 schema +-- !query schema struct<> --- 
!query 480 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 481 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as tinyint)) FROM t --- !query 481 schema +-- !query schema struct --- !query 481 output +-- !query output 0 --- !query 482 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as tinyint)) FROM t --- !query 482 schema +-- !query schema struct --- !query 482 output +-- !query output 0 --- !query 483 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as tinyint)) FROM t --- !query 483 schema +-- !query schema struct --- !query 483 output +-- !query output 0 --- !query 484 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as tinyint)) FROM t --- !query 484 schema +-- !query schema struct --- !query 484 output +-- !query output 0 --- !query 485 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as smallint)) FROM t --- !query 485 schema +-- !query schema struct --- !query 485 output +-- !query output 0 --- !query 486 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as smallint)) FROM t --- !query 486 schema +-- !query schema struct --- !query 486 output +-- !query output 0 --- !query 487 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as smallint)) FROM t --- !query 487 schema +-- !query schema struct --- !query 487 output +-- !query output 0 --- !query 488 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as smallint)) FROM t --- !query 488 schema +-- !query schema struct --- !query 488 output +-- !query output 0 --- !query 489 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as int)) FROM t --- !query 489 schema +-- !query schema struct --- !query 489 output +-- !query output 0 --- !query 490 +-- !query SELECT pmod(cast(1 as 
decimal(5, 0)) , cast(1 as int)) FROM t --- !query 490 schema +-- !query schema struct --- !query 490 output +-- !query output 0 --- !query 491 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as int)) FROM t --- !query 491 schema +-- !query schema struct --- !query 491 output +-- !query output 0 --- !query 492 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as int)) FROM t --- !query 492 schema +-- !query schema struct --- !query 492 output +-- !query output 0 --- !query 493 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as bigint)) FROM t --- !query 493 schema +-- !query schema struct --- !query 493 output +-- !query output 0 --- !query 494 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as bigint)) FROM t --- !query 494 schema +-- !query schema struct --- !query 494 output +-- !query output 0 --- !query 495 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as bigint)) FROM t --- !query 495 schema +-- !query schema struct --- !query 495 output +-- !query output 0 --- !query 496 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as bigint)) FROM t --- !query 496 schema +-- !query schema struct --- !query 496 output +-- !query output 0 --- !query 497 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as float)) FROM t --- !query 497 schema +-- !query schema struct --- !query 497 output +-- !query output 0.0 --- !query 498 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as float)) FROM t --- !query 498 schema +-- !query schema struct --- !query 498 output +-- !query output 0.0 --- !query 499 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as float)) FROM t --- !query 499 schema +-- !query schema struct --- !query 499 output +-- !query output 0.0 --- !query 500 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as float)) FROM t --- !query 500 schema +-- !query schema struct --- !query 500 output +-- !query output 0.0 --- !query 501 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 
as double)) FROM t --- !query 501 schema +-- !query schema struct --- !query 501 output +-- !query output 0.0 --- !query 502 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as double)) FROM t --- !query 502 schema +-- !query schema struct --- !query 502 output +-- !query output 0.0 --- !query 503 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as double)) FROM t --- !query 503 schema +-- !query schema struct --- !query 503 output +-- !query output 0.0 --- !query 504 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as double)) FROM t --- !query 504 schema +-- !query schema struct --- !query 504 output +-- !query output 0.0 --- !query 505 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as decimal(10, 0))) FROM t --- !query 505 schema +-- !query schema struct --- !query 505 output +-- !query output 0 --- !query 506 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as decimal(10, 0))) FROM t --- !query 506 schema +-- !query schema struct --- !query 506 output +-- !query output 0 --- !query 507 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t --- !query 507 schema +-- !query schema struct --- !query 507 output +-- !query output 0 --- !query 508 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as decimal(10, 0))) FROM t --- !query 508 schema +-- !query schema struct --- !query 508 output +-- !query output 0 --- !query 509 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as string)) FROM t --- !query 509 schema +-- !query schema struct --- !query 509 output +-- !query output 0.0 --- !query 510 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as string)) FROM t --- !query 510 schema +-- !query schema struct --- !query 510 output +-- !query output 0.0 --- !query 511 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as string)) FROM t --- !query 511 schema +-- !query schema struct --- !query 511 output +-- !query output 0.0 --- !query 512 +-- !query SELECT pmod(cast(1 
as decimal(20, 0)), cast(1 as string)) FROM t --- !query 512 schema +-- !query schema struct --- !query 512 output +-- !query output 0.0 --- !query 513 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast('1' as binary)) FROM t --- !query 513 schema +-- !query schema struct<> --- !query 513 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 514 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast('1' as binary)) FROM t --- !query 514 schema +-- !query schema struct<> --- !query 514 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 515 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast('1' as binary)) FROM t --- !query 515 schema +-- !query schema struct<> --- !query 515 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 516 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast('1' as binary)) FROM t --- !query 516 schema +-- !query schema struct<> --- !query 516 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 517 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast(1 as boolean)) FROM t --- !query 517 schema +-- !query schema struct<> 
--- !query 517 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 518 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast(1 as boolean)) FROM t --- !query 518 schema +-- !query schema struct<> --- !query 518 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 519 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast(1 as boolean)) FROM t --- !query 519 schema +-- !query schema struct<> --- !query 519 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(10,0)), CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 520 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast(1 as boolean)) FROM t --- !query 520 schema +-- !query schema struct<> --- !query 520 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 521 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 521 schema +-- !query schema struct<> --- !query 521 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 522 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 522 schema +-- !query schema struct<> --- !query 522 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 523 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 523 schema +-- !query schema struct<> --- !query 523 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 524 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 524 schema +-- !query schema struct<> --- !query 524 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 525 +-- !query SELECT pmod(cast(1 as decimal(3, 0)) , cast('2017-12-11 09:30:00' as date)) FROM t --- !query 525 schema +-- !query schema struct<> --- !query 525 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(3,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(3,0)), 
CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 526 +-- !query SELECT pmod(cast(1 as decimal(5, 0)) , cast('2017-12-11 09:30:00' as date)) FROM t --- !query 526 schema +-- !query schema struct<> --- !query 526 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(5,0)), CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 527 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 527 schema +-- !query schema struct<> --- !query 527 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 528 +-- !query SELECT pmod(cast(1 as decimal(20, 0)), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 528 schema +-- !query schema struct<> --- !query 528 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS DECIMAL(20,0)), CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 529 +-- !query SELECT cast(1 as tinyint) = cast(1 as decimal(3, 0)) FROM t --- !query 529 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) = CAST(1 AS DECIMAL(3,0))):boolean> --- !query 529 output +-- !query output true --- !query 530 +-- !query SELECT cast(1 as tinyint) = cast(1 as decimal(5, 0)) FROM t --- !query 530 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS 
DECIMAL(5,0))):boolean> --- !query 530 output +-- !query output true --- !query 531 +-- !query SELECT cast(1 as tinyint) = cast(1 as decimal(10, 0)) FROM t --- !query 531 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 531 output +-- !query output true --- !query 532 +-- !query SELECT cast(1 as tinyint) = cast(1 as decimal(20, 0)) FROM t --- !query 532 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 532 output +-- !query output true --- !query 533 +-- !query SELECT cast(1 as smallint) = cast(1 as decimal(3, 0)) FROM t --- !query 533 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 533 output +-- !query output true --- !query 534 +-- !query SELECT cast(1 as smallint) = cast(1 as decimal(5, 0)) FROM t --- !query 534 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) = CAST(1 AS DECIMAL(5,0))):boolean> --- !query 534 output +-- !query output true --- !query 535 +-- !query SELECT cast(1 as smallint) = cast(1 as decimal(10, 0)) FROM t --- !query 535 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 535 output +-- !query output true --- !query 536 +-- !query SELECT cast(1 as smallint) = cast(1 as decimal(20, 0)) FROM t --- !query 536 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 536 output +-- !query output true --- !query 537 +-- !query SELECT cast(1 as int) = cast(1 as decimal(3, 0)) FROM t --- !query 537 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS 
INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 537 output +-- !query output true --- !query 538 +-- !query SELECT cast(1 as int) = cast(1 as decimal(5, 0)) FROM t --- !query 538 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 538 output +-- !query output true --- !query 539 +-- !query SELECT cast(1 as int) = cast(1 as decimal(10, 0)) FROM t --- !query 539 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> --- !query 539 output +-- !query output true --- !query 540 +-- !query SELECT cast(1 as int) = cast(1 as decimal(20, 0)) FROM t --- !query 540 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 540 output +-- !query output true --- !query 541 +-- !query SELECT cast(1 as bigint) = cast(1 as decimal(3, 0)) FROM t --- !query 541 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 541 output +-- !query output true --- !query 542 +-- !query SELECT cast(1 as bigint) = cast(1 as decimal(5, 0)) FROM t --- !query 542 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 542 output +-- !query output true --- !query 543 +-- !query SELECT cast(1 as bigint) = cast(1 as decimal(10, 0)) FROM t --- !query 543 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 543 output +-- !query output true --- !query 544 +-- !query SELECT cast(1 as bigint) = cast(1 as decimal(20, 0)) FROM t --- !query 544 
schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) = CAST(1 AS DECIMAL(20,0))):boolean> --- !query 544 output +-- !query output true --- !query 545 +-- !query SELECT cast(1 as float) = cast(1 as decimal(3, 0)) FROM t --- !query 545 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 545 output +-- !query output true --- !query 546 +-- !query SELECT cast(1 as float) = cast(1 as decimal(5, 0)) FROM t --- !query 546 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 546 output +-- !query output true --- !query 547 +-- !query SELECT cast(1 as float) = cast(1 as decimal(10, 0)) FROM t --- !query 547 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 547 output +-- !query output true --- !query 548 +-- !query SELECT cast(1 as float) = cast(1 as decimal(20, 0)) FROM t --- !query 548 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 548 output +-- !query output true --- !query 549 +-- !query SELECT cast(1 as double) = cast(1 as decimal(3, 0)) FROM t --- !query 549 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 549 output +-- !query output true --- !query 550 +-- !query SELECT cast(1 as double) = cast(1 as decimal(5, 0)) FROM t --- !query 550 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 550 output +-- !query output true --- !query 551 +-- !query SELECT cast(1 as double) = cast(1 as decimal(10, 0)) FROM t --- !query 551 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 551 output +-- !query output true --- !query 552 +-- !query SELECT cast(1 as 
double) = cast(1 as decimal(20, 0)) FROM t --- !query 552 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 552 output +-- !query output true --- !query 553 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(3, 0)) FROM t --- !query 553 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 553 output +-- !query output true --- !query 554 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(5, 0)) FROM t --- !query 554 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 554 output +-- !query output true --- !query 555 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(10, 0)) FROM t --- !query 555 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> --- !query 555 output +-- !query output true --- !query 556 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(20, 0)) FROM t --- !query 556 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 556 output +-- !query output true --- !query 557 +-- !query SELECT cast('1' as binary) = cast(1 as decimal(3, 0)) FROM t --- !query 557 schema +-- !query schema struct<> --- !query 557 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 558 +-- !query SELECT cast('1' as binary) = cast(1 as decimal(5, 0)) FROM t --- !query 558 schema +-- !query schema struct<> --- !query 558 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS 
BINARY) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 559 +-- !query SELECT cast('1' as binary) = cast(1 as decimal(10, 0)) FROM t --- !query 559 schema +-- !query schema struct<> --- !query 559 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 560 +-- !query SELECT cast('1' as binary) = cast(1 as decimal(20, 0)) FROM t --- !query 560 schema +-- !query schema struct<> --- !query 560 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 561 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(3, 0)) FROM t --- !query 561 schema +-- !query schema struct<> --- !query 561 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 562 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(5, 0)) FROM t --- !query 562 schema +-- !query schema struct<> --- !query 562 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- 
!query 563 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(10, 0)) FROM t --- !query 563 schema +-- !query schema struct<> --- !query 563 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 564 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) = cast(1 as decimal(20, 0)) FROM t --- !query 564 schema +-- !query schema struct<> --- !query 564 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 565 +-- !query SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(3, 0)) FROM t --- !query 565 schema +-- !query schema struct<> --- !query 565 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 566 +-- !query SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(5, 0)) FROM t --- !query 566 schema +-- !query schema struct<> --- !query 566 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 567 +-- !query SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as 
decimal(10, 0)) FROM t --- !query 567 schema +-- !query schema struct<> --- !query 567 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 568 +-- !query SELECT cast('2017-12-11 09:30:00' as date) = cast(1 as decimal(20, 0)) FROM t --- !query 568 schema +-- !query schema struct<> --- !query 568 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 569 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as tinyint) FROM t --- !query 569 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) = CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> --- !query 569 output +-- !query output true --- !query 570 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as tinyint) FROM t --- !query 570 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 570 output +-- !query output true --- !query 571 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as tinyint) FROM t --- !query 571 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 571 output +-- !query output true --- !query 572 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as tinyint) FROM t --- !query 572 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 572 output +-- 
!query output true --- !query 573 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as smallint) FROM t --- !query 573 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 573 output +-- !query output true --- !query 574 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as smallint) FROM t --- !query 574 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> --- !query 574 output +-- !query output true --- !query 575 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as smallint) FROM t --- !query 575 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 575 output +-- !query output true --- !query 576 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as smallint) FROM t --- !query 576 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 576 output +-- !query output true --- !query 577 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as int) FROM t --- !query 577 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 577 output +-- !query output true --- !query 578 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as int) FROM t --- !query 578 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 578 output +-- !query output true --- !query 579 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as int) FROM t --- !query 579 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> --- !query 
579 output +-- !query output true --- !query 580 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as int) FROM t --- !query 580 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 580 output +-- !query output true --- !query 581 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as bigint) FROM t --- !query 581 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 581 output +-- !query output true --- !query 582 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as bigint) FROM t --- !query 582 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 582 output +-- !query output true --- !query 583 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as bigint) FROM t --- !query 583 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 583 output +-- !query output true --- !query 584 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as bigint) FROM t --- !query 584 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> --- !query 584 output +-- !query output true --- !query 585 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as float) FROM t --- !query 585 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 585 output +-- !query output true --- !query 586 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as float) FROM t --- !query 586 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 586 output +-- 
!query output true --- !query 587 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as float) FROM t --- !query 587 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 587 output +-- !query output true --- !query 588 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as float) FROM t --- !query 588 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 588 output +-- !query output true --- !query 589 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as double) FROM t --- !query 589 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 589 output +-- !query output true --- !query 590 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as double) FROM t --- !query 590 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 590 output +-- !query output true --- !query 591 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as double) FROM t --- !query 591 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 591 output +-- !query output true --- !query 592 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as double) FROM t --- !query 592 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 592 output +-- !query output true --- !query 593 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as decimal(10, 0)) FROM t --- !query 593 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 593 output +-- !query output true --- !query 594 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as decimal(10, 0)) FROM t --- !query 594 schema +-- !query schema struct<(CAST(CAST(1 AS 
DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 594 output +-- !query output true --- !query 595 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as decimal(10, 0)) FROM t --- !query 595 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0))):boolean> --- !query 595 output +-- !query output true --- !query 596 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as decimal(10, 0)) FROM t --- !query 596 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 596 output +-- !query output true --- !query 597 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as string) FROM t --- !query 597 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 597 output +-- !query output true --- !query 598 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as string) FROM t --- !query 598 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 598 output +-- !query output true --- !query 599 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 as string) FROM t --- !query 599 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 599 output +-- !query output true --- !query 600 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as string) FROM t --- !query 600 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 600 output +-- !query output true --- !query 601 +-- !query SELECT cast(1 as decimal(3, 0)) = cast('1' as binary) FROM t --- !query 601 schema +-- !query schema struct<> --- !query 601 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) = 
CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 602 +-- !query SELECT cast(1 as decimal(5, 0)) = cast('1' as binary) FROM t --- !query 602 schema +-- !query schema struct<> --- !query 602 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 603 +-- !query SELECT cast(1 as decimal(10, 0)) = cast('1' as binary) FROM t --- !query 603 schema +-- !query schema struct<> --- !query 603 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 604 +-- !query SELECT cast(1 as decimal(20, 0)) = cast('1' as binary) FROM t --- !query 604 schema +-- !query schema struct<> --- !query 604 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 605 +-- !query SELECT cast(1 as decimal(3, 0)) = cast(1 as boolean) FROM t --- !query 605 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(3,0))):boolean> --- !query 605 output +-- !query output true --- !query 606 +-- !query SELECT cast(1 as decimal(5, 0)) = cast(1 as boolean) FROM t --- !query 606 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(5,0))):boolean> --- !query 606 output +-- !query output true --- !query 607 +-- !query SELECT cast(1 as decimal(10, 0)) = cast(1 
as boolean) FROM t --- !query 607 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(10,0))):boolean> --- !query 607 output +-- !query output true --- !query 608 +-- !query SELECT cast(1 as decimal(20, 0)) = cast(1 as boolean) FROM t --- !query 608 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(20,0))):boolean> --- !query 608 output +-- !query output true --- !query 609 +-- !query SELECT cast(1 as decimal(3, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 609 schema +-- !query schema struct<> --- !query 609 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 610 +-- !query SELECT cast(1 as decimal(5, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 610 schema +-- !query schema struct<> --- !query 610 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 611 +-- !query SELECT cast(1 as decimal(10, 0)) = cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 611 schema +-- !query schema struct<> --- !query 611 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 612 +-- !query SELECT cast(1 as decimal(20, 0)) = cast('2017-12-11 
09:30:00.0' as timestamp) FROM t --- !query 612 schema +-- !query schema struct<> --- !query 612 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 613 +-- !query SELECT cast(1 as decimal(3, 0)) = cast('2017-12-11 09:30:00' as date) FROM t --- !query 613 schema +-- !query schema struct<> --- !query 613 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 614 +-- !query SELECT cast(1 as decimal(5, 0)) = cast('2017-12-11 09:30:00' as date) FROM t --- !query 614 schema +-- !query schema struct<> --- !query 614 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 615 +-- !query SELECT cast(1 as decimal(10, 0)) = cast('2017-12-11 09:30:00' as date) FROM t --- !query 615 schema +-- !query schema struct<> --- !query 615 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 616 +-- !query SELECT cast(1 as decimal(20, 0)) = cast('2017-12-11 09:30:00' as date) FROM t --- !query 616 schema +-- !query schema struct<> --- !query 616 output +-- !query 
output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 617 +-- !query SELECT cast(1 as tinyint) <=> cast(1 as decimal(3, 0)) FROM t --- !query 617 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) <=> CAST(1 AS DECIMAL(3,0))):boolean> --- !query 617 output +-- !query output true --- !query 618 +-- !query SELECT cast(1 as tinyint) <=> cast(1 as decimal(5, 0)) FROM t --- !query 618 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 618 output +-- !query output true --- !query 619 +-- !query SELECT cast(1 as tinyint) <=> cast(1 as decimal(10, 0)) FROM t --- !query 619 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 619 output +-- !query output true --- !query 620 +-- !query SELECT cast(1 as tinyint) <=> cast(1 as decimal(20, 0)) FROM t --- !query 620 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 620 output +-- !query output true --- !query 621 +-- !query SELECT cast(1 as smallint) <=> cast(1 as decimal(3, 0)) FROM t --- !query 621 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 621 output +-- !query output true --- !query 622 +-- !query SELECT cast(1 as smallint) <=> cast(1 as decimal(5, 0)) FROM t --- !query 622 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) <=> CAST(1 AS DECIMAL(5,0))):boolean> --- !query 622 
output +-- !query output true --- !query 623 +-- !query SELECT cast(1 as smallint) <=> cast(1 as decimal(10, 0)) FROM t --- !query 623 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 623 output +-- !query output true --- !query 624 +-- !query SELECT cast(1 as smallint) <=> cast(1 as decimal(20, 0)) FROM t --- !query 624 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 624 output +-- !query output true --- !query 625 +-- !query SELECT cast(1 as int) <=> cast(1 as decimal(3, 0)) FROM t --- !query 625 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 625 output +-- !query output true --- !query 626 +-- !query SELECT cast(1 as int) <=> cast(1 as decimal(5, 0)) FROM t --- !query 626 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 626 output +-- !query output true --- !query 627 +-- !query SELECT cast(1 as int) <=> cast(1 as decimal(10, 0)) FROM t --- !query 627 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> --- !query 627 output +-- !query output true --- !query 628 +-- !query SELECT cast(1 as int) <=> cast(1 as decimal(20, 0)) FROM t --- !query 628 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 628 output +-- !query output true --- !query 629 +-- !query SELECT cast(1 as bigint) <=> cast(1 as decimal(3, 0)) FROM t --- !query 629 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS 
DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 629 output +-- !query output true --- !query 630 +-- !query SELECT cast(1 as bigint) <=> cast(1 as decimal(5, 0)) FROM t --- !query 630 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 630 output +-- !query output true --- !query 631 +-- !query SELECT cast(1 as bigint) <=> cast(1 as decimal(10, 0)) FROM t --- !query 631 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 631 output +-- !query output true --- !query 632 +-- !query SELECT cast(1 as bigint) <=> cast(1 as decimal(20, 0)) FROM t --- !query 632 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) <=> CAST(1 AS DECIMAL(20,0))):boolean> --- !query 632 output +-- !query output true --- !query 633 +-- !query SELECT cast(1 as float) <=> cast(1 as decimal(3, 0)) FROM t --- !query 633 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 633 output +-- !query output true --- !query 634 +-- !query SELECT cast(1 as float) <=> cast(1 as decimal(5, 0)) FROM t --- !query 634 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 634 output +-- !query output true --- !query 635 +-- !query SELECT cast(1 as float) <=> cast(1 as decimal(10, 0)) FROM t --- !query 635 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 635 output +-- !query output true --- !query 636 +-- !query SELECT cast(1 as float) <=> cast(1 as decimal(20, 0)) FROM t --- !query 636 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS 
DOUBLE)):boolean> --- !query 636 output +-- !query output true --- !query 637 +-- !query SELECT cast(1 as double) <=> cast(1 as decimal(3, 0)) FROM t --- !query 637 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 637 output +-- !query output true --- !query 638 +-- !query SELECT cast(1 as double) <=> cast(1 as decimal(5, 0)) FROM t --- !query 638 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 638 output +-- !query output true --- !query 639 +-- !query SELECT cast(1 as double) <=> cast(1 as decimal(10, 0)) FROM t --- !query 639 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 639 output +-- !query output true --- !query 640 +-- !query SELECT cast(1 as double) <=> cast(1 as decimal(20, 0)) FROM t --- !query 640 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 640 output +-- !query output true --- !query 641 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(3, 0)) FROM t --- !query 641 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 641 output +-- !query output true --- !query 642 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(5, 0)) FROM t --- !query 642 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 642 output +-- !query output true --- !query 643 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(10, 0)) FROM t --- !query 643 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> --- !query 643 output +-- !query output true --- !query 644 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as 
decimal(20, 0)) FROM t --- !query 644 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 644 output +-- !query output true --- !query 645 +-- !query SELECT cast('1' as binary) <=> cast(1 as decimal(3, 0)) FROM t --- !query 645 schema +-- !query schema struct<> --- !query 645 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 646 +-- !query SELECT cast('1' as binary) <=> cast(1 as decimal(5, 0)) FROM t --- !query 646 schema +-- !query schema struct<> --- !query 646 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 647 +-- !query SELECT cast('1' as binary) <=> cast(1 as decimal(10, 0)) FROM t --- !query 647 schema +-- !query schema struct<> --- !query 647 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 648 +-- !query SELECT cast('1' as binary) <=> cast(1 as decimal(20, 0)) FROM t --- !query 648 schema +-- !query schema struct<> --- !query 648 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <=> CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 649 +-- !query SELECT cast('2017-12-11 09:30:00.0' as 
timestamp) <=> cast(1 as decimal(3, 0)) FROM t --- !query 649 schema +-- !query schema struct<> --- !query 649 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 650 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(5, 0)) FROM t --- !query 650 schema +-- !query schema struct<> --- !query 650 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 651 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(10, 0)) FROM t --- !query 651 schema +-- !query schema struct<> --- !query 651 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 652 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> cast(1 as decimal(20, 0)) FROM t --- !query 652 schema +-- !query schema struct<> --- !query 652 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <=> CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 653 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <=> 
cast(1 as decimal(3, 0)) FROM t --- !query 653 schema +-- !query schema struct<> --- !query 653 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 654 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(5, 0)) FROM t --- !query 654 schema +-- !query schema struct<> --- !query 654 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 655 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(10, 0)) FROM t --- !query 655 schema +-- !query schema struct<> --- !query 655 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 656 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <=> cast(1 as decimal(20, 0)) FROM t --- !query 656 schema +-- !query schema struct<> --- !query 656 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <=> CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 657 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as tinyint) FROM t --- !query 657 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) <=> CAST(CAST(1 AS TINYINT) 
AS DECIMAL(3,0))):boolean> --- !query 657 output +-- !query output true --- !query 658 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as tinyint) FROM t --- !query 658 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) <=> CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 658 output +-- !query output true --- !query 659 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as tinyint) FROM t --- !query 659 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 659 output +-- !query output true --- !query 660 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as tinyint) FROM t --- !query 660 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 660 output +-- !query output true --- !query 661 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as smallint) FROM t --- !query 661 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) <=> CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 661 output +-- !query output true --- !query 662 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as smallint) FROM t --- !query 662 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) <=> CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> --- !query 662 output +-- !query output true --- !query 663 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as smallint) FROM t --- !query 663 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 663 output +-- !query output true --- !query 664 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as smallint) FROM t --- !query 664 schema +-- !query schema 
struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 664 output +-- !query output true --- !query 665 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as int) FROM t --- !query 665 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 665 output +-- !query output true --- !query 666 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as int) FROM t --- !query 666 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <=> CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 666 output +-- !query output true --- !query 667 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as int) FROM t --- !query 667 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> --- !query 667 output +-- !query output true --- !query 668 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as int) FROM t --- !query 668 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 668 output +-- !query output true --- !query 669 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as bigint) FROM t --- !query 669 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 669 output +-- !query output true --- !query 670 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as bigint) FROM t --- !query 670 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 670 output +-- !query output true --- !query 671 +-- !query SELECT cast(1 as decimal(10, 
0)) <=> cast(1 as bigint) FROM t --- !query 671 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <=> CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 671 output +-- !query output true --- !query 672 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as bigint) FROM t --- !query 672 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) <=> CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> --- !query 672 output +-- !query output true --- !query 673 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as float) FROM t --- !query 673 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 673 output +-- !query output true --- !query 674 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as float) FROM t --- !query 674 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 674 output +-- !query output true --- !query 675 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as float) FROM t --- !query 675 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 675 output +-- !query output true --- !query 676 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as float) FROM t --- !query 676 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <=> CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 676 output +-- !query output true --- !query 677 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as double) FROM t --- !query 677 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 677 output +-- !query output true --- !query 678 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as double) FROM t --- !query 678 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS 
DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 678 output +-- !query output true --- !query 679 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as double) FROM t --- !query 679 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 679 output +-- !query output true --- !query 680 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as double) FROM t --- !query 680 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 680 output +-- !query output true --- !query 681 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as decimal(10, 0)) FROM t --- !query 681 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 681 output +-- !query output true --- !query 682 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as decimal(10, 0)) FROM t --- !query 682 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 682 output +-- !query output true --- !query 683 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as decimal(10, 0)) FROM t --- !query 683 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(1 AS DECIMAL(10,0))):boolean> --- !query 683 output +-- !query output true --- !query 684 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as decimal(10, 0)) FROM t --- !query 684 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 684 output +-- !query output true --- !query 685 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as string) FROM t --- !query 685 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 685 output +-- !query 
output true --- !query 686 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as string) FROM t --- !query 686 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 686 output +-- !query output true --- !query 687 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as string) FROM t --- !query 687 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 687 output +-- !query output true --- !query 688 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as string) FROM t --- !query 688 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <=> CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 688 output +-- !query output true --- !query 689 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast('1' as binary) FROM t --- !query 689 schema +-- !query schema struct<> --- !query 689 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <=> CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 690 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast('1' as binary) FROM t --- !query 690 schema +-- !query schema struct<> --- !query 690 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <=> CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 691 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast('1' as binary) FROM t --- !query 691 schema +-- !query schema struct<> --- !query 691 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in 
'(CAST(1 AS DECIMAL(10,0)) <=> CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 692 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast('1' as binary) FROM t --- !query 692 schema +-- !query schema struct<> --- !query 692 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) <=> CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <=> CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 693 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast(1 as boolean) FROM t --- !query 693 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(3,0))):boolean> --- !query 693 output +-- !query output true --- !query 694 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast(1 as boolean) FROM t --- !query 694 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(5,0))):boolean> --- !query 694 output +-- !query output true --- !query 695 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast(1 as boolean) FROM t --- !query 695 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(10,0))):boolean> --- !query 695 output +-- !query output true --- !query 696 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast(1 as boolean) FROM t --- !query 696 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) <=> CAST(CAST(1 AS BOOLEAN) AS DECIMAL(20,0))):boolean> --- !query 696 output +-- !query output true --- !query 697 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 697 schema +-- !query schema struct<> --- !query 697 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 
09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 698 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 698 schema +-- !query schema struct<> --- !query 698 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 699 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 699 schema +-- !query schema struct<> --- !query 699 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 700 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 700 schema +-- !query schema struct<> --- !query 700 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 701 +-- !query SELECT cast(1 as decimal(3, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t --- !query 701 schema +-- !query schema struct<> --- !query 701 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' 
(decimal(3,0) and date).; line 1 pos 7 --- !query 702 +-- !query SELECT cast(1 as decimal(5, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t --- !query 702 schema +-- !query schema struct<> --- !query 702 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 703 +-- !query SELECT cast(1 as decimal(10, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t --- !query 703 schema +-- !query schema struct<> --- !query 703 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 704 +-- !query SELECT cast(1 as decimal(20, 0)) <=> cast('2017-12-11 09:30:00' as date) FROM t --- !query 704 schema +-- !query schema struct<> --- !query 704 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <=> CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 705 +-- !query SELECT cast(1 as tinyint) < cast(1 as decimal(3, 0)) FROM t --- !query 705 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) < CAST(1 AS DECIMAL(3,0))):boolean> --- !query 705 output +-- !query output false --- !query 706 +-- !query SELECT cast(1 as tinyint) < cast(1 as decimal(5, 0)) FROM t --- !query 706 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 706 output +-- !query 
output false --- !query 707 +-- !query SELECT cast(1 as tinyint) < cast(1 as decimal(10, 0)) FROM t --- !query 707 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 707 output +-- !query output false --- !query 708 +-- !query SELECT cast(1 as tinyint) < cast(1 as decimal(20, 0)) FROM t --- !query 708 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 708 output +-- !query output false --- !query 709 +-- !query SELECT cast(1 as smallint) < cast(1 as decimal(3, 0)) FROM t --- !query 709 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) < CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 709 output +-- !query output false --- !query 710 +-- !query SELECT cast(1 as smallint) < cast(1 as decimal(5, 0)) FROM t --- !query 710 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) < CAST(1 AS DECIMAL(5,0))):boolean> --- !query 710 output +-- !query output false --- !query 711 +-- !query SELECT cast(1 as smallint) < cast(1 as decimal(10, 0)) FROM t --- !query 711 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 711 output +-- !query output false --- !query 712 +-- !query SELECT cast(1 as smallint) < cast(1 as decimal(20, 0)) FROM t --- !query 712 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 712 output +-- !query output false --- !query 713 +-- !query SELECT cast(1 as int) < cast(1 as decimal(3, 0)) FROM t --- !query 713 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) < 
CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 713 output +-- !query output false --- !query 714 +-- !query SELECT cast(1 as int) < cast(1 as decimal(5, 0)) FROM t --- !query 714 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 714 output +-- !query output false --- !query 715 +-- !query SELECT cast(1 as int) < cast(1 as decimal(10, 0)) FROM t --- !query 715 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) < CAST(1 AS DECIMAL(10,0))):boolean> --- !query 715 output +-- !query output false --- !query 716 +-- !query SELECT cast(1 as int) < cast(1 as decimal(20, 0)) FROM t --- !query 716 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 716 output +-- !query output false --- !query 717 +-- !query SELECT cast(1 as bigint) < cast(1 as decimal(3, 0)) FROM t --- !query 717 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 717 output +-- !query output false --- !query 718 +-- !query SELECT cast(1 as bigint) < cast(1 as decimal(5, 0)) FROM t --- !query 718 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 718 output +-- !query output false --- !query 719 +-- !query SELECT cast(1 as bigint) < cast(1 as decimal(10, 0)) FROM t --- !query 719 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 719 output +-- !query output false --- !query 720 +-- !query SELECT cast(1 as bigint) < cast(1 as decimal(20, 0)) FROM t --- !query 720 schema +-- !query schema 
struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) < CAST(1 AS DECIMAL(20,0))):boolean> --- !query 720 output +-- !query output false --- !query 721 +-- !query SELECT cast(1 as float) < cast(1 as decimal(3, 0)) FROM t --- !query 721 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 721 output +-- !query output false --- !query 722 +-- !query SELECT cast(1 as float) < cast(1 as decimal(5, 0)) FROM t --- !query 722 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 722 output +-- !query output false --- !query 723 +-- !query SELECT cast(1 as float) < cast(1 as decimal(10, 0)) FROM t --- !query 723 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 723 output +-- !query output false --- !query 724 +-- !query SELECT cast(1 as float) < cast(1 as decimal(20, 0)) FROM t --- !query 724 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) < CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 724 output +-- !query output false --- !query 725 +-- !query SELECT cast(1 as double) < cast(1 as decimal(3, 0)) FROM t --- !query 725 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 725 output +-- !query output false --- !query 726 +-- !query SELECT cast(1 as double) < cast(1 as decimal(5, 0)) FROM t --- !query 726 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 726 output +-- !query output false --- !query 727 +-- !query SELECT cast(1 as double) < cast(1 as decimal(10, 0)) FROM t --- !query 727 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 727 output +-- !query output false --- !query 728 +-- !query SELECT cast(1 as double) < cast(1 as 
decimal(20, 0)) FROM t --- !query 728 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 728 output +-- !query output false --- !query 729 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(3, 0)) FROM t --- !query 729 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 729 output +-- !query output false --- !query 730 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(5, 0)) FROM t --- !query 730 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 730 output +-- !query output false --- !query 731 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(10, 0)) FROM t --- !query 731 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS DECIMAL(10,0))):boolean> --- !query 731 output +-- !query output false --- !query 732 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(20, 0)) FROM t --- !query 732 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 732 output +-- !query output false --- !query 733 +-- !query SELECT cast('1' as binary) < cast(1 as decimal(3, 0)) FROM t --- !query 733 schema +-- !query schema struct<> --- !query 733 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 734 +-- !query SELECT cast('1' as binary) < cast(1 as decimal(5, 0)) FROM t --- !query 734 schema +-- !query schema struct<> --- !query 734 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) < CAST(1 
AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 735 +-- !query SELECT cast('1' as binary) < cast(1 as decimal(10, 0)) FROM t --- !query 735 schema +-- !query schema struct<> --- !query 735 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 736 +-- !query SELECT cast('1' as binary) < cast(1 as decimal(20, 0)) FROM t --- !query 736 schema +-- !query schema struct<> --- !query 736 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) < CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 737 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(3, 0)) FROM t --- !query 737 schema +-- !query schema struct<> --- !query 737 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 738 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(5, 0)) FROM t --- !query 738 schema +-- !query schema struct<> --- !query 738 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 739 +-- 
!query SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(10, 0)) FROM t --- !query 739 schema +-- !query schema struct<> --- !query 739 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 740 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) < cast(1 as decimal(20, 0)) FROM t --- !query 740 schema +-- !query schema struct<> --- !query 740 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) < CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 741 +-- !query SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(3, 0)) FROM t --- !query 741 schema +-- !query schema struct<> --- !query 741 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 742 +-- !query SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(5, 0)) FROM t --- !query 742 schema +-- !query schema struct<> --- !query 742 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 743 +-- !query SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(10, 0)) FROM t 
--- !query 743 schema +-- !query schema struct<> --- !query 743 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 744 +-- !query SELECT cast('2017-12-11 09:30:00' as date) < cast(1 as decimal(20, 0)) FROM t --- !query 744 schema +-- !query schema struct<> --- !query 744 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) < CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 745 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as tinyint) FROM t --- !query 745 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) < CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> --- !query 745 output +-- !query output false --- !query 746 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as tinyint) FROM t --- !query 746 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) < CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 746 output +-- !query output false --- !query 747 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as tinyint) FROM t --- !query 747 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 747 output +-- !query output false --- !query 748 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as tinyint) FROM t --- !query 748 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 748 output +-- !query output false 
--- !query 749 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as smallint) FROM t --- !query 749 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) < CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 749 output +-- !query output false --- !query 750 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as smallint) FROM t --- !query 750 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) < CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> --- !query 750 output +-- !query output false --- !query 751 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as smallint) FROM t --- !query 751 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 751 output +-- !query output false --- !query 752 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as smallint) FROM t --- !query 752 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 752 output +-- !query output false --- !query 753 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as int) FROM t --- !query 753 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 753 output +-- !query output false --- !query 754 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as int) FROM t --- !query 754 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) < CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 754 output +-- !query output false --- !query 755 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as int) FROM t --- !query 755 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) < CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> --- !query 755 output +-- 
!query output false --- !query 756 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as int) FROM t --- !query 756 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 756 output +-- !query output false --- !query 757 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as bigint) FROM t --- !query 757 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 757 output +-- !query output false --- !query 758 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as bigint) FROM t --- !query 758 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 758 output +-- !query output false --- !query 759 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as bigint) FROM t --- !query 759 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) < CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 759 output +-- !query output false --- !query 760 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as bigint) FROM t --- !query 760 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) < CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> --- !query 760 output +-- !query output false --- !query 761 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as float) FROM t --- !query 761 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 761 output +-- !query output false --- !query 762 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as float) FROM t --- !query 762 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 762 output +-- !query 
output false --- !query 763 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as float) FROM t --- !query 763 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 763 output +-- !query output false --- !query 764 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as float) FROM t --- !query 764 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) < CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 764 output +-- !query output false --- !query 765 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as double) FROM t --- !query 765 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 765 output +-- !query output false --- !query 766 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as double) FROM t --- !query 766 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 766 output +-- !query output false --- !query 767 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as double) FROM t --- !query 767 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 767 output +-- !query output false --- !query 768 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as double) FROM t --- !query 768 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 768 output +-- !query output false --- !query 769 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as decimal(10, 0)) FROM t --- !query 769 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 769 output +-- !query output false --- !query 770 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as decimal(10, 0)) FROM t --- !query 770 schema +-- !query schema struct<(CAST(CAST(1 AS 
DECIMAL(5,0)) AS DECIMAL(10,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 770 output +-- !query output false --- !query 771 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as decimal(10, 0)) FROM t --- !query 771 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS DECIMAL(10,0))):boolean> --- !query 771 output +-- !query output false --- !query 772 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as decimal(10, 0)) FROM t --- !query 772 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) < CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 772 output +-- !query output false --- !query 773 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as string) FROM t --- !query 773 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 773 output +-- !query output false --- !query 774 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as string) FROM t --- !query 774 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 774 output +-- !query output false --- !query 775 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as string) FROM t --- !query 775 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 775 output +-- !query output false --- !query 776 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as string) FROM t --- !query 776 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) < CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 776 output +-- !query output false --- !query 777 +-- !query SELECT cast(1 as decimal(3, 0)) < cast('1' as binary) FROM t --- !query 777 schema +-- !query schema struct<> --- !query 777 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS 
DECIMAL(3,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 778 +-- !query SELECT cast(1 as decimal(5, 0)) < cast('1' as binary) FROM t --- !query 778 schema +-- !query schema struct<> --- !query 778 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 779 +-- !query SELECT cast(1 as decimal(10, 0)) < cast('1' as binary) FROM t --- !query 779 schema +-- !query schema struct<> --- !query 779 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 780 +-- !query SELECT cast(1 as decimal(20, 0)) < cast('1' as binary) FROM t --- !query 780 schema +-- !query schema struct<> --- !query 780 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 781 +-- !query SELECT cast(1 as decimal(3, 0)) < cast(1 as boolean) FROM t --- !query 781 schema +-- !query schema struct<> --- !query 781 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 782 +-- !query SELECT cast(1 as decimal(5, 0)) < cast(1 as boolean) FROM t --- !query 782 schema +-- !query schema struct<> --- !query 782 
output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 783 +-- !query SELECT cast(1 as decimal(10, 0)) < cast(1 as boolean) FROM t --- !query 783 schema +-- !query schema struct<> --- !query 783 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 784 +-- !query SELECT cast(1 as decimal(20, 0)) < cast(1 as boolean) FROM t --- !query 784 schema +-- !query schema struct<> --- !query 784 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 785 +-- !query SELECT cast(1 as decimal(3, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 785 schema +-- !query schema struct<> --- !query 785 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 786 +-- !query SELECT cast(1 as decimal(5, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 786 schema +-- !query schema struct<> --- !query 786 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 787 +-- !query SELECT cast(1 as decimal(10, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 787 schema +-- !query schema struct<> --- !query 787 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 788 +-- !query SELECT cast(1 as decimal(20, 0)) < cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 788 schema +-- !query schema struct<> --- !query 788 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 789 +-- !query SELECT cast(1 as decimal(3, 0)) < cast('2017-12-11 09:30:00' as date) FROM t --- !query 789 schema +-- !query schema struct<> --- !query 789 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 790 +-- !query SELECT cast(1 as decimal(5, 0)) < cast('2017-12-11 09:30:00' as date) FROM t --- !query 790 schema +-- !query schema struct<> --- !query 790 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 
7 --- !query 791 +-- !query SELECT cast(1 as decimal(10, 0)) < cast('2017-12-11 09:30:00' as date) FROM t --- !query 791 schema +-- !query schema struct<> --- !query 791 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 792 +-- !query SELECT cast(1 as decimal(20, 0)) < cast('2017-12-11 09:30:00' as date) FROM t --- !query 792 schema +-- !query schema struct<> --- !query 792 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) < CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 793 +-- !query SELECT cast(1 as tinyint) <= cast(1 as decimal(3, 0)) FROM t --- !query 793 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) <= CAST(1 AS DECIMAL(3,0))):boolean> --- !query 793 output +-- !query output true --- !query 794 +-- !query SELECT cast(1 as tinyint) <= cast(1 as decimal(5, 0)) FROM t --- !query 794 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 794 output +-- !query output true --- !query 795 +-- !query SELECT cast(1 as tinyint) <= cast(1 as decimal(10, 0)) FROM t --- !query 795 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 795 output +-- !query output true --- !query 796 +-- !query SELECT cast(1 as tinyint) <= cast(1 as decimal(20, 0)) FROM t --- !query 796 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS 
DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 796 output +-- !query output true --- !query 797 +-- !query SELECT cast(1 as smallint) <= cast(1 as decimal(3, 0)) FROM t --- !query 797 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 797 output +-- !query output true --- !query 798 +-- !query SELECT cast(1 as smallint) <= cast(1 as decimal(5, 0)) FROM t --- !query 798 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) <= CAST(1 AS DECIMAL(5,0))):boolean> --- !query 798 output +-- !query output true --- !query 799 +-- !query SELECT cast(1 as smallint) <= cast(1 as decimal(10, 0)) FROM t --- !query 799 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 799 output +-- !query output true --- !query 800 +-- !query SELECT cast(1 as smallint) <= cast(1 as decimal(20, 0)) FROM t --- !query 800 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 800 output +-- !query output true --- !query 801 +-- !query SELECT cast(1 as int) <= cast(1 as decimal(3, 0)) FROM t --- !query 801 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 801 output +-- !query output true --- !query 802 +-- !query SELECT cast(1 as int) <= cast(1 as decimal(5, 0)) FROM t --- !query 802 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 802 output +-- !query output true --- !query 803 +-- !query SELECT cast(1 as int) <= cast(1 as decimal(10, 0)) FROM t --- !query 803 
schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) <= CAST(1 AS DECIMAL(10,0))):boolean> --- !query 803 output +-- !query output true --- !query 804 +-- !query SELECT cast(1 as int) <= cast(1 as decimal(20, 0)) FROM t --- !query 804 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 804 output +-- !query output true --- !query 805 +-- !query SELECT cast(1 as bigint) <= cast(1 as decimal(3, 0)) FROM t --- !query 805 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 805 output +-- !query output true --- !query 806 +-- !query SELECT cast(1 as bigint) <= cast(1 as decimal(5, 0)) FROM t --- !query 806 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 806 output +-- !query output true --- !query 807 +-- !query SELECT cast(1 as bigint) <= cast(1 as decimal(10, 0)) FROM t --- !query 807 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 807 output +-- !query output true --- !query 808 +-- !query SELECT cast(1 as bigint) <= cast(1 as decimal(20, 0)) FROM t --- !query 808 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) <= CAST(1 AS DECIMAL(20,0))):boolean> --- !query 808 output +-- !query output true --- !query 809 +-- !query SELECT cast(1 as float) <= cast(1 as decimal(3, 0)) FROM t --- !query 809 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 809 output +-- !query output true --- !query 810 +-- !query SELECT cast(1 as float) <= cast(1 as decimal(5, 0)) FROM t --- !query 810 schema +-- 
!query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 810 output +-- !query output true --- !query 811 +-- !query SELECT cast(1 as float) <= cast(1 as decimal(10, 0)) FROM t --- !query 811 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 811 output +-- !query output true --- !query 812 +-- !query SELECT cast(1 as float) <= cast(1 as decimal(20, 0)) FROM t --- !query 812 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 812 output +-- !query output true --- !query 813 +-- !query SELECT cast(1 as double) <= cast(1 as decimal(3, 0)) FROM t --- !query 813 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 813 output +-- !query output true --- !query 814 +-- !query SELECT cast(1 as double) <= cast(1 as decimal(5, 0)) FROM t --- !query 814 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 814 output +-- !query output true --- !query 815 +-- !query SELECT cast(1 as double) <= cast(1 as decimal(10, 0)) FROM t --- !query 815 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 815 output +-- !query output true --- !query 816 +-- !query SELECT cast(1 as double) <= cast(1 as decimal(20, 0)) FROM t --- !query 816 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 816 output +-- !query output true --- !query 817 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(3, 0)) FROM t --- !query 817 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 817 output +-- !query output true --- !query 818 
+-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(5, 0)) FROM t --- !query 818 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 818 output +-- !query output true --- !query 819 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(10, 0)) FROM t --- !query 819 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS DECIMAL(10,0))):boolean> --- !query 819 output +-- !query output true --- !query 820 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(20, 0)) FROM t --- !query 820 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 820 output +-- !query output true --- !query 821 +-- !query SELECT cast('1' as binary) <= cast(1 as decimal(3, 0)) FROM t --- !query 821 schema +-- !query schema struct<> --- !query 821 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 822 +-- !query SELECT cast('1' as binary) <= cast(1 as decimal(5, 0)) FROM t --- !query 822 schema +-- !query schema struct<> --- !query 822 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 823 +-- !query SELECT cast('1' as binary) <= cast(1 as decimal(10, 0)) FROM t --- !query 823 schema +-- !query schema struct<> --- !query 823 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in 
'(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 824 +-- !query SELECT cast('1' as binary) <= cast(1 as decimal(20, 0)) FROM t --- !query 824 schema +-- !query schema struct<> --- !query 824 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) <= CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 825 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(3, 0)) FROM t --- !query 825 schema +-- !query schema struct<> --- !query 825 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 826 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(5, 0)) FROM t --- !query 826 schema +-- !query schema struct<> --- !query 826 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 827 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(10, 0)) FROM t --- !query 827 schema +-- !query schema struct<> --- !query 827 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 
--- !query 828 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= cast(1 as decimal(20, 0)) FROM t --- !query 828 schema +-- !query schema struct<> --- !query 828 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) <= CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 829 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(3, 0)) FROM t --- !query 829 schema +-- !query schema struct<> --- !query 829 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 830 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(5, 0)) FROM t --- !query 830 schema +-- !query schema struct<> --- !query 830 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 831 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(10, 0)) FROM t --- !query 831 schema +-- !query schema struct<> --- !query 831 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 832 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <= cast(1 as decimal(20, 0)) 
FROM t --- !query 832 schema +-- !query schema struct<> --- !query 832 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) <= CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 833 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as tinyint) FROM t --- !query 833 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) <= CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> --- !query 833 output +-- !query output true --- !query 834 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as tinyint) FROM t --- !query 834 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) <= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 834 output +-- !query output true --- !query 835 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as tinyint) FROM t --- !query 835 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 835 output +-- !query output true --- !query 836 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as tinyint) FROM t --- !query 836 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 836 output +-- !query output true --- !query 837 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as smallint) FROM t --- !query 837 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) <= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 837 output +-- !query output true --- !query 838 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as smallint) FROM t --- !query 838 schema +-- !query schema struct<(CAST(1 AS 
DECIMAL(5,0)) <= CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> --- !query 838 output +-- !query output true --- !query 839 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as smallint) FROM t --- !query 839 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 839 output +-- !query output true --- !query 840 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as smallint) FROM t --- !query 840 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 840 output +-- !query output true --- !query 841 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as int) FROM t --- !query 841 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 841 output +-- !query output true --- !query 842 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as int) FROM t --- !query 842 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 842 output +-- !query output true --- !query 843 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as int) FROM t --- !query 843 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <= CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> --- !query 843 output +-- !query output true --- !query 844 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as int) FROM t --- !query 844 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 844 output +-- !query output true --- !query 845 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as bigint) FROM t --- !query 845 schema +-- !query 
schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 845 output +-- !query output true --- !query 846 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as bigint) FROM t --- !query 846 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 846 output +-- !query output true --- !query 847 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as bigint) FROM t --- !query 847 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) <= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 847 output +-- !query output true --- !query 848 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as bigint) FROM t --- !query 848 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) <= CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> --- !query 848 output +-- !query output true --- !query 849 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as float) FROM t --- !query 849 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 849 output +-- !query output true --- !query 850 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as float) FROM t --- !query 850 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 850 output +-- !query output true --- !query 851 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as float) FROM t --- !query 851 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 851 output +-- !query output true --- !query 852 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as float) FROM t --- !query 852 schema +-- !query schema struct<(CAST(CAST(1 AS 
DECIMAL(20,0)) AS DOUBLE) <= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 852 output +-- !query output true --- !query 853 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as double) FROM t --- !query 853 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 853 output +-- !query output true --- !query 854 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as double) FROM t --- !query 854 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 854 output +-- !query output true --- !query 855 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as double) FROM t --- !query 855 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 855 output +-- !query output true --- !query 856 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as double) FROM t --- !query 856 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 856 output +-- !query output true --- !query 857 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as decimal(10, 0)) FROM t --- !query 857 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 857 output +-- !query output true --- !query 858 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as decimal(10, 0)) FROM t --- !query 858 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 858 output +-- !query output true --- !query 859 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as decimal(10, 0)) FROM t --- !query 859 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS DECIMAL(10,0))):boolean> --- !query 859 output +-- !query output true --- !query 860 +-- !query SELECT 
cast(1 as decimal(20, 0)) <= cast(1 as decimal(10, 0)) FROM t --- !query 860 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 860 output +-- !query output true --- !query 861 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as string) FROM t --- !query 861 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 861 output +-- !query output true --- !query 862 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as string) FROM t --- !query 862 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 862 output +-- !query output true --- !query 863 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as string) FROM t --- !query 863 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 863 output +-- !query output true --- !query 864 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as string) FROM t --- !query 864 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) <= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 864 output +-- !query output true --- !query 865 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast('1' as binary) FROM t --- !query 865 schema +-- !query schema struct<> --- !query 865 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 866 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast('1' as binary) FROM t --- !query 866 schema +-- !query schema struct<> --- !query 866 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS 
DECIMAL(5,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 867 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast('1' as binary) FROM t --- !query 867 schema +-- !query schema struct<> --- !query 867 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <= CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 868 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast('1' as binary) FROM t --- !query 868 schema +-- !query schema struct<> --- !query 868 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 869 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast(1 as boolean) FROM t --- !query 869 schema +-- !query schema struct<> --- !query 869 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 870 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast(1 as boolean) FROM t --- !query 870 schema +-- !query schema struct<> --- !query 870 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 871 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast(1 as boolean) FROM t --- !query 871 schema +-- !query schema struct<> 
--- !query 871 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <= CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 872 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast(1 as boolean) FROM t --- !query 872 schema +-- !query schema struct<> --- !query 872 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 873 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 873 schema +-- !query schema struct<> --- !query 873 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 874 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 874 schema +-- !query schema struct<> --- !query 874 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 875 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 875 schema +-- !query schema struct<> --- !query 875 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST('2017-12-11 
09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 876 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 876 schema +-- !query schema struct<> --- !query 876 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 877 +-- !query SELECT cast(1 as decimal(3, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t --- !query 877 schema +-- !query schema struct<> --- !query 877 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 878 +-- !query SELECT cast(1 as decimal(5, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t --- !query 878 schema +-- !query schema struct<> --- !query 878 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 879 +-- !query SELECT cast(1 as decimal(10, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t --- !query 879 schema +-- !query schema struct<> --- !query 879 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS 
DECIMAL(10,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 880 +-- !query SELECT cast(1 as decimal(20, 0)) <= cast('2017-12-11 09:30:00' as date) FROM t --- !query 880 schema +-- !query schema struct<> --- !query 880 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) <= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 881 +-- !query SELECT cast(1 as tinyint) > cast(1 as decimal(3, 0)) FROM t --- !query 881 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) > CAST(1 AS DECIMAL(3,0))):boolean> --- !query 881 output +-- !query output false --- !query 882 +-- !query SELECT cast(1 as tinyint) > cast(1 as decimal(5, 0)) FROM t --- !query 882 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 882 output +-- !query output false --- !query 883 +-- !query SELECT cast(1 as tinyint) > cast(1 as decimal(10, 0)) FROM t --- !query 883 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 883 output +-- !query output false --- !query 884 +-- !query SELECT cast(1 as tinyint) > cast(1 as decimal(20, 0)) FROM t --- !query 884 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 884 output +-- !query output false --- !query 885 +-- !query SELECT cast(1 as smallint) > cast(1 as decimal(3, 0)) FROM t --- !query 885 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- 
!query 885 output +-- !query output false --- !query 886 +-- !query SELECT cast(1 as smallint) > cast(1 as decimal(5, 0)) FROM t --- !query 886 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) > CAST(1 AS DECIMAL(5,0))):boolean> --- !query 886 output +-- !query output false --- !query 887 +-- !query SELECT cast(1 as smallint) > cast(1 as decimal(10, 0)) FROM t --- !query 887 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 887 output +-- !query output false --- !query 888 +-- !query SELECT cast(1 as smallint) > cast(1 as decimal(20, 0)) FROM t --- !query 888 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 888 output +-- !query output false --- !query 889 +-- !query SELECT cast(1 as int) > cast(1 as decimal(3, 0)) FROM t --- !query 889 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 889 output +-- !query output false --- !query 890 +-- !query SELECT cast(1 as int) > cast(1 as decimal(5, 0)) FROM t --- !query 890 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 890 output +-- !query output false --- !query 891 +-- !query SELECT cast(1 as int) > cast(1 as decimal(10, 0)) FROM t --- !query 891 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) > CAST(1 AS DECIMAL(10,0))):boolean> --- !query 891 output +-- !query output false --- !query 892 +-- !query SELECT cast(1 as int) > cast(1 as decimal(20, 0)) FROM t --- !query 892 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS 
DECIMAL(20,0))):boolean> --- !query 892 output +-- !query output false --- !query 893 +-- !query SELECT cast(1 as bigint) > cast(1 as decimal(3, 0)) FROM t --- !query 893 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 893 output +-- !query output false --- !query 894 +-- !query SELECT cast(1 as bigint) > cast(1 as decimal(5, 0)) FROM t --- !query 894 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 894 output +-- !query output false --- !query 895 +-- !query SELECT cast(1 as bigint) > cast(1 as decimal(10, 0)) FROM t --- !query 895 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 895 output +-- !query output false --- !query 896 +-- !query SELECT cast(1 as bigint) > cast(1 as decimal(20, 0)) FROM t --- !query 896 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) > CAST(1 AS DECIMAL(20,0))):boolean> --- !query 896 output +-- !query output false --- !query 897 +-- !query SELECT cast(1 as float) > cast(1 as decimal(3, 0)) FROM t --- !query 897 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 897 output +-- !query output false --- !query 898 +-- !query SELECT cast(1 as float) > cast(1 as decimal(5, 0)) FROM t --- !query 898 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 898 output +-- !query output false --- !query 899 +-- !query SELECT cast(1 as float) > cast(1 as decimal(10, 0)) FROM t --- !query 899 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 899 
output +-- !query output false --- !query 900 +-- !query SELECT cast(1 as float) > cast(1 as decimal(20, 0)) FROM t --- !query 900 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) > CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 900 output +-- !query output false --- !query 901 +-- !query SELECT cast(1 as double) > cast(1 as decimal(3, 0)) FROM t --- !query 901 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 901 output +-- !query output false --- !query 902 +-- !query SELECT cast(1 as double) > cast(1 as decimal(5, 0)) FROM t --- !query 902 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 902 output +-- !query output false --- !query 903 +-- !query SELECT cast(1 as double) > cast(1 as decimal(10, 0)) FROM t --- !query 903 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 903 output +-- !query output false --- !query 904 +-- !query SELECT cast(1 as double) > cast(1 as decimal(20, 0)) FROM t --- !query 904 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 904 output +-- !query output false --- !query 905 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(3, 0)) FROM t --- !query 905 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 905 output +-- !query output false --- !query 906 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(5, 0)) FROM t --- !query 906 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 906 output +-- !query output false --- !query 907 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(10, 0)) FROM t --- !query 907 schema 
+-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS DECIMAL(10,0))):boolean> --- !query 907 output +-- !query output false --- !query 908 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(20, 0)) FROM t --- !query 908 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 908 output +-- !query output false --- !query 909 +-- !query SELECT cast('1' as binary) > cast(1 as decimal(3, 0)) FROM t --- !query 909 schema +-- !query schema struct<> --- !query 909 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 910 +-- !query SELECT cast('1' as binary) > cast(1 as decimal(5, 0)) FROM t --- !query 910 schema +-- !query schema struct<> --- !query 910 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 911 +-- !query SELECT cast('1' as binary) > cast(1 as decimal(10, 0)) FROM t --- !query 911 schema +-- !query schema struct<> --- !query 911 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 912 +-- !query SELECT cast('1' as binary) > cast(1 as decimal(20, 0)) FROM t --- !query 912 schema +-- !query schema struct<> --- !query 912 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: 
differing types in '(CAST('1' AS BINARY) > CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 913 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(3, 0)) FROM t --- !query 913 schema +-- !query schema struct<> --- !query 913 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 914 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(5, 0)) FROM t --- !query 914 schema +-- !query schema struct<> --- !query 914 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 915 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(10, 0)) FROM t --- !query 915 schema +-- !query schema struct<> --- !query 915 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 916 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) > cast(1 as decimal(20, 0)) FROM t --- !query 916 schema +-- !query schema struct<> --- !query 916 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) > CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS 
TIMESTAMP) > CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 917 +-- !query SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(3, 0)) FROM t --- !query 917 schema +-- !query schema struct<> --- !query 917 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 918 +-- !query SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(5, 0)) FROM t --- !query 918 schema +-- !query schema struct<> --- !query 918 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 919 +-- !query SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(10, 0)) FROM t --- !query 919 schema +-- !query schema struct<> --- !query 919 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 920 +-- !query SELECT cast('2017-12-11 09:30:00' as date) > cast(1 as decimal(20, 0)) FROM t --- !query 920 schema +-- !query schema struct<> --- !query 920 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) > CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 921 +-- !query SELECT cast(1 as decimal(3, 
0)) > cast(1 as tinyint) FROM t --- !query 921 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) > CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> --- !query 921 output +-- !query output false --- !query 922 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as tinyint) FROM t --- !query 922 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) > CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 922 output +-- !query output false --- !query 923 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as tinyint) FROM t --- !query 923 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 923 output +-- !query output false --- !query 924 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as tinyint) FROM t --- !query 924 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 924 output +-- !query output false --- !query 925 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as smallint) FROM t --- !query 925 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) > CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 925 output +-- !query output false --- !query 926 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as smallint) FROM t --- !query 926 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) > CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> --- !query 926 output +-- !query output false --- !query 927 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as smallint) FROM t --- !query 927 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 927 output +-- !query output false --- !query 928 
+-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as smallint) FROM t --- !query 928 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 928 output +-- !query output false --- !query 929 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as int) FROM t --- !query 929 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 929 output +-- !query output false --- !query 930 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as int) FROM t --- !query 930 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) > CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 930 output +-- !query output false --- !query 931 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as int) FROM t --- !query 931 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) > CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> --- !query 931 output +-- !query output false --- !query 932 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as int) FROM t --- !query 932 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 932 output +-- !query output false --- !query 933 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as bigint) FROM t --- !query 933 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 933 output +-- !query output false --- !query 934 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as bigint) FROM t --- !query 934 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- 
!query 934 output +-- !query output false --- !query 935 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as bigint) FROM t --- !query 935 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) > CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 935 output +-- !query output false --- !query 936 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as bigint) FROM t --- !query 936 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) > CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> --- !query 936 output +-- !query output false --- !query 937 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as float) FROM t --- !query 937 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 937 output +-- !query output false --- !query 938 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as float) FROM t --- !query 938 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 938 output +-- !query output false --- !query 939 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as float) FROM t --- !query 939 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 939 output +-- !query output false --- !query 940 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as float) FROM t --- !query 940 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) > CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 940 output +-- !query output false --- !query 941 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as double) FROM t --- !query 941 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 941 output +-- !query output false --- !query 942 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as double) FROM t --- 
!query 942 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 942 output +-- !query output false --- !query 943 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as double) FROM t --- !query 943 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 943 output +-- !query output false --- !query 944 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as double) FROM t --- !query 944 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 944 output +-- !query output false --- !query 945 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as decimal(10, 0)) FROM t --- !query 945 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 945 output +-- !query output false --- !query 946 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as decimal(10, 0)) FROM t --- !query 946 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 946 output +-- !query output false --- !query 947 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as decimal(10, 0)) FROM t --- !query 947 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS DECIMAL(10,0))):boolean> --- !query 947 output +-- !query output false --- !query 948 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as decimal(10, 0)) FROM t --- !query 948 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) > CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 948 output +-- !query output false --- !query 949 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as string) FROM t --- !query 949 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS 
DOUBLE)):boolean> --- !query 949 output +-- !query output false --- !query 950 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as string) FROM t --- !query 950 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 950 output +-- !query output false --- !query 951 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as string) FROM t --- !query 951 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 951 output +-- !query output false --- !query 952 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as string) FROM t --- !query 952 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) > CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 952 output +-- !query output false --- !query 953 +-- !query SELECT cast(1 as decimal(3, 0)) > cast('1' as binary) FROM t --- !query 953 schema +-- !query schema struct<> --- !query 953 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) > CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 954 +-- !query SELECT cast(1 as decimal(5, 0)) > cast('1' as binary) FROM t --- !query 954 schema +-- !query schema struct<> --- !query 954 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 955 +-- !query SELECT cast(1 as decimal(10, 0)) > cast('1' as binary) FROM t --- !query 955 schema +-- !query schema struct<> --- !query 955 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST('1' AS BINARY))' due to data type 
mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 956 +-- !query SELECT cast(1 as decimal(20, 0)) > cast('1' as binary) FROM t --- !query 956 schema +-- !query schema struct<> --- !query 956 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) > CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 957 +-- !query SELECT cast(1 as decimal(3, 0)) > cast(1 as boolean) FROM t --- !query 957 schema +-- !query schema struct<> --- !query 957 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 958 +-- !query SELECT cast(1 as decimal(5, 0)) > cast(1 as boolean) FROM t --- !query 958 schema +-- !query schema struct<> --- !query 958 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 959 +-- !query SELECT cast(1 as decimal(10, 0)) > cast(1 as boolean) FROM t --- !query 959 schema +-- !query schema struct<> --- !query 959 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 960 +-- !query SELECT cast(1 as decimal(20, 0)) > cast(1 as boolean) FROM t --- !query 960 schema +-- !query schema struct<> --- !query 960 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) > CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 961 +-- !query SELECT cast(1 as decimal(3, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 961 schema +-- !query schema struct<> --- !query 961 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 962 +-- !query SELECT cast(1 as decimal(5, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 962 schema +-- !query schema struct<> --- !query 962 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 963 +-- !query SELECT cast(1 as decimal(10, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 963 schema +-- !query schema struct<> --- !query 963 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 964 +-- !query SELECT cast(1 as decimal(20, 0)) > cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 964 schema +-- !query schema struct<> --- !query 964 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) > 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 965 +-- !query SELECT cast(1 as decimal(3, 0)) > cast('2017-12-11 09:30:00' as date) FROM t --- !query 965 schema +-- !query schema struct<> --- !query 965 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) > CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 966 +-- !query SELECT cast(1 as decimal(5, 0)) > cast('2017-12-11 09:30:00' as date) FROM t --- !query 966 schema +-- !query schema struct<> --- !query 966 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) > CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 967 +-- !query SELECT cast(1 as decimal(10, 0)) > cast('2017-12-11 09:30:00' as date) FROM t --- !query 967 schema +-- !query schema struct<> --- !query 967 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) > CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 968 +-- !query SELECT cast(1 as decimal(20, 0)) > cast('2017-12-11 09:30:00' as date) FROM t --- !query 968 schema +-- !query schema struct<> --- !query 968 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) > CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) > 
CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 969 +-- !query SELECT cast(1 as tinyint) >= cast(1 as decimal(3, 0)) FROM t --- !query 969 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) >= CAST(1 AS DECIMAL(3,0))):boolean> --- !query 969 output +-- !query output true --- !query 970 +-- !query SELECT cast(1 as tinyint) >= cast(1 as decimal(5, 0)) FROM t --- !query 970 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 970 output +-- !query output true --- !query 971 +-- !query SELECT cast(1 as tinyint) >= cast(1 as decimal(10, 0)) FROM t --- !query 971 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 971 output +-- !query output true --- !query 972 +-- !query SELECT cast(1 as tinyint) >= cast(1 as decimal(20, 0)) FROM t --- !query 972 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 972 output +-- !query output true --- !query 973 +-- !query SELECT cast(1 as smallint) >= cast(1 as decimal(3, 0)) FROM t --- !query 973 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 973 output +-- !query output true --- !query 974 +-- !query SELECT cast(1 as smallint) >= cast(1 as decimal(5, 0)) FROM t --- !query 974 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) >= CAST(1 AS DECIMAL(5,0))):boolean> --- !query 974 output +-- !query output true --- !query 975 +-- !query SELECT cast(1 as smallint) >= cast(1 as decimal(10, 0)) FROM t --- !query 975 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) 
AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 975 output +-- !query output true --- !query 976 +-- !query SELECT cast(1 as smallint) >= cast(1 as decimal(20, 0)) FROM t --- !query 976 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 976 output +-- !query output true --- !query 977 +-- !query SELECT cast(1 as int) >= cast(1 as decimal(3, 0)) FROM t --- !query 977 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 977 output +-- !query output true --- !query 978 +-- !query SELECT cast(1 as int) >= cast(1 as decimal(5, 0)) FROM t --- !query 978 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 978 output +-- !query output true --- !query 979 +-- !query SELECT cast(1 as int) >= cast(1 as decimal(10, 0)) FROM t --- !query 979 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) >= CAST(1 AS DECIMAL(10,0))):boolean> --- !query 979 output +-- !query output true --- !query 980 +-- !query SELECT cast(1 as int) >= cast(1 as decimal(20, 0)) FROM t --- !query 980 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 980 output +-- !query output true --- !query 981 +-- !query SELECT cast(1 as bigint) >= cast(1 as decimal(3, 0)) FROM t --- !query 981 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 981 output +-- !query output true --- !query 982 +-- !query SELECT cast(1 as bigint) >= cast(1 as decimal(5, 0)) FROM t --- !query 982 schema +-- !query 
schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 982 output +-- !query output true --- !query 983 +-- !query SELECT cast(1 as bigint) >= cast(1 as decimal(10, 0)) FROM t --- !query 983 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 983 output +-- !query output true --- !query 984 +-- !query SELECT cast(1 as bigint) >= cast(1 as decimal(20, 0)) FROM t --- !query 984 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) >= CAST(1 AS DECIMAL(20,0))):boolean> --- !query 984 output +-- !query output true --- !query 985 +-- !query SELECT cast(1 as float) >= cast(1 as decimal(3, 0)) FROM t --- !query 985 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):boolean> --- !query 985 output +-- !query output true --- !query 986 +-- !query SELECT cast(1 as float) >= cast(1 as decimal(5, 0)) FROM t --- !query 986 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 986 output +-- !query output true --- !query 987 +-- !query SELECT cast(1 as float) >= cast(1 as decimal(10, 0)) FROM t --- !query 987 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 987 output +-- !query output true --- !query 988 +-- !query SELECT cast(1 as float) >= cast(1 as decimal(20, 0)) FROM t --- !query 988 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 988 output +-- !query output true --- !query 989 +-- !query SELECT cast(1 as double) >= cast(1 as decimal(3, 0)) FROM t --- !query 989 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(3,0)) AS 
DOUBLE)):boolean> --- !query 989 output +-- !query output true --- !query 990 +-- !query SELECT cast(1 as double) >= cast(1 as decimal(5, 0)) FROM t --- !query 990 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE)):boolean> --- !query 990 output +-- !query output true --- !query 991 +-- !query SELECT cast(1 as double) >= cast(1 as decimal(10, 0)) FROM t --- !query 991 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 991 output +-- !query output true --- !query 992 +-- !query SELECT cast(1 as double) >= cast(1 as decimal(20, 0)) FROM t --- !query 992 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE)):boolean> --- !query 992 output +-- !query output true --- !query 993 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(3, 0)) FROM t --- !query 993 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 993 output +-- !query output true --- !query 994 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(5, 0)) FROM t --- !query 994 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 994 output +-- !query output true --- !query 995 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(10, 0)) FROM t --- !query 995 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS DECIMAL(10,0))):boolean> --- !query 995 output +-- !query output true --- !query 996 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(20, 0)) FROM t --- !query 996 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 996 output +-- !query output true --- !query 997 +-- !query SELECT cast('1' as 
binary) >= cast(1 as decimal(3, 0)) FROM t --- !query 997 schema +-- !query schema struct<> --- !query 997 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 998 +-- !query SELECT cast('1' as binary) >= cast(1 as decimal(5, 0)) FROM t --- !query 998 schema +-- !query schema struct<> --- !query 998 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 999 +-- !query SELECT cast('1' as binary) >= cast(1 as decimal(10, 0)) FROM t --- !query 999 schema +-- !query schema struct<> --- !query 999 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 1000 +-- !query SELECT cast('1' as binary) >= cast(1 as decimal(20, 0)) FROM t --- !query 1000 schema +-- !query schema struct<> --- !query 1000 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) >= CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 1001 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(3, 0)) FROM t --- !query 1001 schema +-- !query schema struct<> --- !query 1001 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(3,0)))' due to data type 
mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 1002 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(5, 0)) FROM t --- !query 1002 schema +-- !query schema struct<> --- !query 1002 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 1003 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(10, 0)) FROM t --- !query 1003 schema +-- !query schema struct<> --- !query 1003 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 1004 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= cast(1 as decimal(20, 0)) FROM t --- !query 1004 schema +-- !query schema struct<> --- !query 1004 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) >= CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 1005 +-- !query SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(3, 0)) FROM t --- !query 1005 schema +-- !query schema struct<> --- !query 1005 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types 
in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 1006 +-- !query SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(5, 0)) FROM t --- !query 1006 schema +-- !query schema struct<> --- !query 1006 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 1007 +-- !query SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(10, 0)) FROM t --- !query 1007 schema +-- !query schema struct<> --- !query 1007 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 1008 +-- !query SELECT cast('2017-12-11 09:30:00' as date) >= cast(1 as decimal(20, 0)) FROM t --- !query 1008 schema +-- !query schema struct<> --- !query 1008 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) >= CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 1009 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as tinyint) FROM t --- !query 1009 schema +-- !query schema struct<(CAST(1 AS DECIMAL(3,0)) >= CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0))):boolean> --- !query 1009 output +-- !query output true --- !query 1010 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as tinyint) FROM t --- !query 1010 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) >= CAST(CAST(CAST(1 AS TINYINT) 
AS DECIMAL(3,0)) AS DECIMAL(5,0))):boolean> --- !query 1010 output +-- !query output true --- !query 1011 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as tinyint) FROM t --- !query 1011 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):boolean> --- !query 1011 output +-- !query output true --- !query 1012 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as tinyint) FROM t --- !query 1012 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0))):boolean> --- !query 1012 output +-- !query output true --- !query 1013 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as smallint) FROM t --- !query 1013 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) >= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0))):boolean> --- !query 1013 output +-- !query output true --- !query 1014 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as smallint) FROM t --- !query 1014 schema +-- !query schema struct<(CAST(1 AS DECIMAL(5,0)) >= CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0))):boolean> --- !query 1014 output +-- !query output true --- !query 1015 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as smallint) FROM t --- !query 1015 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):boolean> --- !query 1015 output +-- !query output true --- !query 1016 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as smallint) FROM t --- !query 1016 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0))):boolean> --- !query 1016 output +-- !query output true --- !query 1017 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as int) FROM t --- !query 1017 
schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 1017 output +-- !query output true --- !query 1018 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as int) FROM t --- !query 1018 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) >= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 1018 output +-- !query output true --- !query 1019 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as int) FROM t --- !query 1019 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) >= CAST(CAST(1 AS INT) AS DECIMAL(10,0))):boolean> --- !query 1019 output +-- !query output true --- !query 1020 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as int) FROM t --- !query 1020 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 1020 output +-- !query output true --- !query 1021 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as bigint) FROM t --- !query 1021 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 1021 output +-- !query output true --- !query 1022 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as bigint) FROM t --- !query 1022 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 1022 output +-- !query output true --- !query 1023 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as bigint) FROM t --- !query 1023 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) >= CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):boolean> --- !query 1023 output +-- !query output true --- !query 1024 +-- 
!query SELECT cast(1 as decimal(20, 0)) >= cast(1 as bigint) FROM t --- !query 1024 schema +-- !query schema struct<(CAST(1 AS DECIMAL(20,0)) >= CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0))):boolean> --- !query 1024 output +-- !query output true --- !query 1025 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as float) FROM t --- !query 1025 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 1025 output +-- !query output true --- !query 1026 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as float) FROM t --- !query 1026 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 1026 output +-- !query output true --- !query 1027 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as float) FROM t --- !query 1027 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 1027 output +-- !query output true --- !query 1028 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as float) FROM t --- !query 1028 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) >= CAST(CAST(1 AS FLOAT) AS DOUBLE)):boolean> --- !query 1028 output +-- !query output true --- !query 1029 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as double) FROM t --- !query 1029 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 1029 output +-- !query output true --- !query 1030 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as double) FROM t --- !query 1030 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 1030 output +-- !query output true --- !query 1031 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as double) FROM t --- !query 1031 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) 
>= CAST(1 AS DOUBLE)):boolean> --- !query 1031 output +-- !query output true --- !query 1032 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as double) FROM t --- !query 1032 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 1032 output +-- !query output true --- !query 1033 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as decimal(10, 0)) FROM t --- !query 1033 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 1033 output +-- !query output true --- !query 1034 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as decimal(10, 0)) FROM t --- !query 1034 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):boolean> --- !query 1034 output +-- !query output true --- !query 1035 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as decimal(10, 0)) FROM t --- !query 1035 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS DECIMAL(10,0))):boolean> --- !query 1035 output +-- !query output true --- !query 1036 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as decimal(10, 0)) FROM t --- !query 1036 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):boolean> --- !query 1036 output +-- !query output true --- !query 1037 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as string) FROM t --- !query 1037 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 1037 output +-- !query output true --- !query 1038 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as string) FROM t --- !query 1038 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 1038 output 
+-- !query output true --- !query 1039 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as string) FROM t --- !query 1039 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 1039 output +-- !query output true --- !query 1040 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as string) FROM t --- !query 1040 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) >= CAST(CAST(1 AS STRING) AS DOUBLE)):boolean> --- !query 1040 output +-- !query output true --- !query 1041 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast('1' as binary) FROM t --- !query 1041 schema +-- !query schema struct<> --- !query 1041 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 1042 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast('1' as binary) FROM t --- !query 1042 schema +-- !query schema struct<> --- !query 1042 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 1043 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast('1' as binary) FROM t --- !query 1043 schema +-- !query schema struct<> --- !query 1043 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 1044 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast('1' as binary) FROM t --- !query 1044 schema +-- !query schema struct<> --- !query 1044 
output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 1045 +-- !query SELECT cast(1 as decimal(3, 0)) >= cast(1 as boolean) FROM t --- !query 1045 schema +-- !query schema struct<> --- !query 1045 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST(1 AS BOOLEAN))' (decimal(3,0) and boolean).; line 1 pos 7 --- !query 1046 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast(1 as boolean) FROM t --- !query 1046 schema +-- !query schema struct<> --- !query 1046 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST(1 AS BOOLEAN))' (decimal(5,0) and boolean).; line 1 pos 7 --- !query 1047 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast(1 as boolean) FROM t --- !query 1047 schema +-- !query schema struct<> --- !query 1047 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 1048 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast(1 as boolean) FROM t --- !query 1048 schema +-- !query schema struct<> --- !query 1048 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST(1 AS BOOLEAN))' (decimal(20,0) and boolean).; line 1 pos 7 --- !query 1049 +-- !query SELECT 
cast(1 as decimal(3, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1049 schema +-- !query schema struct<> --- !query 1049 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(3,0) and timestamp).; line 1 pos 7 --- !query 1050 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1050 schema +-- !query schema struct<> --- !query 1050 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 1051 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1051 schema +-- !query schema struct<> --- !query 1051 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 1052 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1052 schema +-- !query schema struct<> --- !query 1052 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 1053 +-- !query SELECT cast(1 as 
decimal(3, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t --- !query 1053 schema +-- !query schema struct<> --- !query 1053 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 --- !query 1054 +-- !query SELECT cast(1 as decimal(5, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t --- !query 1054 schema +-- !query schema struct<> --- !query 1054 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 1055 +-- !query SELECT cast(1 as decimal(10, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t --- !query 1055 schema +-- !query schema struct<> --- !query 1055 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 1056 +-- !query SELECT cast(1 as decimal(20, 0)) >= cast('2017-12-11 09:30:00' as date) FROM t --- !query 1056 schema +-- !query schema struct<> --- !query 1056 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) >= CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 --- !query 1057 +-- !query SELECT cast(1 as tinyint) <> cast(1 as decimal(3, 0)) FROM t --- !query 1057 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS 
TINYINT) AS DECIMAL(3,0)) = CAST(1 AS DECIMAL(3,0)))):boolean> --- !query 1057 output +-- !query output false --- !query 1058 +-- !query SELECT cast(1 as tinyint) <> cast(1 as decimal(5, 0)) FROM t --- !query 1058 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)))):boolean> --- !query 1058 output +-- !query output false --- !query 1059 +-- !query SELECT cast(1 as tinyint) <> cast(1 as decimal(10, 0)) FROM t --- !query 1059 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 1059 output +-- !query output false --- !query 1060 +-- !query SELECT cast(1 as tinyint) <> cast(1 as decimal(20, 0)) FROM t --- !query 1060 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> --- !query 1060 output +-- !query output false --- !query 1061 +-- !query SELECT cast(1 as smallint) <> cast(1 as decimal(3, 0)) FROM t --- !query 1061 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)))):boolean> --- !query 1061 output +-- !query output false --- !query 1062 +-- !query SELECT cast(1 as smallint) <> cast(1 as decimal(5, 0)) FROM t --- !query 1062 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) = CAST(1 AS DECIMAL(5,0)))):boolean> --- !query 1062 output +-- !query output false --- !query 1063 +-- !query SELECT cast(1 as smallint) <> cast(1 as decimal(10, 0)) FROM t --- !query 1063 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 1063 output +-- !query output false --- !query 1064 +-- !query SELECT cast(1 as 
smallint) <> cast(1 as decimal(20, 0)) FROM t --- !query 1064 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> --- !query 1064 output +-- !query output false --- !query 1065 +-- !query SELECT cast(1 as int) <> cast(1 as decimal(3, 0)) FROM t --- !query 1065 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)))):boolean> --- !query 1065 output +-- !query output false --- !query 1066 +-- !query SELECT cast(1 as int) <> cast(1 as decimal(5, 0)) FROM t --- !query 1066 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)))):boolean> --- !query 1066 output +-- !query output false --- !query 1067 +-- !query SELECT cast(1 as int) <> cast(1 as decimal(10, 0)) FROM t --- !query 1067 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS INT) AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0)))):boolean> --- !query 1067 output +-- !query output false --- !query 1068 +-- !query SELECT cast(1 as int) <> cast(1 as decimal(20, 0)) FROM t --- !query 1068 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> --- !query 1068 output +-- !query output false --- !query 1069 +-- !query SELECT cast(1 as bigint) <> cast(1 as decimal(3, 0)) FROM t --- !query 1069 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)))):boolean> --- !query 1069 output +-- !query output false --- !query 1070 +-- !query SELECT cast(1 as bigint) <> cast(1 as decimal(5, 0)) FROM t --- !query 1070 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS 
DECIMAL(5,0)) AS DECIMAL(20,0)))):boolean> --- !query 1070 output +-- !query output false --- !query 1071 +-- !query SELECT cast(1 as bigint) <> cast(1 as decimal(10, 0)) FROM t --- !query 1071 schema +-- !query schema struct<(NOT (CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> --- !query 1071 output +-- !query output false --- !query 1072 +-- !query SELECT cast(1 as bigint) <> cast(1 as decimal(20, 0)) FROM t --- !query 1072 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) = CAST(1 AS DECIMAL(20,0)))):boolean> --- !query 1072 output +-- !query output false --- !query 1073 +-- !query SELECT cast(1 as float) <> cast(1 as decimal(3, 0)) FROM t --- !query 1073 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE))):boolean> --- !query 1073 output +-- !query output false --- !query 1074 +-- !query SELECT cast(1 as float) <> cast(1 as decimal(5, 0)) FROM t --- !query 1074 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE))):boolean> --- !query 1074 output +-- !query output false --- !query 1075 +-- !query SELECT cast(1 as float) <> cast(1 as decimal(10, 0)) FROM t --- !query 1075 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 1075 output +-- !query output false --- !query 1076 +-- !query SELECT cast(1 as float) <> cast(1 as decimal(20, 0)) FROM t --- !query 1076 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS FLOAT) AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE))):boolean> --- !query 1076 output +-- !query output false --- !query 1077 +-- !query SELECT cast(1 as double) <> cast(1 as decimal(3, 0)) FROM t --- !query 1077 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE))):boolean> --- !query 1077 
output +-- !query output false --- !query 1078 +-- !query SELECT cast(1 as double) <> cast(1 as decimal(5, 0)) FROM t --- !query 1078 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE))):boolean> --- !query 1078 output +-- !query output false --- !query 1079 +-- !query SELECT cast(1 as double) <> cast(1 as decimal(10, 0)) FROM t --- !query 1079 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 1079 output +-- !query output false --- !query 1080 +-- !query SELECT cast(1 as double) <> cast(1 as decimal(20, 0)) FROM t --- !query 1080 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE))):boolean> --- !query 1080 output +-- !query output false --- !query 1081 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(3, 0)) FROM t --- !query 1081 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)))):boolean> --- !query 1081 output +-- !query output false --- !query 1082 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(5, 0)) FROM t --- !query 1082 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)))):boolean> --- !query 1082 output +-- !query output false --- !query 1083 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(10, 0)) FROM t --- !query 1083 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0)))):boolean> --- !query 1083 output +-- !query output false --- !query 1084 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(20, 0)) FROM t --- !query 1084 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> --- !query 1084 output +-- !query output false --- !query 
1085 +-- !query SELECT cast('1' as binary) <> cast(1 as decimal(3, 0)) FROM t --- !query 1085 schema +-- !query schema struct<> --- !query 1085 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(3,0)))' (binary and decimal(3,0)).; line 1 pos 7 --- !query 1086 +-- !query SELECT cast('1' as binary) <> cast(1 as decimal(5, 0)) FROM t --- !query 1086 schema +-- !query schema struct<> --- !query 1086 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(5,0)))' (binary and decimal(5,0)).; line 1 pos 7 --- !query 1087 +-- !query SELECT cast('1' as binary) <> cast(1 as decimal(10, 0)) FROM t --- !query 1087 schema +-- !query schema struct<> --- !query 1087 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 1088 +-- !query SELECT cast('1' as binary) <> cast(1 as decimal(20, 0)) FROM t --- !query 1088 schema +-- !query schema struct<> --- !query 1088 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) = CAST(1 AS DECIMAL(20,0)))' (binary and decimal(20,0)).; line 1 pos 7 --- !query 1089 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(3, 0)) FROM t --- !query 1089 schema +-- !query schema struct<> --- !query 1089 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS 
DECIMAL(3,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(3,0)))' (timestamp and decimal(3,0)).; line 1 pos 7 --- !query 1090 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(5, 0)) FROM t --- !query 1090 schema +-- !query schema struct<> --- !query 1090 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(5,0)))' (timestamp and decimal(5,0)).; line 1 pos 7 --- !query 1091 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(10, 0)) FROM t --- !query 1091 schema +-- !query schema struct<> --- !query 1091 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 1092 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> cast(1 as decimal(20, 0)) FROM t --- !query 1092 schema +-- !query schema struct<> --- !query 1092 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) = CAST(1 AS DECIMAL(20,0)))' (timestamp and decimal(20,0)).; line 1 pos 7 --- !query 1093 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(3, 0)) FROM t --- !query 1093 schema +-- !query schema struct<> --- !query 1093 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' due to data type 
mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(3,0)))' (date and decimal(3,0)).; line 1 pos 7 --- !query 1094 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(5, 0)) FROM t --- !query 1094 schema +-- !query schema struct<> --- !query 1094 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(5,0)))' (date and decimal(5,0)).; line 1 pos 7 --- !query 1095 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(10, 0)) FROM t --- !query 1095 schema +-- !query schema struct<> --- !query 1095 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 1096 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <> cast(1 as decimal(20, 0)) FROM t --- !query 1096 schema +-- !query schema struct<> --- !query 1096 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) = CAST(1 AS DECIMAL(20,0)))' (date and decimal(20,0)).; line 1 pos 7 --- !query 1097 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as tinyint) FROM t --- !query 1097 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(3,0)) = CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)))):boolean> --- !query 1097 output +-- !query output false --- !query 1098 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as tinyint) FROM t --- !query 1098 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(5,0)) = 
CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(5,0)))):boolean> --- !query 1098 output +-- !query output false --- !query 1099 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as tinyint) FROM t --- !query 1099 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)))):boolean> --- !query 1099 output +-- !query output false --- !query 1100 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as tinyint) FROM t --- !query 1100 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(20,0)))):boolean> --- !query 1100 output +-- !query output false --- !query 1101 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as smallint) FROM t --- !query 1101 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(5,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(5,0)))):boolean> --- !query 1101 output +-- !query output false --- !query 1102 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as smallint) FROM t --- !query 1102 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)))):boolean> --- !query 1102 output +-- !query output false --- !query 1103 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as smallint) FROM t --- !query 1103 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)))):boolean> --- !query 1103 output +-- !query output false --- !query 1104 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as smallint) FROM t --- !query 1104 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(20,0)))):boolean> --- !query 1104 output +-- !query output false --- !query 1105 +-- !query 
SELECT cast(1 as decimal(3, 0)) <> cast(1 as int) FROM t --- !query 1105 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 1105 output +-- !query output false --- !query 1106 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as int) FROM t --- !query 1106 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 1106 output +-- !query output false --- !query 1107 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as int) FROM t --- !query 1107 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS INT) AS DECIMAL(10,0)))):boolean> --- !query 1107 output +-- !query output false --- !query 1108 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as int) FROM t --- !query 1108 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> --- !query 1108 output +-- !query output false --- !query 1109 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as bigint) FROM t --- !query 1109 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> --- !query 1109 output +-- !query output false --- !query 1110 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as bigint) FROM t --- !query 1110 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> --- !query 1110 output +-- !query output false --- !query 1111 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as bigint) FROM t --- !query 1111 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) = CAST(CAST(CAST(1 AS BIGINT) 
AS DECIMAL(20,0)) AS DECIMAL(20,0)))):boolean> --- !query 1111 output +-- !query output false --- !query 1112 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as bigint) FROM t --- !query 1112 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)))):boolean> --- !query 1112 output +-- !query output false --- !query 1113 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as float) FROM t --- !query 1113 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 1113 output +-- !query output false --- !query 1114 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as float) FROM t --- !query 1114 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 1114 output +-- !query output false --- !query 1115 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as float) FROM t --- !query 1115 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 1115 output +-- !query output false --- !query 1116 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as float) FROM t --- !query 1116 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 1116 output +-- !query output false --- !query 1117 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as double) FROM t --- !query 1117 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 1117 output +-- !query output false --- !query 1118 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as double) FROM t --- !query 1118 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 1118 output +-- !query output false --- !query 1119 +-- 
!query SELECT cast(1 as decimal(10, 0)) <> cast(1 as double) FROM t --- !query 1119 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 1119 output +-- !query output false --- !query 1120 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as double) FROM t --- !query 1120 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 1120 output +-- !query output false --- !query 1121 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as decimal(10, 0)) FROM t --- !query 1121 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 1121 output +-- !query output false --- !query 1122 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as decimal(10, 0)) FROM t --- !query 1122 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(10,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 1122 output +-- !query output false --- !query 1123 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as decimal(10, 0)) FROM t --- !query 1123 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(1 AS DECIMAL(10,0)))):boolean> --- !query 1123 output +-- !query output false --- !query 1124 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as decimal(10, 0)) FROM t --- !query 1124 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(20,0)) = CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> --- !query 1124 output +-- !query output false --- !query 1125 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as string) FROM t --- !query 1125 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> --- !query 1125 output +-- !query output false --- !query 1126 +-- !query SELECT cast(1 as 
decimal(5, 0)) <> cast(1 as string) FROM t --- !query 1126 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(5,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> --- !query 1126 output +-- !query output false --- !query 1127 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as string) FROM t --- !query 1127 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> --- !query 1127 output +-- !query output false --- !query 1128 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as string) FROM t --- !query 1128 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(20,0)) AS DOUBLE) = CAST(CAST(1 AS STRING) AS DOUBLE))):boolean> --- !query 1128 output +-- !query output false --- !query 1129 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast('1' as binary) FROM t --- !query 1129 schema +-- !query schema struct<> --- !query 1129 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('1' AS BINARY))' (decimal(3,0) and binary).; line 1 pos 7 --- !query 1130 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast('1' as binary) FROM t --- !query 1130 schema +-- !query schema struct<> --- !query 1130 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('1' AS BINARY))' (decimal(5,0) and binary).; line 1 pos 7 --- !query 1131 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast('1' as binary) FROM t --- !query 1131 schema +-- !query schema struct<> --- !query 1131 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('1' AS 
BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 1132 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast('1' as binary) FROM t --- !query 1132 schema +-- !query schema struct<> --- !query 1132 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('1' AS BINARY))' (decimal(20,0) and binary).; line 1 pos 7 --- !query 1133 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast(1 as boolean) FROM t --- !query 1133 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(3,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(3,0)))):boolean> --- !query 1133 output +-- !query output false --- !query 1134 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast(1 as boolean) FROM t --- !query 1134 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(5,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(5,0)))):boolean> --- !query 1134 output +-- !query output false --- !query 1135 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast(1 as boolean) FROM t --- !query 1135 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(10,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(10,0)))):boolean> --- !query 1135 output +-- !query output false --- !query 1136 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast(1 as boolean) FROM t --- !query 1136 schema +-- !query schema struct<(NOT (CAST(1 AS DECIMAL(20,0)) = CAST(CAST(1 AS BOOLEAN) AS DECIMAL(20,0)))):boolean> --- !query 1136 output +-- !query output false --- !query 1137 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1137 schema +-- !query schema struct<> --- !query 1137 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' 
(decimal(3,0) and timestamp).; line 1 pos 7 --- !query 1138 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1138 schema +-- !query schema struct<> --- !query 1138 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(5,0) and timestamp).; line 1 pos 7 --- !query 1139 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1139 schema +-- !query schema struct<> --- !query 1139 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 1140 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 1140 schema +-- !query schema struct<> --- !query 1140 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(20,0) and timestamp).; line 1 pos 7 --- !query 1141 +-- !query SELECT cast(1 as decimal(3, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t --- !query 1141 schema +-- !query schema struct<> --- !query 1141 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(3,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(3,0) and date).; line 1 pos 7 
--- !query 1142 +-- !query SELECT cast(1 as decimal(5, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t --- !query 1142 schema +-- !query schema struct<> --- !query 1142 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(5,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(5,0) and date).; line 1 pos 7 --- !query 1143 +-- !query SELECT cast(1 as decimal(10, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t --- !query 1143 schema +-- !query schema struct<> --- !query 1143 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 1144 +-- !query SELECT cast(1 as decimal(20, 0)) <> cast('2017-12-11 09:30:00' as date) FROM t --- !query 1144 schema +-- !query schema struct<> --- !query 1144 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(20,0)) = CAST('2017-12-11 09:30:00' AS DATE))' (decimal(20,0) and date).; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/division.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/division.sql.out index 017e0fea30e90..ae933da59f63f 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/division.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/division.sql.out @@ -2,1241 +2,1241 @@ -- Number of queries: 145 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query 
output --- !query 1 +-- !query SELECT cast(1 as tinyint) / cast(1 as tinyint) FROM t --- !query 1 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 1 output +-- !query output 1.0 --- !query 2 +-- !query SELECT cast(1 as tinyint) / cast(1 as smallint) FROM t --- !query 2 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 2 output +-- !query output 1.0 --- !query 3 +-- !query SELECT cast(1 as tinyint) / cast(1 as int) FROM t --- !query 3 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 3 output +-- !query output 1.0 --- !query 4 +-- !query SELECT cast(1 as tinyint) / cast(1 as bigint) FROM t --- !query 4 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 4 output +-- !query output 1.0 --- !query 5 +-- !query SELECT cast(1 as tinyint) / cast(1 as float) FROM t --- !query 5 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 5 output +-- !query output 1.0 --- !query 6 +-- !query SELECT cast(1 as tinyint) / cast(1 as double) FROM t --- !query 6 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 6 output +-- !query output 1.0 --- !query 7 +-- !query SELECT cast(1 as tinyint) / cast(1 as decimal(10, 0)) FROM t --- !query 7 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(14,11)> --- !query 7 output -1 +-- !query output +1.00000000000 --- !query 8 +-- !query SELECT cast(1 as tinyint) / cast(1 as string) FROM t --- !query 8 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS 
DOUBLE)):double> --- !query 8 output +-- !query output 1.0 --- !query 9 +-- !query SELECT cast(1 as tinyint) / cast('1' as binary) FROM t --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST('1' AS BINARY))' (tinyint and binary).; line 1 pos 7 --- !query 10 +-- !query SELECT cast(1 as tinyint) / cast(1 as boolean) FROM t --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST(1 AS BOOLEAN))' (tinyint and boolean).; line 1 pos 7 --- !query 11 +-- !query SELECT cast(1 as tinyint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (tinyint and timestamp).; line 1 pos 7 --- !query 12 +-- !query SELECT cast(1 as tinyint) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS TINYINT) / CAST('2017-12-11 09:30:00' AS DATE))' (tinyint and date).; line 1 pos 7 --- !query 13 +-- !query SELECT cast(1 as smallint) / cast(1 as tinyint) FROM t --- !query 13 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 13 output +-- !query output 1.0 --- 
!query 14 +-- !query SELECT cast(1 as smallint) / cast(1 as smallint) FROM t --- !query 14 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 14 output +-- !query output 1.0 --- !query 15 +-- !query SELECT cast(1 as smallint) / cast(1 as int) FROM t --- !query 15 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 15 output +-- !query output 1.0 --- !query 16 +-- !query SELECT cast(1 as smallint) / cast(1 as bigint) FROM t --- !query 16 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 16 output +-- !query output 1.0 --- !query 17 +-- !query SELECT cast(1 as smallint) / cast(1 as float) FROM t --- !query 17 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 17 output +-- !query output 1.0 --- !query 18 +-- !query SELECT cast(1 as smallint) / cast(1 as double) FROM t --- !query 18 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 18 output +-- !query output 1.0 --- !query 19 +-- !query SELECT cast(1 as smallint) / cast(1 as decimal(10, 0)) FROM t --- !query 19 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0))):decimal(16,11)> --- !query 19 output -1 +-- !query output +1.00000000000 --- !query 20 +-- !query SELECT cast(1 as smallint) / cast(1 as string) FROM t --- !query 20 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> --- !query 20 output +-- !query output 1.0 --- !query 21 +-- !query SELECT cast(1 as smallint) / cast('1' as binary) FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST('1' AS BINARY))' (smallint and binary).; line 1 pos 7 --- !query 22 +-- !query SELECT cast(1 as smallint) / cast(1 as boolean) FROM t --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST(1 AS BOOLEAN))' (smallint and boolean).; line 1 pos 7 --- !query 23 +-- !query SELECT cast(1 as smallint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (smallint and timestamp).; line 1 pos 7 --- !query 24 +-- !query SELECT cast(1 as smallint) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS SMALLINT) / CAST('2017-12-11 09:30:00' AS DATE))' (smallint and date).; line 1 pos 7 --- !query 25 +-- !query SELECT cast(1 as int) / cast(1 as tinyint) FROM t --- !query 25 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 25 output +-- !query output 1.0 --- !query 26 +-- !query SELECT cast(1 as int) / cast(1 as smallint) FROM t --- !query 26 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 26 output 
+-- !query output 1.0 --- !query 27 +-- !query SELECT cast(1 as int) / cast(1 as int) FROM t --- !query 27 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 27 output +-- !query output 1.0 --- !query 28 +-- !query SELECT cast(1 as int) / cast(1 as bigint) FROM t --- !query 28 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 28 output +-- !query output 1.0 --- !query 29 +-- !query SELECT cast(1 as int) / cast(1 as float) FROM t --- !query 29 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 29 output +-- !query output 1.0 --- !query 30 +-- !query SELECT cast(1 as int) / cast(1 as double) FROM t --- !query 30 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 30 output +-- !query output 1.0 --- !query 31 +-- !query SELECT cast(1 as int) / cast(1 as decimal(10, 0)) FROM t --- !query 31 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> --- !query 31 output -1 +-- !query output +1.00000000000 --- !query 32 +-- !query SELECT cast(1 as int) / cast(1 as string) FROM t --- !query 32 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> --- !query 32 output +-- !query output 1.0 --- !query 33 +-- !query SELECT cast(1 as int) / cast('1' as binary) FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST('1' AS BINARY))' (int and binary).; line 1 pos 7 --- !query 34 +-- !query SELECT cast(1 as int) / cast(1 as boolean) FROM t --- !query 34 schema +-- !query schema struct<> 
--- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST(1 AS BOOLEAN))' (int and boolean).; line 1 pos 7 --- !query 35 +-- !query SELECT cast(1 as int) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (int and timestamp).; line 1 pos 7 --- !query 36 +-- !query SELECT cast(1 as int) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS INT) / CAST('2017-12-11 09:30:00' AS DATE))' (int and date).; line 1 pos 7 --- !query 37 +-- !query SELECT cast(1 as bigint) / cast(1 as tinyint) FROM t --- !query 37 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 37 output +-- !query output 1.0 --- !query 38 +-- !query SELECT cast(1 as bigint) / cast(1 as smallint) FROM t --- !query 38 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 38 output +-- !query output 1.0 --- !query 39 +-- !query SELECT cast(1 as bigint) / cast(1 as int) FROM t --- !query 39 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 39 output +-- !query output 1.0 --- !query 40 +-- !query SELECT cast(1 as bigint) / cast(1 as bigint) FROM t --- !query 40 schema +-- !query schema struct<(CAST(CAST(1 
AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 40 output +-- !query output 1.0 --- !query 41 +-- !query SELECT cast(1 as bigint) / cast(1 as float) FROM t --- !query 41 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 41 output +-- !query output 1.0 --- !query 42 +-- !query SELECT cast(1 as bigint) / cast(1 as double) FROM t --- !query 42 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 42 output +-- !query output 1.0 --- !query 43 +-- !query SELECT cast(1 as bigint) / cast(1 as decimal(10, 0)) FROM t --- !query 43 schema +-- !query schema struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0)) / CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0))):decimal(31,11)> --- !query 43 output -1 +-- !query output +1.00000000000 --- !query 44 +-- !query SELECT cast(1 as bigint) / cast(1 as string) FROM t --- !query 44 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> --- !query 44 output +-- !query output 1.0 --- !query 45 +-- !query SELECT cast(1 as bigint) / cast('1' as binary) FROM t --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST('1' AS BINARY))' (bigint and binary).; line 1 pos 7 --- !query 46 +-- !query SELECT cast(1 as bigint) / cast(1 as boolean) FROM t --- !query 46 schema +-- !query schema struct<> --- !query 46 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST(1 AS BOOLEAN))' (bigint and boolean).; line 1 pos 7 --- !query 47 +-- !query SELECT cast(1 as 
bigint) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (bigint and timestamp).; line 1 pos 7 --- !query 48 +-- !query SELECT cast(1 as bigint) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS BIGINT) / CAST('2017-12-11 09:30:00' AS DATE))' (bigint and date).; line 1 pos 7 --- !query 49 +-- !query SELECT cast(1 as float) / cast(1 as tinyint) FROM t --- !query 49 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 49 output +-- !query output 1.0 --- !query 50 +-- !query SELECT cast(1 as float) / cast(1 as smallint) FROM t --- !query 50 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 50 output +-- !query output 1.0 --- !query 51 +-- !query SELECT cast(1 as float) / cast(1 as int) FROM t --- !query 51 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 51 output +-- !query output 1.0 --- !query 52 +-- !query SELECT cast(1 as float) / cast(1 as bigint) FROM t --- !query 52 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 52 output +-- !query output 1.0 --- !query 53 +-- !query SELECT cast(1 as float) / cast(1 as float) FROM t --- !query 53 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 
AS FLOAT) AS DOUBLE)):double> --- !query 53 output +-- !query output 1.0 --- !query 54 +-- !query SELECT cast(1 as float) / cast(1 as double) FROM t --- !query 54 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 54 output +-- !query output 1.0 --- !query 55 +-- !query SELECT cast(1 as float) / cast(1 as decimal(10, 0)) FROM t --- !query 55 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) AS DOUBLE)):double> --- !query 55 output +-- !query output 1.0 --- !query 56 +-- !query SELECT cast(1 as float) / cast(1 as string) FROM t --- !query 56 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(CAST(1 AS STRING) AS DOUBLE) AS DOUBLE)):double> --- !query 56 output +-- !query output 1.0 --- !query 57 +-- !query SELECT cast(1 as float) / cast('1' as binary) FROM t --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / CAST('1' AS BINARY))' (float and binary).; line 1 pos 7 --- !query 58 +-- !query SELECT cast(1 as float) / cast(1 as boolean) FROM t --- !query 58 schema +-- !query schema struct<> --- !query 58 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / CAST(1 AS BOOLEAN))' (float and boolean).; line 1 pos 7 --- !query 59 +-- !query SELECT cast(1 as float) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (float and timestamp).; line 1 pos 7 --- !query 60 +-- !query SELECT cast(1 as float) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS FLOAT) / CAST('2017-12-11 09:30:00' AS DATE))' (float and date).; line 1 pos 7 --- !query 61 +-- !query SELECT cast(1 as double) / cast(1 as tinyint) FROM t --- !query 61 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 61 output +-- !query output 1.0 --- !query 62 +-- !query SELECT cast(1 as double) / cast(1 as smallint) FROM t --- !query 62 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 62 output +-- !query output 1.0 --- !query 63 +-- !query SELECT cast(1 as double) / cast(1 as int) FROM t --- !query 63 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 63 output +-- !query output 1.0 --- !query 64 +-- !query SELECT cast(1 as double) / cast(1 as bigint) FROM t --- !query 64 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 64 output +-- !query output 1.0 --- !query 65 +-- !query SELECT cast(1 as double) / cast(1 as float) FROM t --- !query 65 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 65 output +-- !query output 1.0 --- !query 66 +-- !query SELECT cast(1 as double) / cast(1 as double) FROM t --- !query 66 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 66 output +-- !query output 1.0 --- !query 67 +-- !query SELECT cast(1 as double) / cast(1 as decimal(10, 0)) FROM t --- !query 67 schema +-- !query schema 
struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 67 output +-- !query output 1.0 --- !query 68 +-- !query SELECT cast(1 as double) / cast(1 as string) FROM t --- !query 68 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 68 output +-- !query output 1.0 --- !query 69 +-- !query SELECT cast(1 as double) / cast('1' as binary) FROM t --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 70 +-- !query SELECT cast(1 as double) / cast(1 as boolean) FROM t --- !query 70 schema +-- !query schema struct<> --- !query 70 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 71 +-- !query SELECT cast(1 as double) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 --- !query 72 +-- !query SELECT cast(1 as double) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' (double 
and date).; line 1 pos 7 --- !query 73 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as tinyint) FROM t --- !query 73 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 73 output -1 +-- !query output +1.000000 --- !query 74 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as smallint) FROM t --- !query 74 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) / CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(10,0))):decimal(16,6)> --- !query 74 output -1 +-- !query output +1.000000 --- !query 75 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as int) FROM t --- !query 75 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) / CAST(CAST(1 AS INT) AS DECIMAL(10,0))):decimal(21,11)> --- !query 75 output -1 +-- !query output +1.00000000000 --- !query 76 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as bigint) FROM t --- !query 76 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) / CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(20,0))):decimal(31,21)> --- !query 76 output -1 +-- !query output +1.000000000000000000000 --- !query 77 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as float) FROM t --- !query 77 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 77 output +-- !query output 1.0 --- !query 78 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as double) FROM t --- !query 78 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 78 output +-- !query output 1.0 --- !query 79 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as decimal(10, 0)) FROM t --- !query 79 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS DECIMAL(10,0))):decimal(21,11)> --- !query 79 output -1 +-- 
!query output +1.00000000000 --- !query 80 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as string) FROM t --- !query 80 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> --- !query 80 output +-- !query output 1.0 --- !query 81 +-- !query SELECT cast(1 as decimal(10, 0)) / cast('1' as binary) FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('1' AS BINARY))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 82 +-- !query SELECT cast(1 as decimal(10, 0)) / cast(1 as boolean) FROM t --- !query 82 schema +-- !query schema struct<> --- !query 82 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST(1 AS BOOLEAN))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 83 +-- !query SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 84 +-- !query SELECT cast(1 as decimal(10, 0)) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS DECIMAL(10,0)) / 
CAST('2017-12-11 09:30:00' AS DATE))' (decimal(10,0) and date).; line 1 pos 7 --- !query 85 +-- !query SELECT cast(1 as string) / cast(1 as tinyint) FROM t --- !query 85 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 85 output +-- !query output 1.0 --- !query 86 +-- !query SELECT cast(1 as string) / cast(1 as smallint) FROM t --- !query 86 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 86 output +-- !query output 1.0 --- !query 87 +-- !query SELECT cast(1 as string) / cast(1 as int) FROM t --- !query 87 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 87 output +-- !query output 1.0 --- !query 88 +-- !query SELECT cast(1 as string) / cast(1 as bigint) FROM t --- !query 88 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 88 output +-- !query output 1.0 --- !query 89 +-- !query SELECT cast(1 as string) / cast(1 as float) FROM t --- !query 89 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 89 output +-- !query output 1.0 --- !query 90 +-- !query SELECT cast(1 as string) / cast(1 as double) FROM t --- !query 90 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 90 output +-- !query output 1.0 --- !query 91 +-- !query SELECT cast(1 as string) / cast(1 as decimal(10, 0)) FROM t --- !query 91 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 91 output +-- !query output 1.0 --- !query 92 +-- !query SELECT cast(1 as string) / cast(1 as string) FROM t --- !query 92 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(CAST(1 AS STRING) AS DOUBLE)):double> 
--- !query 92 output +-- !query output 1.0 --- !query 93 +-- !query SELECT cast(1 as string) / cast('1' as binary) FROM t --- !query 93 schema +-- !query schema struct<> --- !query 93 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 94 +-- !query SELECT cast(1 as string) / cast(1 as boolean) FROM t --- !query 94 schema +-- !query schema struct<> --- !query 94 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 95 +-- !query SELECT cast(1 as string) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 95 schema +-- !query schema struct<> --- !query 95 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 --- !query 96 +-- !query SELECT cast(1 as string) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 96 schema +-- !query schema struct<> --- !query 96 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(CAST(1 AS STRING) AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 --- !query 97 +-- !query SELECT cast('1' as binary) / cast(1 as tinyint) FROM t --- !query 97 schema +-- !query schema struct<> --- !query 97 output +-- 
!query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS TINYINT))' (binary and tinyint).; line 1 pos 7 --- !query 98 +-- !query SELECT cast('1' as binary) / cast(1 as smallint) FROM t --- !query 98 schema +-- !query schema struct<> --- !query 98 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS SMALLINT))' (binary and smallint).; line 1 pos 7 --- !query 99 +-- !query SELECT cast('1' as binary) / cast(1 as int) FROM t --- !query 99 schema +-- !query schema struct<> --- !query 99 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS INT))' (binary and int).; line 1 pos 7 --- !query 100 +-- !query SELECT cast('1' as binary) / cast(1 as bigint) FROM t --- !query 100 schema +-- !query schema struct<> --- !query 100 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS BIGINT))' (binary and bigint).; line 1 pos 7 --- !query 101 +-- !query SELECT cast('1' as binary) / cast(1 as float) FROM t --- !query 101 schema +-- !query schema struct<> --- !query 101 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS FLOAT))' (binary and float).; line 1 pos 7 --- !query 102 +-- !query SELECT cast('1' as binary) / cast(1 as double) FROM t --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 103 +-- !query SELECT cast('1' as binary) / cast(1 as decimal(10, 0)) FROM t --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS DECIMAL(10,0)))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 104 +-- !query SELECT cast('1' as binary) / cast(1 as string) FROM t --- !query 104 schema +-- !query schema struct<> --- !query 104 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(CAST(1 AS STRING) AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 105 +-- !query SELECT cast('1' as binary) / cast('1' as binary) FROM t --- !query 105 schema +-- !query schema struct<> --- !query 105 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST('1' AS BINARY))' due to data type mismatch: '(CAST('1' AS BINARY) / CAST('1' AS BINARY))' requires (double or decimal) type, not binary; line 1 pos 7 --- !query 106 +-- !query SELECT cast('1' as binary) / cast(1 as boolean) FROM t --- !query 106 schema +-- !query schema struct<> --- !query 106 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST(1 AS BOOLEAN))' (binary and boolean).; line 1 pos 7 --- !query 107 +-- !query SELECT cast('1' as binary) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 107 schema +-- 
!query schema struct<> --- !query 107 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (binary and timestamp).; line 1 pos 7 --- !query 108 +-- !query SELECT cast('1' as binary) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 108 schema +-- !query schema struct<> --- !query 108 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST('2017-12-11 09:30:00' AS DATE))' (binary and date).; line 1 pos 7 --- !query 109 +-- !query SELECT cast(1 as boolean) / cast(1 as tinyint) FROM t --- !query 109 schema +-- !query schema struct<> --- !query 109 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS TINYINT))' (boolean and tinyint).; line 1 pos 7 --- !query 110 +-- !query SELECT cast(1 as boolean) / cast(1 as smallint) FROM t --- !query 110 schema +-- !query schema struct<> --- !query 110 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS SMALLINT))' (boolean and smallint).; line 1 pos 7 --- !query 111 +-- !query SELECT cast(1 as boolean) / cast(1 as int) FROM t --- !query 111 schema +-- !query schema struct<> --- !query 111 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS INT))' (boolean and int).; line 1 pos 7 --- !query 112 +-- !query SELECT cast(1 as 
boolean) / cast(1 as bigint) FROM t --- !query 112 schema +-- !query schema struct<> --- !query 112 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS BIGINT))' (boolean and bigint).; line 1 pos 7 --- !query 113 +-- !query SELECT cast(1 as boolean) / cast(1 as float) FROM t --- !query 113 schema +-- !query schema struct<> --- !query 113 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS FLOAT))' (boolean and float).; line 1 pos 7 --- !query 114 +-- !query SELECT cast(1 as boolean) / cast(1 as double) FROM t --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 115 +-- !query SELECT cast(1 as boolean) / cast(1 as decimal(10, 0)) FROM t --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(1 AS DECIMAL(10,0)))' (boolean and decimal(10,0)).; line 1 pos 7 --- !query 116 +-- !query SELECT cast(1 as boolean) / cast(1 as string) FROM t --- !query 116 schema +-- !query schema struct<> --- !query 116 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST(CAST(1 AS STRING) AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 117 +-- !query SELECT 
cast(1 as boolean) / cast('1' as binary) FROM t --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('1' AS BINARY))' (boolean and binary).; line 1 pos 7 --- !query 118 +-- !query SELECT cast(1 as boolean) / cast(1 as boolean) FROM t --- !query 118 schema +-- !query schema struct<> --- !query 118 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST(1 AS BOOLEAN))' due to data type mismatch: '(CAST(1 AS BOOLEAN) / CAST(1 AS BOOLEAN))' requires (double or decimal) type, not boolean; line 1 pos 7 --- !query 119 +-- !query SELECT cast(1 as boolean) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 119 schema +-- !query schema struct<> --- !query 119 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (boolean and timestamp).; line 1 pos 7 --- !query 120 +-- !query SELECT cast(1 as boolean) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('2017-12-11 09:30:00' AS DATE))' (boolean and date).; line 1 pos 7 --- !query 121 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as tinyint) FROM t --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS TINYINT))' due to data 
type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS TINYINT))' (timestamp and tinyint).; line 1 pos 7 --- !query 122 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as smallint) FROM t --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS SMALLINT))' (timestamp and smallint).; line 1 pos 7 --- !query 123 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as int) FROM t --- !query 123 schema +-- !query schema struct<> --- !query 123 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS INT))' (timestamp and int).; line 1 pos 7 --- !query 124 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as bigint) FROM t --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS BIGINT))' (timestamp and bigint).; line 1 pos 7 --- !query 125 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as float) FROM t --- !query 125 schema +-- !query schema struct<> --- !query 125 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS FLOAT))' (timestamp and float).; line 1 pos 7 --- !query 
126 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as double) FROM t --- !query 126 schema +-- !query schema struct<> --- !query 126 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DOUBLE))' (timestamp and double).; line 1 pos 7 --- !query 127 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as decimal(10, 0)) FROM t --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS DECIMAL(10,0)))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 128 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as string) FROM t --- !query 128 schema +-- !query schema struct<> --- !query 128 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(CAST(1 AS STRING) AS DOUBLE))' (timestamp and double).; line 1 pos 7 --- !query 129 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('1' as binary) FROM t --- !query 129 schema +-- !query schema struct<> --- !query 129 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS BINARY))' (timestamp and binary).; line 1 pos 7 --- !query 130 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast(1 as 
boolean) FROM t --- !query 130 schema +-- !query schema struct<> --- !query 130 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST(1 AS BOOLEAN))' (timestamp and boolean).; line 1 pos 7 --- !query 131 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 131 schema +-- !query schema struct<> --- !query 131 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' requires (double or decimal) type, not timestamp; line 1 pos 7 --- !query 132 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 132 schema +-- !query schema struct<> --- !query 132 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('2017-12-11 09:30:00' AS DATE))' (timestamp and date).; line 1 pos 7 --- !query 133 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as tinyint) FROM t --- !query 133 schema +-- !query schema struct<> --- !query 133 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS TINYINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS TINYINT))' (date and tinyint).; line 1 pos 7 --- !query 134 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as smallint) FROM t --- !query 134 
schema +-- !query schema struct<> --- !query 134 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS SMALLINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS SMALLINT))' (date and smallint).; line 1 pos 7 --- !query 135 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as int) FROM t --- !query 135 schema +-- !query schema struct<> --- !query 135 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS INT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS INT))' (date and int).; line 1 pos 7 --- !query 136 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as bigint) FROM t --- !query 136 schema +-- !query schema struct<> --- !query 136 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BIGINT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BIGINT))' (date and bigint).; line 1 pos 7 --- !query 137 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as float) FROM t --- !query 137 schema +-- !query schema struct<> --- !query 137 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS FLOAT))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS FLOAT))' (date and float).; line 1 pos 7 --- !query 138 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as double) FROM t --- !query 138 schema +-- !query schema struct<> --- !query 138 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 
09:30:00' AS DATE) / CAST(1 AS DOUBLE))' (date and double).; line 1 pos 7 --- !query 139 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as decimal(10, 0)) FROM t --- !query 139 schema +-- !query schema struct<> --- !query 139 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS DECIMAL(10,0)))' (date and decimal(10,0)).; line 1 pos 7 --- !query 140 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as string) FROM t --- !query 140 schema +-- !query schema struct<> --- !query 140 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(CAST(1 AS STRING) AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(CAST(1 AS STRING) AS DOUBLE))' (date and double).; line 1 pos 7 --- !query 141 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast('1' as binary) FROM t --- !query 141 schema +-- !query schema struct<> --- !query 141 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS BINARY))' (date and binary).; line 1 pos 7 --- !query 142 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast(1 as boolean) FROM t --- !query 142 schema +-- !query schema struct<> --- !query 142 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST(1 AS BOOLEAN))' (date and boolean).; line 1 pos 7 --- !query 143 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast('2017-12-11 09:30:00.0' as 
timestamp) FROM t --- !query 143 schema +-- !query schema struct<> --- !query 143 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (date and timestamp).; line 1 pos 7 --- !query 144 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / cast('2017-12-11 09:30:00' as date) FROM t --- !query 144 schema +-- !query schema struct<> --- !query 144 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('2017-12-11 09:30:00' AS DATE))' requires (double or decimal) type, not date; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/elt.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/elt.sql.out index b62e1b6826045..5e335df904a3d 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/elt.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/elt.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 6 --- !query 0 +-- !query SELECT elt(2, col1, col2, col3, col4, col5) col FROM ( SELECT @@ -13,9 +13,9 @@ FROM ( CAST(id AS DOUBLE) col5 FROM range(10) ) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 0 1 2 @@ -28,7 +28,7 @@ struct 9 --- !query 1 +-- !query SELECT elt(3, col1, col2, col3, col4) col FROM ( SELECT @@ -38,9 +38,9 @@ FROM ( encode(string(id + 3), 'utf-8') col4 FROM range(10) ) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 10 11 2 @@ -53,15 +53,15 @@ struct 9 --- !query 2 +-- !query set spark.sql.function.eltOutputAsString=true --- !query 2 schema +-- !query schema struct --- 
!query 2 output +-- !query output spark.sql.function.eltOutputAsString true --- !query 3 +-- !query SELECT elt(1, col1, col2) col FROM ( SELECT @@ -69,9 +69,9 @@ FROM ( encode(string(id + 1), 'utf-8') col2 FROM range(10) ) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 0 1 2 @@ -84,15 +84,15 @@ struct 9 --- !query 4 +-- !query set spark.sql.function.eltOutputAsString=false --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output spark.sql.function.eltOutputAsString false --- !query 5 +-- !query SELECT elt(2, col1, col2) col FROM ( SELECT @@ -100,9 +100,9 @@ FROM ( encode(string(id + 1), 'utf-8') col2 FROM range(10) ) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 10 2 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/ifCoercion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/ifCoercion.sql.out index 7097027872707..bb49d296eaada 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/ifCoercion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/ifCoercion.sql.out @@ -2,1231 +2,1231 @@ -- Number of queries: 145 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as tinyint)) FROM t --- !query 1 schema +-- !query schema struct<(IF(true, CAST(1 AS TINYINT), CAST(2 AS TINYINT))):tinyint> --- !query 1 output +-- !query output 1 --- !query 2 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as smallint)) FROM t --- !query 2 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS TINYINT) AS SMALLINT), CAST(2 AS SMALLINT))):smallint> --- !query 2 output +-- !query output 1 --- !query 3 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as int)) FROM t --- !query 3 schema +-- !query schema 
struct<(IF(true, CAST(CAST(1 AS TINYINT) AS INT), CAST(2 AS INT))):int> --- !query 3 output +-- !query output 1 --- !query 4 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as bigint)) FROM t --- !query 4 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS TINYINT) AS BIGINT), CAST(2 AS BIGINT))):bigint> --- !query 4 output +-- !query output 1 --- !query 5 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as float)) FROM t --- !query 5 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS TINYINT) AS FLOAT), CAST(2 AS FLOAT))):float> --- !query 5 output +-- !query output 1.0 --- !query 6 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as double)) FROM t --- !query 6 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS TINYINT) AS DOUBLE), CAST(2 AS DOUBLE))):double> --- !query 6 output +-- !query output 1.0 --- !query 7 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as decimal(10, 0))) FROM t --- !query 7 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> --- !query 7 output +-- !query output 1 --- !query 8 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as string)) FROM t --- !query 8 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS TINYINT) AS STRING), CAST(2 AS STRING))):string> --- !query 8 output +-- !query output 1 --- !query 9 +-- !query SELECT IF(true, cast(1 as tinyint), cast('2' as binary)) FROM t --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST('2' AS BINARY)))' (tinyint and binary).; line 1 pos 7 --- !query 10 +-- !query SELECT IF(true, cast(1 as tinyint), cast(2 as boolean)) FROM t --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException 
cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST(2 AS BOOLEAN)))' (tinyint and boolean).; line 1 pos 7 --- !query 11 +-- !query SELECT IF(true, cast(1 as tinyint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (tinyint and timestamp).; line 1 pos 7 --- !query 12 +-- !query SELECT IF(true, cast(1 as tinyint), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00' AS DATE)))' (tinyint and date).; line 1 pos 7 --- !query 13 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as tinyint)) FROM t --- !query 13 schema +-- !query schema struct<(IF(true, CAST(1 AS SMALLINT), CAST(CAST(2 AS TINYINT) AS SMALLINT))):smallint> --- !query 13 output +-- !query output 1 --- !query 14 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as smallint)) FROM t --- !query 14 schema +-- !query schema struct<(IF(true, CAST(1 AS SMALLINT), CAST(2 AS SMALLINT))):smallint> --- !query 14 output +-- !query output 1 --- !query 15 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as int)) FROM t --- !query 15 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS INT), CAST(2 AS INT))):int> --- !query 15 output +-- !query output 1 --- !query 16 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as bigint)) FROM t --- !query 16 
schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS BIGINT), CAST(2 AS BIGINT))):bigint> --- !query 16 output +-- !query output 1 --- !query 17 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as float)) FROM t --- !query 17 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS FLOAT), CAST(2 AS FLOAT))):float> --- !query 17 output +-- !query output 1.0 --- !query 18 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as double)) FROM t --- !query 18 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS DOUBLE), CAST(2 AS DOUBLE))):double> --- !query 18 output +-- !query output 1.0 --- !query 19 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as decimal(10, 0))) FROM t --- !query 19 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> --- !query 19 output +-- !query output 1 --- !query 20 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as string)) FROM t --- !query 20 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS SMALLINT) AS STRING), CAST(2 AS STRING))):string> --- !query 20 output +-- !query output 1 --- !query 21 +-- !query SELECT IF(true, cast(1 as smallint), cast('2' as binary)) FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), CAST('2' AS BINARY)))' (smallint and binary).; line 1 pos 7 --- !query 22 +-- !query SELECT IF(true, cast(1 as smallint), cast(2 as boolean)) FROM t --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), CAST(2 AS BOOLEAN)))' (smallint and 
boolean).; line 1 pos 7 --- !query 23 +-- !query SELECT IF(true, cast(1 as smallint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (smallint and timestamp).; line 1 pos 7 --- !query 24 +-- !query SELECT IF(true, cast(1 as smallint), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00' AS DATE)))' (smallint and date).; line 1 pos 7 --- !query 25 +-- !query SELECT IF(true, cast(1 as int), cast(2 as tinyint)) FROM t --- !query 25 schema +-- !query schema struct<(IF(true, CAST(1 AS INT), CAST(CAST(2 AS TINYINT) AS INT))):int> --- !query 25 output +-- !query output 1 --- !query 26 +-- !query SELECT IF(true, cast(1 as int), cast(2 as smallint)) FROM t --- !query 26 schema +-- !query schema struct<(IF(true, CAST(1 AS INT), CAST(CAST(2 AS SMALLINT) AS INT))):int> --- !query 26 output +-- !query output 1 --- !query 27 +-- !query SELECT IF(true, cast(1 as int), cast(2 as int)) FROM t --- !query 27 schema +-- !query schema struct<(IF(true, CAST(1 AS INT), CAST(2 AS INT))):int> --- !query 27 output +-- !query output 1 --- !query 28 +-- !query SELECT IF(true, cast(1 as int), cast(2 as bigint)) FROM t --- !query 28 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS INT) AS BIGINT), CAST(2 AS BIGINT))):bigint> --- !query 28 output +-- !query output 1 --- !query 29 +-- !query SELECT IF(true, cast(1 as int), cast(2 as float)) FROM 
t --- !query 29 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS INT) AS FLOAT), CAST(2 AS FLOAT))):float> --- !query 29 output +-- !query output 1.0 --- !query 30 +-- !query SELECT IF(true, cast(1 as int), cast(2 as double)) FROM t --- !query 30 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS INT) AS DOUBLE), CAST(2 AS DOUBLE))):double> --- !query 30 output +-- !query output 1.0 --- !query 31 +-- !query SELECT IF(true, cast(1 as int), cast(2 as decimal(10, 0))) FROM t --- !query 31 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS INT) AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> --- !query 31 output +-- !query output 1 --- !query 32 +-- !query SELECT IF(true, cast(1 as int), cast(2 as string)) FROM t --- !query 32 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS INT) AS STRING), CAST(2 AS STRING))):string> --- !query 32 output +-- !query output 1 --- !query 33 +-- !query SELECT IF(true, cast(1 as int), cast('2' as binary)) FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS INT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST('2' AS BINARY)))' (int and binary).; line 1 pos 7 --- !query 34 +-- !query SELECT IF(true, cast(1 as int), cast(2 as boolean)) FROM t --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS INT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST(2 AS BOOLEAN)))' (int and boolean).; line 1 pos 7 --- !query 35 +-- !query SELECT IF(true, cast(1 as int), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS INT), 
CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (int and timestamp).; line 1 pos 7 --- !query 36 +-- !query SELECT IF(true, cast(1 as int), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS INT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS INT), CAST('2017-12-11 09:30:00' AS DATE)))' (int and date).; line 1 pos 7 --- !query 37 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as tinyint)) FROM t --- !query 37 schema +-- !query schema struct<(IF(true, CAST(1 AS BIGINT), CAST(CAST(2 AS TINYINT) AS BIGINT))):bigint> --- !query 37 output +-- !query output 1 --- !query 38 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as smallint)) FROM t --- !query 38 schema +-- !query schema struct<(IF(true, CAST(1 AS BIGINT), CAST(CAST(2 AS SMALLINT) AS BIGINT))):bigint> --- !query 38 output +-- !query output 1 --- !query 39 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as int)) FROM t --- !query 39 schema +-- !query schema struct<(IF(true, CAST(1 AS BIGINT), CAST(CAST(2 AS INT) AS BIGINT))):bigint> --- !query 39 output +-- !query output 1 --- !query 40 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as bigint)) FROM t --- !query 40 schema +-- !query schema struct<(IF(true, CAST(1 AS BIGINT), CAST(2 AS BIGINT))):bigint> --- !query 40 output +-- !query output 1 --- !query 41 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as float)) FROM t --- !query 41 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS BIGINT) AS FLOAT), CAST(2 AS FLOAT))):float> --- !query 41 output +-- !query output 1.0 --- !query 42 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as double)) FROM t --- !query 42 schema +-- !query schema 
struct<(IF(true, CAST(CAST(1 AS BIGINT) AS DOUBLE), CAST(2 AS DOUBLE))):double> --- !query 42 output +-- !query output 1.0 --- !query 43 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as decimal(10, 0))) FROM t --- !query 43 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)), CAST(CAST(2 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):decimal(20,0)> --- !query 43 output +-- !query output 1 --- !query 44 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as string)) FROM t --- !query 44 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS BIGINT) AS STRING), CAST(2 AS STRING))):string> --- !query 44 output +-- !query output 1 --- !query 45 +-- !query SELECT IF(true, cast(1 as bigint), cast('2' as binary)) FROM t --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST('2' AS BINARY)))' (bigint and binary).; line 1 pos 7 --- !query 46 +-- !query SELECT IF(true, cast(1 as bigint), cast(2 as boolean)) FROM t --- !query 46 schema +-- !query schema struct<> --- !query 46 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST(2 AS BOOLEAN)))' (bigint and boolean).; line 1 pos 7 --- !query 47 +-- !query SELECT IF(true, cast(1 as bigint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (bigint and timestamp).; line 1 pos 7 --- !query 48 
+-- !query SELECT IF(true, cast(1 as bigint), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00' AS DATE)))' (bigint and date).; line 1 pos 7 --- !query 49 +-- !query SELECT IF(true, cast(1 as float), cast(2 as tinyint)) FROM t --- !query 49 schema +-- !query schema struct<(IF(true, CAST(1 AS FLOAT), CAST(CAST(2 AS TINYINT) AS FLOAT))):float> --- !query 49 output +-- !query output 1.0 --- !query 50 +-- !query SELECT IF(true, cast(1 as float), cast(2 as smallint)) FROM t --- !query 50 schema +-- !query schema struct<(IF(true, CAST(1 AS FLOAT), CAST(CAST(2 AS SMALLINT) AS FLOAT))):float> --- !query 50 output +-- !query output 1.0 --- !query 51 +-- !query SELECT IF(true, cast(1 as float), cast(2 as int)) FROM t --- !query 51 schema +-- !query schema struct<(IF(true, CAST(1 AS FLOAT), CAST(CAST(2 AS INT) AS FLOAT))):float> --- !query 51 output +-- !query output 1.0 --- !query 52 +-- !query SELECT IF(true, cast(1 as float), cast(2 as bigint)) FROM t --- !query 52 schema +-- !query schema struct<(IF(true, CAST(1 AS FLOAT), CAST(CAST(2 AS BIGINT) AS FLOAT))):float> --- !query 52 output +-- !query output 1.0 --- !query 53 +-- !query SELECT IF(true, cast(1 as float), cast(2 as float)) FROM t --- !query 53 schema +-- !query schema struct<(IF(true, CAST(1 AS FLOAT), CAST(2 AS FLOAT))):float> --- !query 53 output +-- !query output 1.0 --- !query 54 +-- !query SELECT IF(true, cast(1 as float), cast(2 as double)) FROM t --- !query 54 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(2 AS DOUBLE))):double> --- !query 54 output +-- !query output 1.0 --- !query 55 +-- !query SELECT IF(true, cast(1 as float), cast(2 as decimal(10, 0))) FROM t --- 
!query 55 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(CAST(2 AS DECIMAL(10,0)) AS DOUBLE))):double> --- !query 55 output +-- !query output 1.0 --- !query 56 +-- !query SELECT IF(true, cast(1 as float), cast(2 as string)) FROM t --- !query 56 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS FLOAT) AS STRING), CAST(2 AS STRING))):string> --- !query 56 output +-- !query output 1.0 --- !query 57 +-- !query SELECT IF(true, cast(1 as float), cast('2' as binary)) FROM t --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS FLOAT), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST('2' AS BINARY)))' (float and binary).; line 1 pos 7 --- !query 58 +-- !query SELECT IF(true, cast(1 as float), cast(2 as boolean)) FROM t --- !query 58 schema +-- !query schema struct<> --- !query 58 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS FLOAT), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST(2 AS BOOLEAN)))' (float and boolean).; line 1 pos 7 --- !query 59 +-- !query SELECT IF(true, cast(1 as float), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (float and timestamp).; line 1 pos 7 --- !query 60 +-- !query SELECT IF(true, cast(1 as float), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS 
FLOAT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00' AS DATE)))' (float and date).; line 1 pos 7 --- !query 61 +-- !query SELECT IF(true, cast(1 as double), cast(2 as tinyint)) FROM t --- !query 61 schema +-- !query schema struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS TINYINT) AS DOUBLE))):double> --- !query 61 output +-- !query output 1.0 --- !query 62 +-- !query SELECT IF(true, cast(1 as double), cast(2 as smallint)) FROM t --- !query 62 schema +-- !query schema struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS SMALLINT) AS DOUBLE))):double> --- !query 62 output +-- !query output 1.0 --- !query 63 +-- !query SELECT IF(true, cast(1 as double), cast(2 as int)) FROM t --- !query 63 schema +-- !query schema struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS INT) AS DOUBLE))):double> --- !query 63 output +-- !query output 1.0 --- !query 64 +-- !query SELECT IF(true, cast(1 as double), cast(2 as bigint)) FROM t --- !query 64 schema +-- !query schema struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS BIGINT) AS DOUBLE))):double> --- !query 64 output +-- !query output 1.0 --- !query 65 +-- !query SELECT IF(true, cast(1 as double), cast(2 as float)) FROM t --- !query 65 schema +-- !query schema struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS FLOAT) AS DOUBLE))):double> --- !query 65 output +-- !query output 1.0 --- !query 66 +-- !query SELECT IF(true, cast(1 as double), cast(2 as double)) FROM t --- !query 66 schema +-- !query schema struct<(IF(true, CAST(1 AS DOUBLE), CAST(2 AS DOUBLE))):double> --- !query 66 output +-- !query output 1.0 --- !query 67 +-- !query SELECT IF(true, cast(1 as double), cast(2 as decimal(10, 0))) FROM t --- !query 67 schema +-- !query schema struct<(IF(true, CAST(1 AS DOUBLE), CAST(CAST(2 AS DECIMAL(10,0)) AS DOUBLE))):double> --- !query 67 output +-- !query output 1.0 --- !query 68 +-- !query SELECT IF(true, cast(1 as double), cast(2 as 
string)) FROM t --- !query 68 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS DOUBLE) AS STRING), CAST(2 AS STRING))):string> --- !query 68 output +-- !query output 1.0 --- !query 69 +-- !query SELECT IF(true, cast(1 as double), cast('2' as binary)) FROM t --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST('2' AS BINARY)))' (double and binary).; line 1 pos 7 --- !query 70 +-- !query SELECT IF(true, cast(1 as double), cast(2 as boolean)) FROM t --- !query 70 schema +-- !query schema struct<> --- !query 70 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST(2 AS BOOLEAN)))' (double and boolean).; line 1 pos 7 --- !query 71 +-- !query SELECT IF(true, cast(1 as double), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (double and timestamp).; line 1 pos 7 --- !query 72 +-- !query SELECT IF(true, cast(1 as double), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE)))' (double and date).; line 1 pos 7 --- !query 73 +-- !query SELECT IF(true, 
cast(1 as decimal(10, 0)), cast(2 as tinyint)) FROM t --- !query 73 schema +-- !query schema struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(CAST(2 AS TINYINT) AS DECIMAL(10,0)))):decimal(10,0)> --- !query 73 output +-- !query output 1 --- !query 74 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as smallint)) FROM t --- !query 74 schema +-- !query schema struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(CAST(2 AS SMALLINT) AS DECIMAL(10,0)))):decimal(10,0)> --- !query 74 output +-- !query output 1 --- !query 75 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as int)) FROM t --- !query 75 schema +-- !query schema struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(CAST(2 AS INT) AS DECIMAL(10,0)))):decimal(10,0)> --- !query 75 output +-- !query output 1 --- !query 76 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as bigint)) FROM t --- !query 76 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)), CAST(CAST(2 AS BIGINT) AS DECIMAL(20,0)))):decimal(20,0)> --- !query 76 output +-- !query output 1 --- !query 77 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as float)) FROM t --- !query 77 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(CAST(2 AS FLOAT) AS DOUBLE))):double> --- !query 77 output +-- !query output 1.0 --- !query 78 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as double)) FROM t --- !query 78 schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(2 AS DOUBLE))):double> --- !query 78 output +-- !query output 1.0 --- !query 79 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as decimal(10, 0))) FROM t --- !query 79 schema +-- !query schema struct<(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(2 AS DECIMAL(10,0)))):decimal(10,0)> --- !query 79 output +-- !query output 1 --- !query 80 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as string)) FROM t --- !query 80 
schema +-- !query schema struct<(IF(true, CAST(CAST(1 AS DECIMAL(10,0)) AS STRING), CAST(2 AS STRING))):string> --- !query 80 output +-- !query output 1 --- !query 81 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast('2' as binary)) FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2' AS BINARY)))' (decimal(10,0) and binary).; line 1 pos 7 --- !query 82 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast(2 as boolean)) FROM t --- !query 82 schema +-- !query schema struct<> --- !query 82 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST(2 AS BOOLEAN)))' (decimal(10,0) and boolean).; line 1 pos 7 --- !query 83 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (decimal(10,0) and timestamp).; line 1 pos 7 --- !query 84 +-- !query SELECT IF(true, cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS 
DATE)))' (decimal(10,0) and date).; line 1 pos 7 --- !query 85 +-- !query SELECT IF(true, cast(1 as string), cast(2 as tinyint)) FROM t --- !query 85 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS TINYINT) AS STRING))):string> --- !query 85 output +-- !query output 1 --- !query 86 +-- !query SELECT IF(true, cast(1 as string), cast(2 as smallint)) FROM t --- !query 86 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS SMALLINT) AS STRING))):string> --- !query 86 output +-- !query output 1 --- !query 87 +-- !query SELECT IF(true, cast(1 as string), cast(2 as int)) FROM t --- !query 87 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS INT) AS STRING))):string> --- !query 87 output +-- !query output 1 --- !query 88 +-- !query SELECT IF(true, cast(1 as string), cast(2 as bigint)) FROM t --- !query 88 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS BIGINT) AS STRING))):string> --- !query 88 output +-- !query output 1 --- !query 89 +-- !query SELECT IF(true, cast(1 as string), cast(2 as float)) FROM t --- !query 89 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS FLOAT) AS STRING))):string> --- !query 89 output +-- !query output 1 --- !query 90 +-- !query SELECT IF(true, cast(1 as string), cast(2 as double)) FROM t --- !query 90 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS DOUBLE) AS STRING))):string> --- !query 90 output +-- !query output 1 --- !query 91 +-- !query SELECT IF(true, cast(1 as string), cast(2 as decimal(10, 0))) FROM t --- !query 91 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2 AS DECIMAL(10,0)) AS STRING))):string> --- !query 91 output +-- !query output 1 --- !query 92 +-- !query SELECT IF(true, cast(1 as string), cast(2 as string)) FROM t --- !query 92 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(2 AS STRING))):string> --- !query 92 output +-- 
!query output 1 --- !query 93 +-- !query SELECT IF(true, cast(1 as string), cast('2' as binary)) FROM t --- !query 93 schema +-- !query schema struct<> --- !query 93 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS STRING), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS STRING), CAST('2' AS BINARY)))' (string and binary).; line 1 pos 7 --- !query 94 +-- !query SELECT IF(true, cast(1 as string), cast(2 as boolean)) FROM t --- !query 94 schema +-- !query schema struct<> --- !query 94 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS STRING), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS STRING), CAST(2 AS BOOLEAN)))' (string and boolean).; line 1 pos 7 --- !query 95 +-- !query SELECT IF(true, cast(1 as string), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 95 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):string> --- !query 95 output +-- !query output 1 --- !query 96 +-- !query SELECT IF(true, cast(1 as string), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 96 schema +-- !query schema struct<(IF(true, CAST(1 AS STRING), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):string> --- !query 96 output +-- !query output 1 --- !query 97 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as tinyint)) FROM t --- !query 97 schema +-- !query schema struct<> --- !query 97 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS TINYINT)))' (binary and tinyint).; line 1 pos 7 --- !query 98 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as smallint)) FROM t --- !query 98 schema +-- !query schema struct<> --- !query 98 output 
+-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS SMALLINT)))' (binary and smallint).; line 1 pos 7 --- !query 99 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as int)) FROM t --- !query 99 schema +-- !query schema struct<> --- !query 99 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS INT)))' (binary and int).; line 1 pos 7 --- !query 100 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as bigint)) FROM t --- !query 100 schema +-- !query schema struct<> --- !query 100 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS BIGINT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS BIGINT)))' (binary and bigint).; line 1 pos 7 --- !query 101 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as float)) FROM t --- !query 101 schema +-- !query schema struct<> --- !query 101 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS FLOAT)))' (binary and float).; line 1 pos 7 --- !query 102 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as double)) FROM t --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS DOUBLE)))' (binary and double).; line 1 pos 7 --- !query 103 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as 
decimal(10, 0))) FROM t --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS DECIMAL(10,0))))' (binary and decimal(10,0)).; line 1 pos 7 --- !query 104 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as string)) FROM t --- !query 104 schema +-- !query schema struct<> --- !query 104 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS STRING)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS STRING)))' (binary and string).; line 1 pos 7 --- !query 105 +-- !query SELECT IF(true, cast('1' as binary), cast('2' as binary)) FROM t --- !query 105 schema +-- !query schema struct<(IF(true, CAST(1 AS BINARY), CAST(2 AS BINARY))):binary> --- !query 105 output +-- !query output 1 --- !query 106 +-- !query SELECT IF(true, cast('1' as binary), cast(2 as boolean)) FROM t --- !query 106 schema +-- !query schema struct<> --- !query 106 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST(2 AS BOOLEAN)))' (binary and boolean).; line 1 pos 7 --- !query 107 +-- !query SELECT IF(true, cast('1' as binary), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 107 schema +-- !query schema struct<> --- !query 107 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (binary and timestamp).; line 1 pos 7 --- !query 108 +-- !query SELECT IF(true, 
cast('1' as binary), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 108 schema +-- !query schema struct<> --- !query 108 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST('1' AS BINARY), CAST('2017-12-11 09:30:00' AS DATE)))' (binary and date).; line 1 pos 7 --- !query 109 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as tinyint)) FROM t --- !query 109 schema +-- !query schema struct<> --- !query 109 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS TINYINT)))' (boolean and tinyint).; line 1 pos 7 --- !query 110 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as smallint)) FROM t --- !query 110 schema +-- !query schema struct<> --- !query 110 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS SMALLINT)))' (boolean and smallint).; line 1 pos 7 --- !query 111 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as int)) FROM t --- !query 111 schema +-- !query schema struct<> --- !query 111 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS INT)))' (boolean and int).; line 1 pos 7 --- !query 112 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as bigint)) FROM t --- !query 112 schema +-- !query schema struct<> --- !query 112 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS BIGINT)))' due to data type mismatch: differing 
types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS BIGINT)))' (boolean and bigint).; line 1 pos 7 --- !query 113 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as float)) FROM t --- !query 113 schema +-- !query schema struct<> --- !query 113 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS FLOAT)))' (boolean and float).; line 1 pos 7 --- !query 114 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as double)) FROM t --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DOUBLE)))' (boolean and double).; line 1 pos 7 --- !query 115 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as decimal(10, 0))) FROM t --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS DECIMAL(10,0))))' (boolean and decimal(10,0)).; line 1 pos 7 --- !query 116 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as string)) FROM t --- !query 116 schema +-- !query schema struct<> --- !query 116 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS STRING)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS STRING)))' (boolean and string).; line 1 pos 7 --- !query 117 +-- !query SELECT IF(true, cast(1 as boolean), cast('2' as binary)) FROM t --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST('2' AS BINARY)))' (boolean and binary).; line 1 pos 7 --- !query 118 +-- !query SELECT IF(true, cast(1 as boolean), cast(2 as boolean)) FROM t --- !query 118 schema +-- !query schema struct<(IF(true, CAST(1 AS BOOLEAN), CAST(2 AS BOOLEAN))):boolean> --- !query 118 output +-- !query output true --- !query 119 +-- !query SELECT IF(true, cast(1 as boolean), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 119 schema +-- !query schema struct<> --- !query 119 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' (boolean and timestamp).; line 1 pos 7 --- !query 120 +-- !query SELECT IF(true, cast(1 as boolean), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: differing types in '(IF(true, CAST(1 AS BOOLEAN), CAST('2017-12-11 09:30:00' AS DATE)))' (boolean and date).; line 1 pos 7 --- !query 121 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as tinyint)) FROM t --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS TINYINT)))' (timestamp and tinyint).; line 1 pos 7 --- !query 122 +-- !query SELECT IF(true, 
cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as smallint)) FROM t --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS SMALLINT)))' (timestamp and smallint).; line 1 pos 7 --- !query 123 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as int)) FROM t --- !query 123 schema +-- !query schema struct<> --- !query 123 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS INT)))' (timestamp and int).; line 1 pos 7 --- !query 124 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as bigint)) FROM t --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BIGINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BIGINT)))' (timestamp and bigint).; line 1 pos 7 --- !query 125 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as float)) FROM t --- !query 125 schema +-- !query schema struct<> --- !query 125 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS FLOAT)))' (timestamp and float).; line 1 pos 7 --- !query 126 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), 
cast(2 as double)) FROM t --- !query 126 schema +-- !query schema struct<> --- !query 126 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DOUBLE)))' (timestamp and double).; line 1 pos 7 --- !query 127 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as decimal(10, 0))) FROM t --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS DECIMAL(10,0))))' (timestamp and decimal(10,0)).; line 1 pos 7 --- !query 128 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as string)) FROM t --- !query 128 schema +-- !query schema struct<(IF(true, CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING), CAST(2 AS STRING))):string> --- !query 128 output +-- !query output 2017-12-12 09:30:00 --- !query 129 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2' as binary)) FROM t --- !query 129 schema +-- !query schema struct<> --- !query 129 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST('2' AS BINARY)))' (timestamp and binary).; line 1 pos 7 --- !query 130 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast(2 as boolean)) FROM t --- !query 130 schema +-- !query schema struct<> --- !query 130 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 
'(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(2 AS BOOLEAN)))' (timestamp and boolean).; line 1 pos 7 --- !query 131 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 131 schema +-- !query schema struct<(IF(true, CAST(2017-12-12 09:30:00.0 AS TIMESTAMP), CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):timestamp> --- !query 131 output +-- !query output 2017-12-12 09:30:00 --- !query 132 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 132 schema +-- !query schema struct<(IF(true, CAST(2017-12-12 09:30:00.0 AS TIMESTAMP), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS TIMESTAMP))):timestamp> --- !query 132 output +-- !query output 2017-12-12 09:30:00 --- !query 133 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as tinyint)) FROM t --- !query 133 schema +-- !query schema struct<> --- !query 133 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS TINYINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS TINYINT)))' (date and tinyint).; line 1 pos 7 --- !query 134 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as smallint)) FROM t --- !query 134 schema +-- !query schema struct<> --- !query 134 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS SMALLINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS SMALLINT)))' (date and smallint).; line 1 pos 7 --- !query 135 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as int)) FROM t --- !query 
135 schema +-- !query schema struct<> --- !query 135 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS INT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS INT)))' (date and int).; line 1 pos 7 --- !query 136 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as bigint)) FROM t --- !query 136 schema +-- !query schema struct<> --- !query 136 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BIGINT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BIGINT)))' (date and bigint).; line 1 pos 7 --- !query 137 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as float)) FROM t --- !query 137 schema +-- !query schema struct<> --- !query 137 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS FLOAT)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS FLOAT)))' (date and float).; line 1 pos 7 --- !query 138 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as double)) FROM t --- !query 138 schema +-- !query schema struct<> --- !query 138 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DOUBLE)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DOUBLE)))' (date and double).; line 1 pos 7 --- !query 139 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as decimal(10, 0))) FROM t --- !query 139 schema +-- !query schema struct<> --- !query 139 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, 
CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS DECIMAL(10,0))))' (date and decimal(10,0)).; line 1 pos 7 --- !query 140 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as string)) FROM t --- !query 140 schema +-- !query schema struct<(IF(true, CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING), CAST(2 AS STRING))):string> --- !query 140 output +-- !query output 2017-12-12 --- !query 141 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2' as binary)) FROM t --- !query 141 schema +-- !query schema struct<> --- !query 141 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST('2' AS BINARY)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST('2' AS BINARY)))' (date and binary).; line 1 pos 7 --- !query 142 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast(2 as boolean)) FROM t --- !query 142 schema +-- !query schema struct<> --- !query 142 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BOOLEAN)))' due to data type mismatch: differing types in '(IF(true, CAST('2017-12-12 09:30:00' AS DATE), CAST(2 AS BOOLEAN)))' (date and boolean).; line 1 pos 7 --- !query 143 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 143 schema +-- !query schema struct<(IF(true, CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP), CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):timestamp> --- !query 143 output +-- !query output 2017-12-12 00:00:00 --- !query 144 +-- !query SELECT IF(true, cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 144 schema +-- !query schema struct<(IF(true, 
CAST(2017-12-12 09:30:00 AS DATE), CAST(2017-12-11 09:30:00 AS DATE))):date> --- !query 144 output +-- !query output 2017-12-12 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out index 44fa48e2697b3..f841adf89612e 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/implicitTypeCasts.sql.out @@ -2,353 +2,353 @@ -- Number of queries: 44 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT 1 + '2' FROM t --- !query 1 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(2 AS DOUBLE)):double> --- !query 1 output +-- !query output 3.0 --- !query 2 +-- !query SELECT 1 - '2' FROM t --- !query 2 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(2 AS DOUBLE)):double> --- !query 2 output +-- !query output -1.0 --- !query 3 +-- !query SELECT 1 * '2' FROM t --- !query 3 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(2 AS DOUBLE)):double> --- !query 3 output +-- !query output 2.0 --- !query 4 +-- !query SELECT 4 / '2' FROM t --- !query 4 schema +-- !query schema struct<(CAST(4 AS DOUBLE) / CAST(CAST(2 AS DOUBLE) AS DOUBLE)):double> --- !query 4 output +-- !query output 2.0 --- !query 5 +-- !query SELECT 1.1 + '2' FROM t --- !query 5 schema +-- !query schema struct<(CAST(1.1 AS DOUBLE) + CAST(2 AS DOUBLE)):double> --- !query 5 output +-- !query output 3.1 --- !query 6 +-- !query SELECT 1.1 - '2' FROM t --- !query 6 schema +-- !query schema struct<(CAST(1.1 AS DOUBLE) - CAST(2 AS DOUBLE)):double> --- !query 6 output +-- !query output -0.8999999999999999 --- !query 7 +-- !query SELECT 1.1 * '2' FROM t --- !query 7 schema +-- !query schema struct<(CAST(1.1 AS DOUBLE) * 
CAST(2 AS DOUBLE)):double> --- !query 7 output +-- !query output 2.2 --- !query 8 +-- !query SELECT 4.4 / '2' FROM t --- !query 8 schema +-- !query schema struct<(CAST(4.4 AS DOUBLE) / CAST(2 AS DOUBLE)):double> --- !query 8 output +-- !query output 2.2 --- !query 9 +-- !query SELECT 1.1 + '2.2' FROM t --- !query 9 schema +-- !query schema struct<(CAST(1.1 AS DOUBLE) + CAST(2.2 AS DOUBLE)):double> --- !query 9 output +-- !query output 3.3000000000000003 --- !query 10 +-- !query SELECT 1.1 - '2.2' FROM t --- !query 10 schema +-- !query schema struct<(CAST(1.1 AS DOUBLE) - CAST(2.2 AS DOUBLE)):double> --- !query 10 output +-- !query output -1.1 --- !query 11 +-- !query SELECT 1.1 * '2.2' FROM t --- !query 11 schema +-- !query schema struct<(CAST(1.1 AS DOUBLE) * CAST(2.2 AS DOUBLE)):double> --- !query 11 output +-- !query output 2.4200000000000004 --- !query 12 +-- !query SELECT 4.4 / '2.2' FROM t --- !query 12 schema +-- !query schema struct<(CAST(4.4 AS DOUBLE) / CAST(2.2 AS DOUBLE)):double> --- !query 12 output +-- !query output 2.0 --- !query 13 +-- !query SELECT '$' || cast(1 as smallint) || '$' FROM t --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output $1$ --- !query 14 +-- !query SELECT '$' || 1 || '$' FROM t --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output $1$ --- !query 15 +-- !query SELECT '$' || cast(1 as bigint) || '$' FROM t --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output $1$ --- !query 16 +-- !query SELECT '$' || cast(1.1 as float) || '$' FROM t --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output $1.1$ --- !query 17 +-- !query SELECT '$' || cast(1.1 as double) || '$' FROM t --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output $1.1$ --- !query 18 +-- !query SELECT '$' || 1.1 || '$' FROM t --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output $1.1$ --- 
!query 19 +-- !query SELECT '$' || cast(1.1 as decimal(8,3)) || '$' FROM t --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output $1.100$ --- !query 20 +-- !query SELECT '$' || 'abcd' || '$' FROM t --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output $abcd$ --- !query 21 +-- !query SELECT '$' || date('1996-09-09') || '$' FROM t --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output $1996-09-09$ --- !query 22 +-- !query SELECT '$' || timestamp('1996-09-09 10:11:12.4' )|| '$' FROM t --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output $1996-09-09 10:11:12.4$ --- !query 23 +-- !query SELECT length(cast(1 as smallint)) FROM t --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 1 --- !query 24 +-- !query SELECT length(cast(1 as int)) FROM t --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 1 --- !query 25 +-- !query SELECT length(cast(1 as bigint)) FROM t --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1 --- !query 26 +-- !query SELECT length(cast(1.1 as float)) FROM t --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 3 --- !query 27 +-- !query SELECT length(cast(1.1 as double)) FROM t --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 3 --- !query 28 +-- !query SELECT length(1.1) FROM t --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 3 --- !query 29 +-- !query SELECT length(cast(1.1 as decimal(8,3))) FROM t --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 5 --- !query 30 +-- !query SELECT length('four') FROM t --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 4 --- !query 31 +-- !query SELECT length(date('1996-09-10')) FROM t --- !query 31 schema +-- !query schema struct 
--- !query 31 output +-- !query output 10 --- !query 32 +-- !query SELECT length(timestamp('1996-09-10 10:11:12.4')) FROM t --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 21 --- !query 33 +-- !query SELECT year( '1996-01-10') FROM t --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output 1996 --- !query 34 +-- !query SELECT month( '1996-01-10') FROM t --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 1 --- !query 35 +-- !query SELECT day( '1996-01-10') FROM t --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 10 --- !query 36 +-- !query SELECT hour( '10:11:12') FROM t --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 10 --- !query 37 +-- !query SELECT minute( '10:11:12') FROM t --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 11 --- !query 38 +-- !query SELECT second( '10:11:12') FROM t --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 12 --- !query 39 +-- !query select 1 like '%' FROM t --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output true --- !query 40 +-- !query select date('1996-09-10') like '19%' FROM t --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output true --- !query 41 +-- !query select '1' like 1 FROM t --- !query 41 schema +-- !query schema struct<1 LIKE CAST(1 AS STRING):boolean> --- !query 41 output +-- !query output true --- !query 42 +-- !query select '1 ' like 1 FROM t --- !query 42 schema +-- !query schema struct<1 LIKE CAST(1 AS STRING):boolean> --- !query 42 output +-- !query output false --- !query 43 +-- !query select '1996-09-10' like date('1996-09-10') FROM t --- !query 43 schema +-- !query schema struct<1996-09-10 LIKE CAST(CAST(1996-09-10 AS DATE) AS STRING):boolean> --- !query 43 output +-- !query output true diff --git 
a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out index 875ccc1341ec4..21d0a0e0fef4e 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/inConversion.sql.out @@ -2,2453 +2,2453 @@ -- Number of queries: 289 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint)) FROM t --- !query 1 schema +-- !query schema struct<(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT))):boolean> --- !query 1 output +-- !query output true --- !query 2 +-- !query SELECT cast(1 as tinyint) in (cast(1 as smallint)) FROM t --- !query 2 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> --- !query 2 output +-- !query output true --- !query 3 +-- !query SELECT cast(1 as tinyint) in (cast(1 as int)) FROM t --- !query 3 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS INT) IN (CAST(CAST(1 AS INT) AS INT))):boolean> --- !query 3 output +-- !query output true --- !query 4 +-- !query SELECT cast(1 as tinyint) in (cast(1 as bigint)) FROM t --- !query 4 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> --- !query 4 output +-- !query output true --- !query 5 +-- !query SELECT cast(1 as tinyint) in (cast(1 as float)) FROM t --- !query 5 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 5 output +-- !query output true --- !query 6 +-- !query SELECT cast(1 as tinyint) in (cast(1 as double)) FROM t --- !query 6 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) IN 
(CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 6 output +-- !query output true --- !query 7 +-- !query SELECT cast(1 as tinyint) in (cast(1 as decimal(10, 0))) FROM t --- !query 7 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 7 output +-- !query output true --- !query 8 +-- !query SELECT cast(1 as tinyint) in (cast(1 as string)) FROM t --- !query 8 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 8 output +-- !query output true --- !query 9 +-- !query SELECT cast(1 as tinyint) in (cast('1' as binary)) FROM t --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: tinyint != binary; line 1 pos 26 --- !query 10 +-- !query SELECT cast(1 as tinyint) in (cast(1 as boolean)) FROM t --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: tinyint != boolean; line 1 pos 26 --- !query 11 +-- !query SELECT cast(1 as tinyint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: tinyint != timestamp; line 1 pos 26 --- !query 12 +-- !query SELECT cast(1 as tinyint) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: tinyint != date; line 1 pos 26 --- !query 13 +-- !query SELECT cast(1 as smallint) in (cast(1 as tinyint)) FROM t --- !query 13 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS TINYINT) AS SMALLINT))):boolean> --- !query 13 output +-- !query output true --- !query 14 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint)) FROM t --- !query 14 schema +-- !query schema struct<(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT))):boolean> --- !query 14 output +-- !query output true --- !query 15 +-- !query SELECT cast(1 as smallint) in (cast(1 as int)) FROM t --- !query 15 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS INT) IN (CAST(CAST(1 AS INT) AS INT))):boolean> --- !query 15 output +-- !query output true --- !query 16 +-- !query SELECT cast(1 as smallint) in (cast(1 as bigint)) FROM t --- !query 16 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> --- !query 16 output +-- !query output true --- !query 17 +-- !query SELECT cast(1 as smallint) in (cast(1 as float)) FROM t --- !query 17 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 17 output +-- !query output true --- !query 18 +-- !query SELECT cast(1 as smallint) in (cast(1 as double)) FROM t --- !query 18 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 18 output +-- !query output true --- !query 19 +-- !query SELECT cast(1 as smallint) in (cast(1 as decimal(10, 0))) FROM t --- !query 19 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 19 output +-- !query 
output true --- !query 20 +-- !query SELECT cast(1 as smallint) in (cast(1 as string)) FROM t --- !query 20 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 20 output +-- !query output true --- !query 21 +-- !query SELECT cast(1 as smallint) in (cast('1' as binary)) FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: smallint != binary; line 1 pos 27 --- !query 22 +-- !query SELECT cast(1 as smallint) in (cast(1 as boolean)) FROM t --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: smallint != boolean; line 1 pos 27 --- !query 23 +-- !query SELECT cast(1 as smallint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: smallint != timestamp; line 1 pos 27 --- !query 24 +-- !query SELECT cast(1 as smallint) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: smallint != date; line 1 pos 27 --- !query 25 +-- !query SELECT cast(1 as int) in (cast(1 as tinyint)) FROM t --- !query 25 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS INT) IN 
(CAST(CAST(1 AS TINYINT) AS INT))):boolean> --- !query 25 output +-- !query output true --- !query 26 +-- !query SELECT cast(1 as int) in (cast(1 as smallint)) FROM t --- !query 26 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS SMALLINT) AS INT))):boolean> --- !query 26 output +-- !query output true --- !query 27 +-- !query SELECT cast(1 as int) in (cast(1 as int)) FROM t --- !query 27 schema +-- !query schema struct<(CAST(1 AS INT) IN (CAST(1 AS INT))):boolean> --- !query 27 output +-- !query output true --- !query 28 +-- !query SELECT cast(1 as int) in (cast(1 as bigint)) FROM t --- !query 28 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> --- !query 28 output +-- !query output true --- !query 29 +-- !query SELECT cast(1 as int) in (cast(1 as float)) FROM t --- !query 29 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 29 output +-- !query output true --- !query 30 +-- !query SELECT cast(1 as int) in (cast(1 as double)) FROM t --- !query 30 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 30 output +-- !query output true --- !query 31 +-- !query SELECT cast(1 as int) in (cast(1 as decimal(10, 0))) FROM t --- !query 31 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 31 output +-- !query output true --- !query 32 +-- !query SELECT cast(1 as int) in (cast(1 as string)) FROM t --- !query 32 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 32 output +-- !query output true --- !query 33 +-- !query SELECT cast(1 as int) in (cast('1' as binary)) FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: int != binary; line 1 pos 22 --- !query 34 +-- !query SELECT cast(1 as int) in (cast(1 as boolean)) FROM t --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: int != boolean; line 1 pos 22 --- !query 35 +-- !query SELECT cast(1 as int) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: int != timestamp; line 1 pos 22 --- !query 36 +-- !query SELECT cast(1 as int) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: int != date; line 1 pos 22 --- !query 37 +-- !query SELECT cast(1 as bigint) in (cast(1 as tinyint)) FROM t --- !query 37 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS TINYINT) AS BIGINT))):boolean> --- !query 37 output +-- !query output true --- !query 38 +-- !query SELECT cast(1 as bigint) in (cast(1 as smallint)) FROM t --- !query 38 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS SMALLINT) AS BIGINT))):boolean> --- !query 38 output +-- !query output true --- !query 39 +-- !query SELECT cast(1 as bigint) in (cast(1 as int)) FROM t --- !query 39 schema +-- !query schema struct<(CAST(CAST(1 AS 
BIGINT) AS BIGINT) IN (CAST(CAST(1 AS INT) AS BIGINT))):boolean> --- !query 39 output +-- !query output true --- !query 40 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint)) FROM t --- !query 40 schema +-- !query schema struct<(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT))):boolean> --- !query 40 output +-- !query output true --- !query 41 +-- !query SELECT cast(1 as bigint) in (cast(1 as float)) FROM t --- !query 41 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 41 output +-- !query output true --- !query 42 +-- !query SELECT cast(1 as bigint) in (cast(1 as double)) FROM t --- !query 42 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 42 output +-- !query output true --- !query 43 +-- !query SELECT cast(1 as bigint) in (cast(1 as decimal(10, 0))) FROM t --- !query 43 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> --- !query 43 output +-- !query output true --- !query 44 +-- !query SELECT cast(1 as bigint) in (cast(1 as string)) FROM t --- !query 44 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 44 output +-- !query output true --- !query 45 +-- !query SELECT cast(1 as bigint) in (cast('1' as binary)) FROM t --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: bigint != binary; line 1 pos 25 --- !query 46 +-- !query SELECT cast(1 as bigint) in (cast(1 as boolean)) FROM t --- !query 46 schema +-- !query schema struct<> --- !query 46 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS 
BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: bigint != boolean; line 1 pos 25 --- !query 47 +-- !query SELECT cast(1 as bigint) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: bigint != timestamp; line 1 pos 25 --- !query 48 +-- !query SELECT cast(1 as bigint) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: bigint != date; line 1 pos 25 --- !query 49 +-- !query SELECT cast(1 as float) in (cast(1 as tinyint)) FROM t --- !query 49 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS TINYINT) AS FLOAT))):boolean> --- !query 49 output +-- !query output true --- !query 50 +-- !query SELECT cast(1 as float) in (cast(1 as smallint)) FROM t --- !query 50 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS SMALLINT) AS FLOAT))):boolean> --- !query 50 output +-- !query output true --- !query 51 +-- !query SELECT cast(1 as float) in (cast(1 as int)) FROM t --- !query 51 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS INT) AS FLOAT))):boolean> --- !query 51 output +-- !query output true --- !query 52 +-- !query SELECT cast(1 as float) in (cast(1 as bigint)) FROM t --- !query 52 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS BIGINT) AS FLOAT))):boolean> --- !query 52 output +-- !query output true --- !query 53 +-- !query SELECT cast(1 as float) in (cast(1 as float)) FROM 
t --- !query 53 schema +-- !query schema struct<(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT))):boolean> --- !query 53 output +-- !query output true --- !query 54 +-- !query SELECT cast(1 as float) in (cast(1 as double)) FROM t --- !query 54 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 54 output +-- !query output true --- !query 55 +-- !query SELECT cast(1 as float) in (cast(1 as decimal(10, 0))) FROM t --- !query 55 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 55 output +-- !query output true --- !query 56 +-- !query SELECT cast(1 as float) in (cast(1 as string)) FROM t --- !query 56 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 56 output +-- !query output false --- !query 57 +-- !query SELECT cast(1 as float) in (cast('1' as binary)) FROM t --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: float != binary; line 1 pos 24 --- !query 58 +-- !query SELECT cast(1 as float) in (cast(1 as boolean)) FROM t --- !query 58 schema +-- !query schema struct<> --- !query 58 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: float != boolean; line 1 pos 24 --- !query 59 +-- !query SELECT cast(1 as float) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be 
same type but were: float != timestamp; line 1 pos 24 --- !query 60 +-- !query SELECT cast(1 as float) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: float != date; line 1 pos 24 --- !query 61 +-- !query SELECT cast(1 as double) in (cast(1 as tinyint)) FROM t --- !query 61 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS TINYINT) AS DOUBLE))):boolean> --- !query 61 output +-- !query output true --- !query 62 +-- !query SELECT cast(1 as double) in (cast(1 as smallint)) FROM t --- !query 62 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS SMALLINT) AS DOUBLE))):boolean> --- !query 62 output +-- !query output true --- !query 63 +-- !query SELECT cast(1 as double) in (cast(1 as int)) FROM t --- !query 63 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS INT) AS DOUBLE))):boolean> --- !query 63 output +-- !query output true --- !query 64 +-- !query SELECT cast(1 as double) in (cast(1 as bigint)) FROM t --- !query 64 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS BIGINT) AS DOUBLE))):boolean> --- !query 64 output +-- !query output true --- !query 65 +-- !query SELECT cast(1 as double) in (cast(1 as float)) FROM t --- !query 65 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 65 output +-- !query output true --- !query 66 +-- !query SELECT cast(1 as double) in (cast(1 as double)) FROM t --- !query 66 schema +-- !query schema struct<(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE))):boolean> --- !query 66 output +-- !query output true --- !query 67 +-- !query SELECT cast(1 as double) in (cast(1 
as decimal(10, 0))) FROM t --- !query 67 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 67 output +-- !query output true --- !query 68 +-- !query SELECT cast(1 as double) in (cast(1 as string)) FROM t --- !query 68 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 68 output +-- !query output false --- !query 69 +-- !query SELECT cast(1 as double) in (cast('1' as binary)) FROM t --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: double != binary; line 1 pos 25 --- !query 70 +-- !query SELECT cast(1 as double) in (cast(1 as boolean)) FROM t --- !query 70 schema +-- !query schema struct<> --- !query 70 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: double != boolean; line 1 pos 25 --- !query 71 +-- !query SELECT cast(1 as double) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: double != timestamp; line 1 pos 25 --- !query 72 +-- !query SELECT cast(1 as double) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: double != date; line 
1 pos 25 --- !query 73 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as tinyint)) FROM t --- !query 73 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)))):boolean> --- !query 73 output +-- !query output true --- !query 74 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as smallint)) FROM t --- !query 74 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)))):boolean> --- !query 74 output +-- !query output true --- !query 75 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as int)) FROM t --- !query 75 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS INT) AS DECIMAL(10,0)))):boolean> --- !query 75 output +-- !query output true --- !query 76 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as bigint)) FROM t --- !query 76 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)))):boolean> --- !query 76 output +-- !query output true --- !query 77 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as float)) FROM t --- !query 77 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 77 output +-- !query output true --- !query 78 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as double)) FROM t --- !query 78 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 78 output +-- !query output true --- !query 79 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0))) FROM t --- !query 79 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)))):boolean> --- !query 79 output +-- !query output true --- !query 80 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 
as string)) FROM t --- !query 80 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 80 output +-- !query output true --- !query 81 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast('1' as binary)) FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != binary; line 1 pos 33 --- !query 82 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as boolean)) FROM t --- !query 82 schema +-- !query schema struct<> --- !query 82 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != boolean; line 1 pos 33 --- !query 83 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != timestamp; line 1 pos 33 --- !query 84 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != date; line 1 pos 33 --- !query 85 +-- !query SELECT cast(1 as string) in (cast(1 as tinyint)) FROM t --- !query 85 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN 
(CAST(CAST(1 AS TINYINT) AS STRING))):boolean> --- !query 85 output +-- !query output true --- !query 86 +-- !query SELECT cast(1 as string) in (cast(1 as smallint)) FROM t --- !query 86 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> --- !query 86 output +-- !query output true --- !query 87 +-- !query SELECT cast(1 as string) in (cast(1 as int)) FROM t --- !query 87 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING))):boolean> --- !query 87 output +-- !query output true --- !query 88 +-- !query SELECT cast(1 as string) in (cast(1 as bigint)) FROM t --- !query 88 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING))):boolean> --- !query 88 output +-- !query output true --- !query 89 +-- !query SELECT cast(1 as string) in (cast(1 as float)) FROM t --- !query 89 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING))):boolean> --- !query 89 output +-- !query output false --- !query 90 +-- !query SELECT cast(1 as string) in (cast(1 as double)) FROM t --- !query 90 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> --- !query 90 output +-- !query output false --- !query 91 +-- !query SELECT cast(1 as string) in (cast(1 as decimal(10, 0))) FROM t --- !query 91 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> --- !query 91 output +-- !query output true --- !query 92 +-- !query SELECT cast(1 as string) in (cast(1 as string)) FROM t --- !query 92 schema +-- !query schema struct<(CAST(1 AS STRING) IN (CAST(1 AS STRING))):boolean> --- !query 92 output +-- !query output true --- !query 93 +-- !query SELECT cast(1 as string) in (cast('1' as binary)) FROM t --- !query 93 schema +-- !query schema struct<> --- !query 93 
output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS STRING) IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 --- !query 94 +-- !query SELECT cast(1 as string) in (cast(1 as boolean)) FROM t --- !query 94 schema +-- !query schema struct<> --- !query 94 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 --- !query 95 +-- !query SELECT cast(1 as string) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 95 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):boolean> --- !query 95 output +-- !query output false --- !query 96 +-- !query SELECT cast(1 as string) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 96 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> --- !query 96 output +-- !query output false --- !query 97 +-- !query SELECT cast('1' as binary) in (cast(1 as tinyint)) FROM t --- !query 97 schema +-- !query schema struct<> --- !query 97 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: binary != tinyint; line 1 pos 27 --- !query 98 +-- !query SELECT cast('1' as binary) in (cast(1 as smallint)) FROM t --- !query 98 schema +-- !query schema struct<> --- !query 98 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: binary != smallint; line 1 pos 27 --- !query 99 +-- !query SELECT cast('1' as binary) in (cast(1 as int)) 
FROM t --- !query 99 schema +-- !query schema struct<> --- !query 99 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: binary != int; line 1 pos 27 --- !query 100 +-- !query SELECT cast('1' as binary) in (cast(1 as bigint)) FROM t --- !query 100 schema +-- !query schema struct<> --- !query 100 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: binary != bigint; line 1 pos 27 --- !query 101 +-- !query SELECT cast('1' as binary) in (cast(1 as float)) FROM t --- !query 101 schema +-- !query schema struct<> --- !query 101 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: binary != float; line 1 pos 27 --- !query 102 +-- !query SELECT cast('1' as binary) in (cast(1 as double)) FROM t --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: binary != double; line 1 pos 27 --- !query 103 +-- !query SELECT cast('1' as binary) in (cast(1 as decimal(10, 0))) FROM t --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: binary != decimal(10,0); line 1 pos 27 --- !query 104 +-- !query SELECT cast('1' as binary) in (cast(1 as string)) FROM t --- !query 104 schema +-- !query schema struct<> --- !query 104 output +-- !query output org.apache.spark.sql.AnalysisException 
cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 --- !query 105 +-- !query SELECT cast('1' as binary) in (cast('1' as binary)) FROM t --- !query 105 schema +-- !query schema struct<(CAST(1 AS BINARY) IN (CAST(1 AS BINARY))):boolean> --- !query 105 output +-- !query output true --- !query 106 +-- !query SELECT cast('1' as binary) in (cast(1 as boolean)) FROM t --- !query 106 schema +-- !query schema struct<> --- !query 106 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: binary != boolean; line 1 pos 27 --- !query 107 +-- !query SELECT cast('1' as binary) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 107 schema +-- !query schema struct<> --- !query 107 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: binary != timestamp; line 1 pos 27 --- !query 108 +-- !query SELECT cast('1' as binary) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 108 schema +-- !query schema struct<> --- !query 108 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: binary != date; line 1 pos 27 --- !query 109 +-- !query SELECT true in (cast(1 as tinyint)) FROM t --- !query 109 schema +-- !query schema struct<> --- !query 109 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: boolean != tinyint; line 1 pos 12 --- !query 110 +-- !query SELECT true in (cast(1 as smallint)) FROM 
t --- !query 110 schema +-- !query schema struct<> --- !query 110 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: boolean != smallint; line 1 pos 12 --- !query 111 +-- !query SELECT true in (cast(1 as int)) FROM t --- !query 111 schema +-- !query schema struct<> --- !query 111 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: boolean != int; line 1 pos 12 --- !query 112 +-- !query SELECT true in (cast(1 as bigint)) FROM t --- !query 112 schema +-- !query schema struct<> --- !query 112 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: boolean != bigint; line 1 pos 12 --- !query 113 +-- !query SELECT true in (cast(1 as float)) FROM t --- !query 113 schema +-- !query schema struct<> --- !query 113 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: boolean != float; line 1 pos 12 --- !query 114 +-- !query SELECT true in (cast(1 as double)) FROM t --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: boolean != double; line 1 pos 12 --- !query 115 +-- !query SELECT true in (cast(1 as decimal(10, 0))) FROM t --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: boolean != decimal(10,0); line 1 pos 12 --- 
!query 116 +-- !query SELECT true in (cast(1 as string)) FROM t --- !query 116 schema +-- !query schema struct<> --- !query 116 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 12 --- !query 117 +-- !query SELECT true in (cast('1' as binary)) FROM t --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: boolean != binary; line 1 pos 12 --- !query 118 +-- !query SELECT true in (cast(1 as boolean)) FROM t --- !query 118 schema +-- !query schema struct<(true IN (CAST(1 AS BOOLEAN))):boolean> --- !query 118 output +-- !query output true --- !query 119 +-- !query SELECT true in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 119 schema +-- !query schema struct<> --- !query 119 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: boolean != timestamp; line 1 pos 12 --- !query 120 +-- !query SELECT true in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(true IN (CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: boolean != date; line 1 pos 12 --- !query 121 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as tinyint)) FROM t --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS TINYINT)))' due to data type mismatch: 
Arguments must be same type but were: timestamp != tinyint; line 1 pos 50 --- !query 122 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as smallint)) FROM t --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != smallint; line 1 pos 50 --- !query 123 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as int)) FROM t --- !query 123 schema +-- !query schema struct<> --- !query 123 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS INT)))' due to data type mismatch: Arguments must be same type but were: timestamp != int; line 1 pos 50 --- !query 124 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as bigint)) FROM t --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != bigint; line 1 pos 50 --- !query 125 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as float)) FROM t --- !query 125 schema +-- !query schema struct<> --- !query 125 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: timestamp != float; line 1 pos 50 --- !query 126 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as double)) FROM t --- !query 126 schema +-- !query schema struct<> --- !query 126 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 
'(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: timestamp != double; line 1 pos 50 --- !query 127 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as decimal(10, 0))) FROM t --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: timestamp != decimal(10,0); line 1 pos 50 --- !query 128 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as string)) FROM t --- !query 128 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> --- !query 128 output +-- !query output false --- !query 129 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2' as binary)) FROM t --- !query 129 schema +-- !query schema struct<> --- !query 129 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: timestamp != binary; line 1 pos 50 --- !query 130 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast(2 as boolean)) FROM t --- !query 130 schema +-- !query schema struct<> --- !query 130 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST(2 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: timestamp != boolean; line 1 pos 50 --- !query 131 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 131 schema +-- !query schema struct<(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) IN 
(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):boolean> --- !query 131 output +-- !query output false --- !query 132 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 132 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS TIMESTAMP))):boolean> --- !query 132 output +-- !query output false --- !query 133 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as tinyint)) FROM t --- !query 133 schema +-- !query schema struct<> --- !query 133 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: date != tinyint; line 1 pos 43 --- !query 134 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as smallint)) FROM t --- !query 134 schema +-- !query schema struct<> --- !query 134 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: date != smallint; line 1 pos 43 --- !query 135 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as int)) FROM t --- !query 135 schema +-- !query schema struct<> --- !query 135 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS INT)))' due to data type mismatch: Arguments must be same type but were: date != int; line 1 pos 43 --- !query 136 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as bigint)) FROM t --- !query 136 schema +-- !query schema struct<> --- !query 136 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS BIGINT)))' due to data type mismatch: Arguments must be same 
type but were: date != bigint; line 1 pos 43 --- !query 137 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as float)) FROM t --- !query 137 schema +-- !query schema struct<> --- !query 137 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: date != float; line 1 pos 43 --- !query 138 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as double)) FROM t --- !query 138 schema +-- !query schema struct<> --- !query 138 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: date != double; line 1 pos 43 --- !query 139 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as decimal(10, 0))) FROM t --- !query 139 schema +-- !query schema struct<> --- !query 139 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: date != decimal(10,0); line 1 pos 43 --- !query 140 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast(2 as string)) FROM t --- !query 140 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING) IN (CAST(CAST(2 AS STRING) AS STRING))):boolean> --- !query 140 output +-- !query output false --- !query 141 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2' as binary)) FROM t --- !query 141 schema +-- !query schema struct<> --- !query 141 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: date != binary; line 1 pos 43 --- !query 142 +-- !query SELECT 
cast('2017-12-12 09:30:00' as date) in (cast(2 as boolean)) FROM t --- !query 142 schema +-- !query schema struct<> --- !query 142 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST(2 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: date != boolean; line 1 pos 43 --- !query 143 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 143 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP) IN (CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP))):boolean> --- !query 143 output +-- !query output false --- !query 144 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-11 09:30:00' as date)) FROM t --- !query 144 schema +-- !query schema struct<(CAST(2017-12-12 09:30:00 AS DATE) IN (CAST(2017-12-11 09:30:00 AS DATE))):boolean> --- !query 144 output +-- !query output false --- !query 145 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as tinyint)) FROM t --- !query 145 schema +-- !query schema struct<(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST(1 AS TINYINT))):boolean> --- !query 145 output +-- !query output true --- !query 146 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as smallint)) FROM t --- !query 146 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS SMALLINT) IN (CAST(CAST(1 AS TINYINT) AS SMALLINT), CAST(CAST(1 AS SMALLINT) AS SMALLINT))):boolean> --- !query 146 output +-- !query output true --- !query 147 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as int)) FROM t --- !query 147 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS INT) IN (CAST(CAST(1 AS TINYINT) AS INT), CAST(CAST(1 AS INT) AS INT))):boolean> --- !query 147 output +-- !query output true --- !query 148 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as bigint)) 
FROM t --- !query 148 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS BIGINT) IN (CAST(CAST(1 AS TINYINT) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> --- !query 148 output +-- !query output true --- !query 149 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as float)) FROM t --- !query 149 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS FLOAT) IN (CAST(CAST(1 AS TINYINT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 149 output +-- !query output true --- !query 150 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as double)) FROM t --- !query 150 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) IN (CAST(CAST(1 AS TINYINT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 150 output +-- !query output true --- !query 151 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as decimal(10, 0))) FROM t --- !query 151 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 151 output +-- !query output true --- !query 152 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast(1 as string)) FROM t --- !query 152 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS STRING) IN (CAST(CAST(1 AS TINYINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 152 output +-- !query output true --- !query 153 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('1' as binary)) FROM t --- !query 153 schema +-- !query schema struct<> --- !query 153 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: tinyint != binary; line 1 pos 26 --- !query 154 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), 
cast(1 as boolean)) FROM t --- !query 154 schema +-- !query schema struct<> --- !query 154 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: tinyint != boolean; line 1 pos 26 --- !query 155 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 155 schema +-- !query schema struct<> --- !query 155 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: tinyint != timestamp; line 1 pos 26 --- !query 156 +-- !query SELECT cast(1 as tinyint) in (cast(1 as tinyint), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 156 schema +-- !query schema struct<> --- !query 156 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS TINYINT) IN (CAST(1 AS TINYINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: tinyint != date; line 1 pos 26 --- !query 157 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as tinyint)) FROM t --- !query 157 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS SMALLINT) IN (CAST(CAST(1 AS SMALLINT) AS SMALLINT), CAST(CAST(1 AS TINYINT) AS SMALLINT))):boolean> --- !query 157 output +-- !query output true --- !query 158 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as smallint)) FROM t --- !query 158 schema +-- !query schema struct<(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST(1 AS SMALLINT))):boolean> --- !query 158 output +-- !query output true --- !query 159 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as int)) FROM t --- !query 159 schema +-- !query schema 
struct<(CAST(CAST(1 AS SMALLINT) AS INT) IN (CAST(CAST(1 AS SMALLINT) AS INT), CAST(CAST(1 AS INT) AS INT))):boolean> --- !query 159 output +-- !query output true --- !query 160 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as bigint)) FROM t --- !query 160 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS BIGINT) IN (CAST(CAST(1 AS SMALLINT) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> --- !query 160 output +-- !query output true --- !query 161 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as float)) FROM t --- !query 161 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS FLOAT) IN (CAST(CAST(1 AS SMALLINT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 161 output +-- !query output true --- !query 162 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as double)) FROM t --- !query 162 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) IN (CAST(CAST(1 AS SMALLINT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 162 output +-- !query output true --- !query 163 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as decimal(10, 0))) FROM t --- !query 163 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 163 output +-- !query output true --- !query 164 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as string)) FROM t --- !query 164 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS STRING) IN (CAST(CAST(1 AS SMALLINT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 164 output +-- !query output true --- !query 165 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast('1' as binary)) FROM t --- !query 165 schema +-- !query schema struct<> --- !query 165 output +-- !query output 
org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: smallint != binary; line 1 pos 27 --- !query 166 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast(1 as boolean)) FROM t --- !query 166 schema +-- !query schema struct<> --- !query 166 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: smallint != boolean; line 1 pos 27 --- !query 167 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 167 schema +-- !query schema struct<> --- !query 167 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: smallint != timestamp; line 1 pos 27 --- !query 168 +-- !query SELECT cast(1 as smallint) in (cast(1 as smallint), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 168 schema +-- !query schema struct<> --- !query 168 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS SMALLINT) IN (CAST(1 AS SMALLINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: smallint != date; line 1 pos 27 --- !query 169 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as tinyint)) FROM t --- !query 169 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS INT) AS INT), CAST(CAST(1 AS TINYINT) AS INT))):boolean> --- !query 169 output +-- !query output true --- !query 170 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as smallint)) FROM t --- !query 170 schema +-- !query schema 
struct<(CAST(CAST(1 AS INT) AS INT) IN (CAST(CAST(1 AS INT) AS INT), CAST(CAST(1 AS SMALLINT) AS INT))):boolean> --- !query 170 output +-- !query output true --- !query 171 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as int)) FROM t --- !query 171 schema +-- !query schema struct<(CAST(1 AS INT) IN (CAST(1 AS INT), CAST(1 AS INT))):boolean> --- !query 171 output +-- !query output true --- !query 172 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as bigint)) FROM t --- !query 172 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS BIGINT) IN (CAST(CAST(1 AS INT) AS BIGINT), CAST(CAST(1 AS BIGINT) AS BIGINT))):boolean> --- !query 172 output +-- !query output true --- !query 173 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as float)) FROM t --- !query 173 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS FLOAT) IN (CAST(CAST(1 AS INT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 173 output +-- !query output true --- !query 174 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as double)) FROM t --- !query 174 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) IN (CAST(CAST(1 AS INT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 174 output +-- !query output true --- !query 175 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as decimal(10, 0))) FROM t --- !query 175 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS INT) AS DECIMAL(10,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)))):boolean> --- !query 175 output +-- !query output true --- !query 176 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as string)) FROM t --- !query 176 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS STRING) IN (CAST(CAST(1 AS INT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 176 output +-- !query output true --- !query 177 +-- !query SELECT cast(1 as int) in (cast(1 as int), 
cast('1' as binary)) FROM t --- !query 177 schema +-- !query schema struct<> --- !query 177 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: int != binary; line 1 pos 22 --- !query 178 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast(1 as boolean)) FROM t --- !query 178 schema +-- !query schema struct<> --- !query 178 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: int != boolean; line 1 pos 22 --- !query 179 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 179 schema +-- !query schema struct<> --- !query 179 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: int != timestamp; line 1 pos 22 --- !query 180 +-- !query SELECT cast(1 as int) in (cast(1 as int), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 180 schema +-- !query schema struct<> --- !query 180 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS INT) IN (CAST(1 AS INT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: int != date; line 1 pos 22 --- !query 181 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as tinyint)) FROM t --- !query 181 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS TINYINT) AS BIGINT))):boolean> --- !query 181 output +-- !query output true --- !query 182 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as smallint)) FROM t 
--- !query 182 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS SMALLINT) AS BIGINT))):boolean> --- !query 182 output +-- !query output true --- !query 183 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as int)) FROM t --- !query 183 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS BIGINT) IN (CAST(CAST(1 AS BIGINT) AS BIGINT), CAST(CAST(1 AS INT) AS BIGINT))):boolean> --- !query 183 output +-- !query output true --- !query 184 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as bigint)) FROM t --- !query 184 schema +-- !query schema struct<(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST(1 AS BIGINT))):boolean> --- !query 184 output +-- !query output true --- !query 185 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as float)) FROM t --- !query 185 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS FLOAT) IN (CAST(CAST(1 AS BIGINT) AS FLOAT), CAST(CAST(1 AS FLOAT) AS FLOAT))):boolean> --- !query 185 output +-- !query output true --- !query 186 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as double)) FROM t --- !query 186 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) IN (CAST(CAST(1 AS BIGINT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 186 output +-- !query output true --- !query 187 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as decimal(10, 0))) FROM t --- !query 187 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)), CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)))):boolean> --- !query 187 output +-- !query output true --- !query 188 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as string)) FROM t --- !query 188 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS STRING) IN (CAST(CAST(1 AS BIGINT) AS STRING), CAST(CAST(1 AS STRING) AS 
STRING))):boolean> --- !query 188 output +-- !query output true --- !query 189 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast('1' as binary)) FROM t --- !query 189 schema +-- !query schema struct<> --- !query 189 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: bigint != binary; line 1 pos 25 --- !query 190 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast(1 as boolean)) FROM t --- !query 190 schema +-- !query schema struct<> --- !query 190 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: bigint != boolean; line 1 pos 25 --- !query 191 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 191 schema +-- !query schema struct<> --- !query 191 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: bigint != timestamp; line 1 pos 25 --- !query 192 +-- !query SELECT cast(1 as bigint) in (cast(1 as bigint), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 192 schema +-- !query schema struct<> --- !query 192 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BIGINT) IN (CAST(1 AS BIGINT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: bigint != date; line 1 pos 25 --- !query 193 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as tinyint)) FROM t --- !query 193 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), 
CAST(CAST(1 AS TINYINT) AS FLOAT))):boolean> --- !query 193 output +-- !query output true --- !query 194 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as smallint)) FROM t --- !query 194 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS SMALLINT) AS FLOAT))):boolean> --- !query 194 output +-- !query output true --- !query 195 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as int)) FROM t --- !query 195 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS INT) AS FLOAT))):boolean> --- !query 195 output +-- !query output true --- !query 196 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as bigint)) FROM t --- !query 196 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS FLOAT) IN (CAST(CAST(1 AS FLOAT) AS FLOAT), CAST(CAST(1 AS BIGINT) AS FLOAT))):boolean> --- !query 196 output +-- !query output true --- !query 197 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as float)) FROM t --- !query 197 schema +-- !query schema struct<(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST(1 AS FLOAT))):boolean> --- !query 197 output +-- !query output true --- !query 198 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as double)) FROM t --- !query 198 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 198 output +-- !query output true --- !query 199 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as decimal(10, 0))) FROM t --- !query 199 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) IN (CAST(CAST(1 AS FLOAT) AS DOUBLE), CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 199 output +-- !query output true --- !query 200 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as string)) FROM t --- !query 200 
schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS STRING) IN (CAST(CAST(1 AS FLOAT) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 200 output +-- !query output true --- !query 201 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast('1' as binary)) FROM t --- !query 201 schema +-- !query schema struct<> --- !query 201 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: float != binary; line 1 pos 24 --- !query 202 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast(1 as boolean)) FROM t --- !query 202 schema +-- !query schema struct<> --- !query 202 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: float != boolean; line 1 pos 24 --- !query 203 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 203 schema +-- !query schema struct<> --- !query 203 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: float != timestamp; line 1 pos 24 --- !query 204 +-- !query SELECT cast(1 as float) in (cast(1 as float), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 204 schema +-- !query schema struct<> --- !query 204 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS FLOAT) IN (CAST(1 AS FLOAT), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: float != date; line 1 pos 24 --- !query 205 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as tinyint)) FROM t --- !query 205 
schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS TINYINT) AS DOUBLE))):boolean> --- !query 205 output +-- !query output true --- !query 206 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as smallint)) FROM t --- !query 206 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS SMALLINT) AS DOUBLE))):boolean> --- !query 206 output +-- !query output true --- !query 207 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as int)) FROM t --- !query 207 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS INT) AS DOUBLE))):boolean> --- !query 207 output +-- !query output true --- !query 208 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as bigint)) FROM t --- !query 208 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS BIGINT) AS DOUBLE))):boolean> --- !query 208 output +-- !query output true --- !query 209 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as float)) FROM t --- !query 209 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 209 output +-- !query output true --- !query 210 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as double)) FROM t --- !query 210 schema +-- !query schema struct<(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST(1 AS DOUBLE))):boolean> --- !query 210 output +-- !query output true --- !query 211 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as decimal(10, 0))) FROM t --- !query 211 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS DOUBLE) IN (CAST(CAST(1 AS DOUBLE) AS DOUBLE), CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 211 
output +-- !query output true --- !query 212 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as string)) FROM t --- !query 212 schema +-- !query schema struct<(CAST(CAST(1 AS DOUBLE) AS STRING) IN (CAST(CAST(1 AS DOUBLE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 212 output +-- !query output true --- !query 213 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast('1' as binary)) FROM t --- !query 213 schema +-- !query schema struct<> --- !query 213 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: double != binary; line 1 pos 25 --- !query 214 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast(1 as boolean)) FROM t --- !query 214 schema +-- !query schema struct<> --- !query 214 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: double != boolean; line 1 pos 25 --- !query 215 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 215 schema +-- !query schema struct<> --- !query 215 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: double != timestamp; line 1 pos 25 --- !query 216 +-- !query SELECT cast(1 as double) in (cast(1 as double), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 216 schema +-- !query schema struct<> --- !query 216 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DOUBLE) IN (CAST(1 AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must 
be same type but were: double != date; line 1 pos 25 --- !query 217 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as tinyint)) FROM t --- !query 217 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)), CAST(CAST(1 AS TINYINT) AS DECIMAL(10,0)))):boolean> --- !query 217 output +-- !query output true --- !query 218 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as smallint)) FROM t --- !query 218 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)), CAST(CAST(1 AS SMALLINT) AS DECIMAL(10,0)))):boolean> --- !query 218 output +-- !query output true --- !query 219 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as int)) FROM t --- !query 219 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(10,0)), CAST(CAST(1 AS INT) AS DECIMAL(10,0)))):boolean> --- !query 219 output +-- !query output true --- !query 220 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as bigint)) FROM t --- !query 220 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(20,0)), CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)))):boolean> --- !query 220 output +-- !query output true --- !query 221 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as float)) FROM t --- !query 221 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(CAST(1 AS FLOAT) AS DOUBLE))):boolean> --- !query 221 output +-- !query output true --- !query 222 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as double)) FROM t --- !query 222 schema +-- !query schema 
struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE), CAST(CAST(1 AS DOUBLE) AS DOUBLE))):boolean> --- !query 222 output +-- !query output true --- !query 223 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as decimal(10, 0))) FROM t --- !query 223 schema +-- !query schema struct<(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST(1 AS DECIMAL(10,0)))):boolean> --- !query 223 output +-- !query output true --- !query 224 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as string)) FROM t --- !query 224 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS STRING) IN (CAST(CAST(1 AS DECIMAL(10,0)) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 224 output +-- !query output true --- !query 225 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('1' as binary)) FROM t --- !query 225 schema +-- !query schema struct<> --- !query 225 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != binary; line 1 pos 33 --- !query 226 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast(1 as boolean)) FROM t --- !query 226 schema +-- !query schema struct<> --- !query 226 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != boolean; line 1 pos 33 --- !query 227 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 227 schema +-- !query schema struct<> --- !query 227 output +-- !query output org.apache.spark.sql.AnalysisException cannot 
resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != timestamp; line 1 pos 33 --- !query 228 +-- !query SELECT cast(1 as decimal(10, 0)) in (cast(1 as decimal(10, 0)), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 228 schema +-- !query schema struct<> --- !query 228 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS DECIMAL(10,0)) IN (CAST(1 AS DECIMAL(10,0)), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: decimal(10,0) != date; line 1 pos 33 --- !query 229 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as tinyint)) FROM t --- !query 229 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS TINYINT) AS STRING))):boolean> --- !query 229 output +-- !query output true --- !query 230 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as smallint)) FROM t --- !query 230 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS SMALLINT) AS STRING))):boolean> --- !query 230 output +-- !query output true --- !query 231 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as int)) FROM t --- !query 231 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS INT) AS STRING))):boolean> --- !query 231 output +-- !query output true --- !query 232 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as bigint)) FROM t --- !query 232 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS BIGINT) AS STRING))):boolean> --- !query 232 output +-- !query output true --- !query 233 +-- !query SELECT cast(1 as string) in (cast(1 
as string), cast(1 as float)) FROM t --- !query 233 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS FLOAT) AS STRING))):boolean> --- !query 233 output +-- !query output true --- !query 234 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as double)) FROM t --- !query 234 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DOUBLE) AS STRING))):boolean> --- !query 234 output +-- !query output true --- !query 235 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as decimal(10, 0))) FROM t --- !query 235 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(1 AS DECIMAL(10,0)) AS STRING))):boolean> --- !query 235 output +-- !query output true --- !query 236 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as string)) FROM t --- !query 236 schema +-- !query schema struct<(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST(1 AS STRING))):boolean> --- !query 236 output +-- !query output true --- !query 237 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast('1' as binary)) FROM t --- !query 237 schema +-- !query schema struct<> --- !query 237 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: string != binary; line 1 pos 25 --- !query 238 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast(1 as boolean)) FROM t --- !query 238 schema +-- !query schema struct<> --- !query 238 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS STRING) IN (CAST(1 AS STRING), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: string != boolean; line 1 pos 25 --- !query 239 +-- !query SELECT 
cast(1 as string) in (cast(1 as string), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 239 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING))):boolean> --- !query 239 output +-- !query output true --- !query 240 +-- !query SELECT cast(1 as string) in (cast(1 as string), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 240 schema +-- !query schema struct<(CAST(CAST(1 AS STRING) AS STRING) IN (CAST(CAST(1 AS STRING) AS STRING), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> --- !query 240 output +-- !query output true --- !query 241 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as tinyint)) FROM t --- !query 241 schema +-- !query schema struct<> --- !query 241 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: binary != tinyint; line 1 pos 27 --- !query 242 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as smallint)) FROM t --- !query 242 schema +-- !query schema struct<> --- !query 242 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: binary != smallint; line 1 pos 27 --- !query 243 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as int)) FROM t --- !query 243 schema +-- !query schema struct<> --- !query 243 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: binary != int; line 1 pos 27 --- !query 244 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as 
bigint)) FROM t --- !query 244 schema +-- !query schema struct<> --- !query 244 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: binary != bigint; line 1 pos 27 --- !query 245 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as float)) FROM t --- !query 245 schema +-- !query schema struct<> --- !query 245 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: binary != float; line 1 pos 27 --- !query 246 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as double)) FROM t --- !query 246 schema +-- !query schema struct<> --- !query 246 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: binary != double; line 1 pos 27 --- !query 247 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as decimal(10, 0))) FROM t --- !query 247 schema +-- !query schema struct<> --- !query 247 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: binary != decimal(10,0); line 1 pos 27 --- !query 248 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as string)) FROM t --- !query 248 schema +-- !query schema struct<> --- !query 248 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: binary != string; line 1 pos 27 --- 
!query 249 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast('1' as binary)) FROM t --- !query 249 schema +-- !query schema struct<(CAST(1 AS BINARY) IN (CAST(1 AS BINARY), CAST(1 AS BINARY))):boolean> --- !query 249 output +-- !query output true --- !query 250 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast(1 as boolean)) FROM t --- !query 250 schema +-- !query schema struct<> --- !query 250 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: binary != boolean; line 1 pos 27 --- !query 251 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 251 schema +-- !query schema struct<> --- !query 251 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: binary != timestamp; line 1 pos 27 --- !query 252 +-- !query SELECT cast('1' as binary) in (cast('1' as binary), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 252 schema +-- !query schema struct<> --- !query 252 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) IN (CAST('1' AS BINARY), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: binary != date; line 1 pos 27 --- !query 253 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as tinyint)) FROM t --- !query 253 schema +-- !query schema struct<> --- !query 253 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: boolean != tinyint; 
line 1 pos 28 --- !query 254 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as smallint)) FROM t --- !query 254 schema +-- !query schema struct<> --- !query 254 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: boolean != smallint; line 1 pos 28 --- !query 255 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as int)) FROM t --- !query 255 schema +-- !query schema struct<> --- !query 255 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: boolean != int; line 1 pos 28 --- !query 256 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as bigint)) FROM t --- !query 256 schema +-- !query schema struct<> --- !query 256 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: boolean != bigint; line 1 pos 28 --- !query 257 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as float)) FROM t --- !query 257 schema +-- !query schema struct<> --- !query 257 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: boolean != float; line 1 pos 28 --- !query 258 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as double)) FROM t --- !query 258 schema +-- !query schema struct<> --- !query 258 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS DOUBLE)))' due 
to data type mismatch: Arguments must be same type but were: boolean != double; line 1 pos 28 --- !query 259 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as decimal(10, 0))) FROM t --- !query 259 schema +-- !query schema struct<> --- !query 259 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: boolean != decimal(10,0); line 1 pos 28 --- !query 260 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as string)) FROM t --- !query 260 schema +-- !query schema struct<> --- !query 260 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST(1 AS STRING)))' due to data type mismatch: Arguments must be same type but were: boolean != string; line 1 pos 28 --- !query 261 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast('1' as binary)) FROM t --- !query 261 schema +-- !query schema struct<> --- !query 261 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: boolean != binary; line 1 pos 28 --- !query 262 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast(1 as boolean)) FROM t --- !query 262 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) IN (CAST(1 AS BOOLEAN), CAST(1 AS BOOLEAN))):boolean> --- !query 262 output +-- !query output true --- !query 263 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 263 schema +-- !query schema struct<> --- !query 263 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST('2017-12-11 09:30:00.0' AS 
TIMESTAMP)))' due to data type mismatch: Arguments must be same type but were: boolean != timestamp; line 1 pos 28 --- !query 264 +-- !query SELECT cast('1' as boolean) in (cast('1' as boolean), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 264 schema +-- !query schema struct<> --- !query 264 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BOOLEAN) IN (CAST('1' AS BOOLEAN), CAST('2017-12-11 09:30:00' AS DATE)))' due to data type mismatch: Arguments must be same type but were: boolean != date; line 1 pos 28 --- !query 265 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as tinyint)) FROM t --- !query 265 schema +-- !query schema struct<> --- !query 265 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != tinyint; line 1 pos 50 --- !query 266 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as smallint)) FROM t --- !query 266 schema +-- !query schema struct<> --- !query 266 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != smallint; line 1 pos 50 --- !query 267 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as int)) FROM t --- !query 267 schema +-- !query schema struct<> --- !query 267 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS INT)))' due to 
data type mismatch: Arguments must be same type but were: timestamp != int; line 1 pos 50 --- !query 268 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as bigint)) FROM t --- !query 268 schema +-- !query schema struct<> --- !query 268 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: timestamp != bigint; line 1 pos 50 --- !query 269 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as float)) FROM t --- !query 269 schema +-- !query schema struct<> --- !query 269 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: timestamp != float; line 1 pos 50 --- !query 270 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as double)) FROM t --- !query 270 schema +-- !query schema struct<> --- !query 270 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: timestamp != double; line 1 pos 50 --- !query 271 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as decimal(10, 0))) FROM t --- !query 271 schema +-- !query schema struct<> --- !query 271 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS 
TIMESTAMP), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: timestamp != decimal(10,0); line 1 pos 50 --- !query 272 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as string)) FROM t --- !query 272 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING) IN (CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 272 output +-- !query output true --- !query 273 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('1' as binary)) FROM t --- !query 273 schema +-- !query schema struct<> --- !query 273 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: timestamp != binary; line 1 pos 50 --- !query 274 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast(1 as boolean)) FROM t --- !query 274 schema +-- !query schema struct<> --- !query 274 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00.0' AS TIMESTAMP) IN (CAST('2017-12-12 09:30:00.0' AS TIMESTAMP), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: timestamp != boolean; line 1 pos 50 --- !query 275 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 275 schema +-- !query schema struct<(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) IN (CAST(2017-12-12 09:30:00.0 AS TIMESTAMP), CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):boolean> --- !query 275 output +-- !query output true --- !query 276 
+-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) in (cast('2017-12-12 09:30:00.0' as timestamp), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 276 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP) IN (CAST(CAST(2017-12-12 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP), CAST(CAST(2017-12-11 09:30:00 AS DATE) AS TIMESTAMP))):boolean> --- !query 276 output +-- !query output true --- !query 277 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as tinyint)) FROM t --- !query 277 schema +-- !query schema struct<> --- !query 277 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS TINYINT)))' due to data type mismatch: Arguments must be same type but were: date != tinyint; line 1 pos 43 --- !query 278 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as smallint)) FROM t --- !query 278 schema +-- !query schema struct<> --- !query 278 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS SMALLINT)))' due to data type mismatch: Arguments must be same type but were: date != smallint; line 1 pos 43 --- !query 279 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as int)) FROM t --- !query 279 schema +-- !query schema struct<> --- !query 279 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS INT)))' due to data type mismatch: Arguments must be same type but were: date != int; line 1 pos 43 --- !query 280 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as bigint)) FROM t --- 
!query 280 schema +-- !query schema struct<> --- !query 280 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS BIGINT)))' due to data type mismatch: Arguments must be same type but were: date != bigint; line 1 pos 43 --- !query 281 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as float)) FROM t --- !query 281 schema +-- !query schema struct<> --- !query 281 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS FLOAT)))' due to data type mismatch: Arguments must be same type but were: date != float; line 1 pos 43 --- !query 282 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as double)) FROM t --- !query 282 schema +-- !query schema struct<> --- !query 282 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS DOUBLE)))' due to data type mismatch: Arguments must be same type but were: date != double; line 1 pos 43 --- !query 283 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as decimal(10, 0))) FROM t --- !query 283 schema +-- !query schema struct<> --- !query 283 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS DECIMAL(10,0))))' due to data type mismatch: Arguments must be same type but were: date != decimal(10,0); line 1 pos 43 --- !query 284 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as string)) FROM t --- !query 284 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) 
AS STRING) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS STRING), CAST(CAST(1 AS STRING) AS STRING))):boolean> --- !query 284 output +-- !query output true --- !query 285 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('1' as binary)) FROM t --- !query 285 schema +-- !query schema struct<> --- !query 285 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST('1' AS BINARY)))' due to data type mismatch: Arguments must be same type but were: date != binary; line 1 pos 43 --- !query 286 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast(1 as boolean)) FROM t --- !query 286 schema +-- !query schema struct<> --- !query 286 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-12 09:30:00' AS DATE) IN (CAST('2017-12-12 09:30:00' AS DATE), CAST(1 AS BOOLEAN)))' due to data type mismatch: Arguments must be same type but were: date != boolean; line 1 pos 43 --- !query 287 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 287 schema +-- !query schema struct<(CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP) IN (CAST(CAST(2017-12-12 09:30:00 AS DATE) AS TIMESTAMP), CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS TIMESTAMP))):boolean> --- !query 287 output +-- !query output true --- !query 288 +-- !query SELECT cast('2017-12-12 09:30:00' as date) in (cast('2017-12-12 09:30:00' as date), cast('2017-12-11 09:30:00' as date)) FROM t --- !query 288 schema +-- !query schema struct<(CAST(2017-12-12 09:30:00 AS DATE) IN (CAST(2017-12-12 09:30:00 AS DATE), CAST(2017-12-11 09:30:00 AS DATE))):boolean> --- !query 288 output +-- !query output true diff --git 
a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out index 86a578ca013df..ed7ab5a342c12 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapZipWith.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 16 --- !query 0 +-- !query CREATE TEMPORARY VIEW various_maps AS SELECT * FROM VALUES ( map(true, false), map(2Y, 1Y), @@ -36,144 +36,144 @@ CREATE TEMPORARY VIEW various_maps AS SELECT * FROM VALUES ( array_map1, array_map2, struct_map1, struct_map2 ) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT map_zip_with(tinyint_map, smallint_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 1 schema +-- !query schema struct>> --- !query 1 output +-- !query output {2:{"k":2,"v1":1,"v2":1}} --- !query 2 +-- !query SELECT map_zip_with(smallint_map, int_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 2 schema +-- !query schema struct>> --- !query 2 output +-- !query output {2:{"k":2,"v1":1,"v2":1}} --- !query 3 +-- !query SELECT map_zip_with(int_map, bigint_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 3 schema +-- !query schema struct>> --- !query 3 output +-- !query output {2:{"k":2,"v1":1,"v2":1}} --- !query 4 +-- !query SELECT map_zip_with(double_map, float_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 4 schema +-- !query schema struct>> --- !query 4 output +-- !query output {2.0:{"k":2.0,"v1":1.0,"v2":1.0}} --- !query 5 +-- !query SELECT map_zip_with(decimal_map1, decimal_map2, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 
'map_zip_with(various_maps.`decimal_map1`, various_maps.`decimal_map2`, lambdafunction(named_struct(NamePlaceholder(), k, NamePlaceholder(), v1, NamePlaceholder(), v2), k, v1, v2))' due to argument data type mismatch: The input to function map_zip_with should have been two maps with compatible key types, but the key types are [decimal(36,0), decimal(36,35)].; line 1 pos 7 --- !query 6 +-- !query SELECT map_zip_with(decimal_map1, int_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 6 schema +-- !query schema struct>> --- !query 6 output +-- !query output {2:{"k":2,"v1":null,"v2":1},922337203685477897945456575809789456:{"k":922337203685477897945456575809789456,"v1":922337203685477897945456575809789456,"v2":null}} --- !query 7 +-- !query SELECT map_zip_with(decimal_map1, double_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 7 schema +-- !query schema struct>> --- !query 7 output +-- !query output {2.0:{"k":2.0,"v1":null,"v2":1.0},9.223372036854779E35:{"k":9.223372036854779E35,"v1":922337203685477897945456575809789456,"v2":null}} --- !query 8 +-- !query SELECT map_zip_with(decimal_map2, int_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'map_zip_with(various_maps.`decimal_map2`, various_maps.`int_map`, lambdafunction(named_struct(NamePlaceholder(), k, NamePlaceholder(), v1, NamePlaceholder(), v2), k, v1, v2))' due to argument data type mismatch: The input to function map_zip_with should have been two maps with compatible key types, but the key types are [decimal(36,35), int].; line 1 pos 7 --- !query 9 +-- !query SELECT map_zip_with(decimal_map2, double_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 9 schema +-- !query schema struct>> --- !query 9 output +-- !query output 
{2.0:{"k":2.0,"v1":null,"v2":1.0},9.223372036854778:{"k":9.223372036854778,"v1":9.22337203685477897945456575809789456,"v2":null}} --- !query 10 +-- !query SELECT map_zip_with(string_map1, int_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 10 schema +-- !query schema struct>> --- !query 10 output +-- !query output {"2":{"k":"2","v1":"1","v2":1},"true":{"k":"true","v1":"false","v2":null}} --- !query 11 +-- !query SELECT map_zip_with(string_map2, date_map, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 11 schema +-- !query schema struct>> --- !query 11 output +-- !query output {"2016-03-14":{"k":"2016-03-14","v1":"2016-03-13","v2":2016-03-13}} --- !query 12 +-- !query SELECT map_zip_with(timestamp_map, string_map3, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 12 schema +-- !query schema struct>> --- !query 12 output -{"2016-11-15 20:54:00":{"k":"2016-11-15 20:54:00","v1":2016-11-12 20:54:00.0,"v2":null},"2016-11-15 20:54:00.000":{"k":"2016-11-15 20:54:00.000","v1":null,"v2":"2016-11-12 20:54:00.000"}} +-- !query output +{"2016-11-15 20:54:00":{"k":"2016-11-15 20:54:00","v1":2016-11-12 20:54:00,"v2":null},"2016-11-15 20:54:00.000":{"k":"2016-11-15 20:54:00.000","v1":null,"v2":"2016-11-12 20:54:00.000"}} --- !query 13 +-- !query SELECT map_zip_with(decimal_map1, string_map4, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 13 schema +-- !query schema struct>> --- !query 13 output +-- !query output {"922337203685477897945456575809789456":{"k":"922337203685477897945456575809789456","v1":922337203685477897945456575809789456,"v2":"text"}} --- !query 14 +-- !query SELECT map_zip_with(array_map1, array_map2, (k, v1, v2) -> struct(k, v1, v2)) m FROM various_maps --- !query 14 schema +-- !query schema struct,struct,v1:array,v2:array>>> --- !query 14 output +-- !query output {[1,2]:{"k":[1,2],"v1":[1,2],"v2":[1,2]}} --- !query 15 +-- !query SELECT map_zip_with(struct_map1, struct_map2, (k, v1, v2) -> 
struct(k, v1, v2)) m FROM various_maps --- !query 15 schema +-- !query schema struct,struct,v1:struct,v2:struct>>> --- !query 15 output +-- !query output {{"col1":1,"col2":2}:{"k":{"col1":1,"col2":2},"v1":{"col1":1,"col2":2},"v2":{"col1":1,"col2":2}}} diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out index 79e00860e4c05..fcf1afc72efe9 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/mapconcat.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 8 --- !query 0 +-- !query CREATE TEMPORARY VIEW various_maps AS SELECT * FROM VALUES ( map(true, false), map(false, true), map(1Y, 2Y), map(3Y, 4Y), @@ -38,13 +38,13 @@ CREATE TEMPORARY VIEW various_maps AS SELECT * FROM VALUES ( string_int_map1, string_int_map2, int_string_map1, int_string_map2 ) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT map_concat(boolean_map1, boolean_map2) boolean_map, map_concat(tinyint_map1, tinyint_map2) tinyint_map, @@ -62,13 +62,13 @@ SELECT map_concat(string_int_map1, string_int_map2) string_int_map, map_concat(int_string_map1, int_string_map2) int_string_map FROM various_maps --- !query 1 schema +-- !query schema struct,tinyint_map:map,smallint_map:map,int_map:map,bigint_map:map,decimal_map:map,float_map:map,double_map:map,date_map:map,timestamp_map:map,string_map:map,array_map:map,array>,struct_map:map,struct>,string_int_map:map,int_string_map:map> --- !query 1 output -{false:true,true:false} {1:2,3:4} {1:2,3:4} {4:6,7:8} {6:7,8:9} {9223372036854775808:9223372036854775809,9223372036854775809:9223372036854775808} {1.0:2.0,3.0:4.0} {1.0:2.0,3.0:4.0} {2016-03-12:2016-03-11,2016-03-14:2016-03-13} {2016-11-11 20:54:00.0:2016-11-09 20:54:00.0,2016-11-15 20:54:00.0:2016-11-12 20:54:00.0} 
{"a":"b","c":"d"} {["a","b"]:["c","d"],["e"]:["f"]} {{"col1":"a","col2":1}:{"col1":"b","col2":2},{"col1":"c","col2":3}:{"col1":"d","col2":4}} {"a":1,"c":2} {1:"a",2:"c"} +-- !query output +{false:true,true:false} {1:2,3:4} {1:2,3:4} {4:6,7:8} {6:7,8:9} {9223372036854775808:9223372036854775809,9223372036854775809:9223372036854775808} {1.0:2.0,3.0:4.0} {1.0:2.0,3.0:4.0} {2016-03-12:2016-03-11,2016-03-14:2016-03-13} {2016-11-11 20:54:00:2016-11-09 20:54:00,2016-11-15 20:54:00:2016-11-12 20:54:00} {"a":"b","c":"d"} {["a","b"]:["c","d"],["e"]:["f"]} {{"col1":"a","col2":1}:{"col1":"b","col2":2},{"col1":"c","col2":3}:{"col1":"d","col2":4}} {"a":1,"c":2} {1:"a",2:"c"} --- !query 2 +-- !query SELECT map_concat(tinyint_map1, smallint_map2) ts_map, map_concat(smallint_map1, int_map2) si_map, @@ -80,62 +80,62 @@ SELECT map_concat(string_map1, int_map2) sti_map, map_concat(int_string_map1, tinyint_map2) istt_map FROM various_maps --- !query 2 schema +-- !query schema struct,si_map:map,ib_map:map,bd_map:map,df_map:map,std_map:map,tst_map:map,sti_map:map,istt_map:map> --- !query 2 output +-- !query output {1:2,3:4} {1:2,7:8} {4:6,8:9} {6:7,9223372036854775808:9223372036854775809} {3.0:4.0,9.223372036854776E18:9.223372036854776E18} {"2016-03-12":"2016-03-11","a":"b"} {"2016-11-15 20:54:00":"2016-11-12 20:54:00","c":"d"} {"7":"8","a":"b"} {1:"a",3:"4"} --- !query 3 +-- !query SELECT map_concat(tinyint_map1, array_map1) tm_map FROM various_maps --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'map_concat(various_maps.`tinyint_map1`, various_maps.`array_map1`)' due to data type mismatch: input to function map_concat should all be the same type, but it's [map, map,array>]; line 2 pos 4 --- !query 4 +-- !query SELECT map_concat(boolean_map1, int_map2) bi_map FROM various_maps --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException 
cannot resolve 'map_concat(various_maps.`boolean_map1`, various_maps.`int_map2`)' due to data type mismatch: input to function map_concat should all be the same type, but it's [map, map]; line 2 pos 4 --- !query 5 +-- !query SELECT map_concat(int_map1, struct_map2) is_map FROM various_maps --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'map_concat(various_maps.`int_map1`, various_maps.`struct_map2`)' due to data type mismatch: input to function map_concat should all be the same type, but it's [map, map,struct>]; line 2 pos 4 --- !query 6 +-- !query SELECT map_concat(struct_map1, array_map2) ma_map FROM various_maps --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'map_concat(various_maps.`struct_map1`, various_maps.`array_map2`)' due to data type mismatch: input to function map_concat should all be the same type, but it's [map,struct>, map,array>]; line 2 pos 4 --- !query 7 +-- !query SELECT map_concat(int_map1, array_map2) ms_map FROM various_maps --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'map_concat(various_maps.`int_map1`, various_maps.`array_map2`)' due to data type mismatch: input to function map_concat should all be the same type, but it's [map, map,array>]; line 2 pos 4 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out index c54ceba85ce79..31353bdedc69f 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out @@ -2,2577 +2,2576 @@ -- Number of queries: 316 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS 
SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT '1' + cast(1 as tinyint) FROM t --- !query 1 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 1 output +-- !query output 2.0 --- !query 2 +-- !query SELECT '1' + cast(1 as smallint) FROM t --- !query 2 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 2 output +-- !query output 2.0 --- !query 3 +-- !query SELECT '1' + cast(1 as int) FROM t --- !query 3 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 3 output +-- !query output 2.0 --- !query 4 +-- !query SELECT '1' + cast(1 as bigint) FROM t --- !query 4 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 4 output +-- !query output 2.0 --- !query 5 +-- !query SELECT '1' + cast(1 as float) FROM t --- !query 5 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 5 output +-- !query output 2.0 --- !query 6 +-- !query SELECT '1' + cast(1 as double) FROM t --- !query 6 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 6 output +-- !query output 2.0 --- !query 7 +-- !query SELECT '1' + cast(1 as decimal(10, 0)) FROM t --- !query 7 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 7 output +-- !query output 2.0 --- !query 8 +-- !query SELECT '1' + '1' FROM t --- !query 8 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 8 output +-- !query output 2.0 --- !query 9 +-- !query SELECT '1' + cast('1' as binary) FROM t --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) + 
CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 10 +-- !query SELECT '1' + cast(1 as boolean) FROM t --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) + CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 11 +-- !query SELECT '1' + cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 --- !query 12 +-- !query SELECT '1' + cast('2017-12-11 09:30:00' as date) FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) + CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 --- !query 13 +-- !query SELECT '1' - cast(1 as tinyint) FROM t --- !query 13 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 13 output +-- !query output 0.0 --- !query 14 +-- !query SELECT '1' - cast(1 as smallint) FROM t --- !query 14 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> 
--- !query 14 output +-- !query output 0.0 --- !query 15 +-- !query SELECT '1' - cast(1 as int) FROM t --- !query 15 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 15 output +-- !query output 0.0 --- !query 16 +-- !query SELECT '1' - cast(1 as bigint) FROM t --- !query 16 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 16 output +-- !query output 0.0 --- !query 17 +-- !query SELECT '1' - cast(1 as float) FROM t --- !query 17 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 17 output +-- !query output 0.0 --- !query 18 +-- !query SELECT '1' - cast(1 as double) FROM t --- !query 18 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 18 output +-- !query output 0.0 --- !query 19 +-- !query SELECT '1' - cast(1 as decimal(10, 0)) FROM t --- !query 19 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 19 output +-- !query output 0.0 --- !query 20 +-- !query SELECT '1' - '1' FROM t --- !query 20 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 20 output +-- !query output 0.0 --- !query 21 +-- !query SELECT '1' - cast('1' as binary) FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) - CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 22 +-- !query SELECT '1' - cast(1 as boolean) FROM t --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) - CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - 
CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 23 +-- !query SELECT '1' - cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 +cannot resolve 'subtracttimestamps('1', CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: argument 1 requires timestamp type, however, ''1'' is of string type.; line 1 pos 7 --- !query 24 +-- !query SELECT '1' - cast('2017-12-11 09:30:00' as date) FROM t --- !query 24 schema -struct<> --- !query 24 output -org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) - CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 +-- !query schema +struct +-- !query output +NULL --- !query 25 +-- !query SELECT '1' * cast(1 as tinyint) FROM t --- !query 25 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 25 output +-- !query output 1.0 --- !query 26 +-- !query SELECT '1' * cast(1 as smallint) FROM t --- !query 26 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 26 output +-- !query output 1.0 --- !query 27 +-- !query SELECT '1' * cast(1 as int) FROM t --- !query 27 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 27 output +-- !query output 1.0 --- !query 28 +-- !query SELECT '1' * cast(1 as bigint) FROM t --- !query 28 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 28 output +-- 
!query output 1.0 --- !query 29 +-- !query SELECT '1' * cast(1 as float) FROM t --- !query 29 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 29 output +-- !query output 1.0 --- !query 30 +-- !query SELECT '1' * cast(1 as double) FROM t --- !query 30 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 30 output +-- !query output 1.0 --- !query 31 +-- !query SELECT '1' * cast(1 as decimal(10, 0)) FROM t --- !query 31 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 31 output +-- !query output 1.0 --- !query 32 +-- !query SELECT '1' * '1' FROM t --- !query 32 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 32 output +-- !query output 1.0 --- !query 33 +-- !query SELECT '1' * cast('1' as binary) FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) * CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 34 +-- !query SELECT '1' * cast(1 as boolean) FROM t --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) * CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 35 +-- !query SELECT '1' * cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) * CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST('2017-12-11 
09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 --- !query 36 +-- !query SELECT '1' * cast('2017-12-11 09:30:00' as date) FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) * CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) * CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 --- !query 37 +-- !query SELECT '1' / cast(1 as tinyint) FROM t --- !query 37 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- !query 37 output +-- !query output 1.0 --- !query 38 +-- !query SELECT '1' / cast(1 as smallint) FROM t --- !query 38 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 38 output +-- !query output 1.0 --- !query 39 +-- !query SELECT '1' / cast(1 as int) FROM t --- !query 39 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 39 output +-- !query output 1.0 --- !query 40 +-- !query SELECT '1' / cast(1 as bigint) FROM t --- !query 40 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 40 output +-- !query output 1.0 --- !query 41 +-- !query SELECT '1' / cast(1 as float) FROM t --- !query 41 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 41 output +-- !query output 1.0 --- !query 42 +-- !query SELECT '1' / cast(1 as double) FROM t --- !query 42 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 42 output +-- !query output 1.0 --- !query 43 +-- !query SELECT '1' / cast(1 as decimal(10, 0)) FROM t --- !query 43 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 43 output +-- !query output 1.0 
--- !query 44 +-- !query SELECT '1' / '1' FROM t --- !query 44 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 44 output +-- !query output 1.0 --- !query 45 +-- !query SELECT '1' / cast('1' as binary) FROM t --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) / CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 46 +-- !query SELECT '1' / cast(1 as boolean) FROM t --- !query 46 schema +-- !query schema struct<> --- !query 46 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) / CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 47 +-- !query SELECT '1' / cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 --- !query 48 +-- !query SELECT '1' / cast('2017-12-11 09:30:00' as date) FROM t --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) / CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 --- !query 49 +-- !query SELECT '1' % cast(1 as tinyint) FROM t --- !query 49 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS TINYINT) AS DOUBLE)):double> --- 
!query 49 output +-- !query output 0.0 --- !query 50 +-- !query SELECT '1' % cast(1 as smallint) FROM t --- !query 50 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS SMALLINT) AS DOUBLE)):double> --- !query 50 output +-- !query output 0.0 --- !query 51 +-- !query SELECT '1' % cast(1 as int) FROM t --- !query 51 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS INT) AS DOUBLE)):double> --- !query 51 output +-- !query output 0.0 --- !query 52 +-- !query SELECT '1' % cast(1 as bigint) FROM t --- !query 52 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS BIGINT) AS DOUBLE)):double> --- !query 52 output +-- !query output 0.0 --- !query 53 +-- !query SELECT '1' % cast(1 as float) FROM t --- !query 53 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS FLOAT) AS DOUBLE)):double> --- !query 53 output +-- !query output 0.0 --- !query 54 +-- !query SELECT '1' % cast(1 as double) FROM t --- !query 54 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 54 output +-- !query output 0.0 --- !query 55 +-- !query SELECT '1' % cast(1 as decimal(10, 0)) FROM t --- !query 55 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):double> --- !query 55 output +-- !query output 0.0 --- !query 56 +-- !query SELECT '1' % '1' FROM t --- !query 56 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 56 output +-- !query output 0.0 --- !query 57 +-- !query SELECT '1' % cast('1' as binary) FROM t --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) % CAST('1' AS BINARY))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 58 +-- !query SELECT '1' % cast(1 as boolean) FROM t --- !query 58 schema +-- !query schema 
struct<> --- !query 58 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) % CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 59 +-- !query SELECT '1' % cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 --- !query 60 +-- !query SELECT '1' % cast('2017-12-11 09:30:00' as date) FROM t --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in '(CAST('1' AS DOUBLE) % CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 --- !query 61 +-- !query SELECT pmod('1', cast(1 as tinyint)) FROM t --- !query 61 schema +-- !query schema struct --- !query 61 output +-- !query output 0.0 --- !query 62 +-- !query SELECT pmod('1', cast(1 as smallint)) FROM t --- !query 62 schema +-- !query schema struct --- !query 62 output +-- !query output 0.0 --- !query 63 +-- !query SELECT pmod('1', cast(1 as int)) FROM t --- !query 63 schema +-- !query schema struct --- !query 63 output +-- !query output 0.0 --- !query 64 +-- !query SELECT pmod('1', cast(1 as bigint)) FROM t --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output 0.0 --- !query 65 +-- !query SELECT pmod('1', cast(1 as float)) FROM t --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output 0.0 --- !query 66 +-- !query SELECT pmod('1', cast(1 as double)) FROM 
t --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output 0.0 --- !query 67 +-- !query SELECT pmod('1', cast(1 as decimal(10, 0))) FROM t --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output 0.0 --- !query 68 +-- !query SELECT pmod('1', '1') FROM t --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output 0.0 --- !query 69 +-- !query SELECT pmod('1', cast('1' as binary)) FROM t --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST('1' AS BINARY))' due to data type mismatch: differing types in 'pmod(CAST('1' AS DOUBLE), CAST('1' AS BINARY))' (double and binary).; line 1 pos 7 --- !query 70 +-- !query SELECT pmod('1', cast(1 as boolean)) FROM t --- !query 70 schema +-- !query schema struct<> --- !query 70 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST(1 AS BOOLEAN))' due to data type mismatch: differing types in 'pmod(CAST('1' AS DOUBLE), CAST(1 AS BOOLEAN))' (double and boolean).; line 1 pos 7 --- !query 71 +-- !query SELECT pmod('1', cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' due to data type mismatch: differing types in 'pmod(CAST('1' AS DOUBLE), CAST('2017-12-11 09:30:00.0' AS TIMESTAMP))' (double and timestamp).; line 1 pos 7 --- !query 72 +-- !query SELECT pmod('1', cast('2017-12-11 09:30:00' as date)) FROM t --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS DOUBLE), CAST('2017-12-11 09:30:00' AS DATE))' due to data type mismatch: differing types in 'pmod(CAST('1' AS 
DOUBLE), CAST('2017-12-11 09:30:00' AS DATE))' (double and date).; line 1 pos 7 --- !query 73 +-- !query SELECT cast(1 as tinyint) + '1' FROM t --- !query 73 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 73 output +-- !query output 2.0 --- !query 74 +-- !query SELECT cast(1 as smallint) + '1' FROM t --- !query 74 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 74 output +-- !query output 2.0 --- !query 75 +-- !query SELECT cast(1 as int) + '1' FROM t --- !query 75 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 75 output +-- !query output 2.0 --- !query 76 +-- !query SELECT cast(1 as bigint) + '1' FROM t --- !query 76 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 76 output +-- !query output 2.0 --- !query 77 +-- !query SELECT cast(1 as float) + '1' FROM t --- !query 77 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 77 output +-- !query output 2.0 --- !query 78 +-- !query SELECT cast(1 as double) + '1' FROM t --- !query 78 schema +-- !query schema struct<(CAST(1 AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 78 output +-- !query output 2.0 --- !query 79 +-- !query SELECT cast(1 as decimal(10, 0)) + '1' FROM t --- !query 79 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) + CAST(1 AS DOUBLE)):double> --- !query 79 output +-- !query output 2.0 --- !query 80 +-- !query SELECT cast('1' as binary) + '1' FROM t --- !query 80 schema +-- !query schema struct<> --- !query 80 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) + CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 81 +-- !query SELECT 
cast(1 as boolean) + '1' FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) + CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 82 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) + '1' FROM t --- !query 82 schema +-- !query schema struct<> --- !query 82 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) + CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 --- !query 83 +-- !query SELECT cast('2017-12-11 09:30:00' as date) + '1' FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) + CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) + CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 +cannot resolve 'date_add(CAST('2017-12-11 09:30:00' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 --- !query 84 +-- !query SELECT cast(1 as tinyint) - '1' FROM t --- !query 84 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 84 output +-- !query output 0.0 --- !query 85 +-- !query SELECT cast(1 as smallint) - '1' FROM t --- !query 85 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 85 output +-- !query output 0.0 --- !query 86 +-- !query SELECT cast(1 as int) - '1' FROM t --- !query 86 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) - 
CAST(1 AS DOUBLE)):double> --- !query 86 output +-- !query output 0.0 --- !query 87 +-- !query SELECT cast(1 as bigint) - '1' FROM t --- !query 87 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 87 output +-- !query output 0.0 --- !query 88 +-- !query SELECT cast(1 as float) - '1' FROM t --- !query 88 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 88 output +-- !query output 0.0 --- !query 89 +-- !query SELECT cast(1 as double) - '1' FROM t --- !query 89 schema +-- !query schema struct<(CAST(1 AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 89 output +-- !query output 0.0 --- !query 90 +-- !query SELECT cast(1 as decimal(10, 0)) - '1' FROM t --- !query 90 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) - CAST(1 AS DOUBLE)):double> --- !query 90 output +-- !query output 0.0 --- !query 91 +-- !query SELECT cast('1' as binary) - '1' FROM t --- !query 91 schema +-- !query schema struct<> --- !query 91 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) - CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) - CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 92 +-- !query SELECT cast(1 as boolean) - '1' FROM t --- !query 92 schema +-- !query schema struct<> --- !query 92 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) - CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) - CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 93 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) - '1' FROM t --- !query 93 schema +-- !query schema struct<> --- !query 93 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST('1' AS DOUBLE))' due to data 
type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) - CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 +cannot resolve 'subtracttimestamps(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), '1')' due to data type mismatch: argument 2 requires timestamp type, however, ''1'' is of string type.; line 1 pos 7 --- !query 94 +-- !query SELECT cast('2017-12-11 09:30:00' as date) - '1' FROM t --- !query 94 schema +-- !query schema struct<> --- !query 94 output +-- !query output org.apache.spark.sql.AnalysisException -cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) - CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) - CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 +cannot resolve 'date_sub(CAST('2017-12-11 09:30:00' AS DATE), '1')' due to data type mismatch: argument 2 requires (int or smallint or tinyint) type, however, ''1'' is of string type.; line 1 pos 7 --- !query 95 +-- !query SELECT cast(1 as tinyint) * '1' FROM t --- !query 95 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 95 output +-- !query output 1.0 --- !query 96 +-- !query SELECT cast(1 as smallint) * '1' FROM t --- !query 96 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 96 output +-- !query output 1.0 --- !query 97 +-- !query SELECT cast(1 as int) * '1' FROM t --- !query 97 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 97 output +-- !query output 1.0 --- !query 98 +-- !query SELECT cast(1 as bigint) * '1' FROM t --- !query 98 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 98 output +-- !query output 1.0 --- !query 99 +-- !query SELECT cast(1 as float) * '1' FROM t --- !query 99 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) * CAST(1 AS DOUBLE)):double> 
--- !query 99 output +-- !query output 1.0 --- !query 100 +-- !query SELECT cast(1 as double) * '1' FROM t --- !query 100 schema +-- !query schema struct<(CAST(1 AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 100 output +-- !query output 1.0 --- !query 101 +-- !query SELECT cast(1 as decimal(10, 0)) * '1' FROM t --- !query 101 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) * CAST(1 AS DOUBLE)):double> --- !query 101 output +-- !query output 1.0 --- !query 102 +-- !query SELECT cast('1' as binary) * '1' FROM t --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) * CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) * CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 103 +-- !query SELECT cast(1 as boolean) * '1' FROM t --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) * CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) * CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 104 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) * '1' FROM t --- !query 104 schema +-- !query schema struct<> --- !query 104 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) * CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 --- !query 105 +-- !query SELECT cast('2017-12-11 09:30:00' as date) * '1' FROM t --- !query 105 schema +-- !query schema struct<> --- !query 105 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) * CAST('1' AS DOUBLE))' due to data type 
mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) * CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 --- !query 106 +-- !query SELECT cast(1 as tinyint) / '1' FROM t --- !query 106 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 106 output +-- !query output 1.0 --- !query 107 +-- !query SELECT cast(1 as smallint) / '1' FROM t --- !query 107 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 107 output +-- !query output 1.0 --- !query 108 +-- !query SELECT cast(1 as int) / '1' FROM t --- !query 108 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 108 output +-- !query output 1.0 --- !query 109 +-- !query SELECT cast(1 as bigint) / '1' FROM t --- !query 109 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 109 output +-- !query output 1.0 --- !query 110 +-- !query SELECT cast(1 as float) / '1' FROM t --- !query 110 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) / CAST(CAST(1 AS DOUBLE) AS DOUBLE)):double> --- !query 110 output +-- !query output 1.0 --- !query 111 +-- !query SELECT cast(1 as double) / '1' FROM t --- !query 111 schema +-- !query schema struct<(CAST(1 AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 111 output +-- !query output 1.0 --- !query 112 +-- !query SELECT cast(1 as decimal(10, 0)) / '1' FROM t --- !query 112 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) / CAST(1 AS DOUBLE)):double> --- !query 112 output +-- !query output 1.0 --- !query 113 +-- !query SELECT cast('1' as binary) / '1' FROM t --- !query 113 schema +-- !query schema struct<> --- !query 113 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) / CAST('1' AS DOUBLE))' due to 
data type mismatch: differing types in '(CAST('1' AS BINARY) / CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 114 +-- !query SELECT cast(1 as boolean) / '1' FROM t --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) / CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) / CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 115 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) / '1' FROM t --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) / CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 --- !query 116 +-- !query SELECT cast('2017-12-11 09:30:00' as date) / '1' FROM t --- !query 116 schema +-- !query schema struct<> --- !query 116 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) / CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 --- !query 117 +-- !query SELECT cast(1 as tinyint) % '1' FROM t --- !query 117 schema +-- !query schema struct<(CAST(CAST(1 AS TINYINT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 117 output +-- !query output 0.0 --- !query 118 +-- !query SELECT cast(1 as smallint) % '1' FROM t --- !query 118 schema +-- !query schema struct<(CAST(CAST(1 AS SMALLINT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 118 output +-- !query output 0.0 --- !query 119 +-- !query SELECT cast(1 as int) % '1' FROM t --- !query 119 schema +-- !query schema struct<(CAST(CAST(1 AS INT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- 
!query 119 output +-- !query output 0.0 --- !query 120 +-- !query SELECT cast(1 as bigint) % '1' FROM t --- !query 120 schema +-- !query schema struct<(CAST(CAST(1 AS BIGINT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 120 output +-- !query output 0.0 --- !query 121 +-- !query SELECT cast(1 as float) % '1' FROM t --- !query 121 schema +-- !query schema struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 121 output +-- !query output 0.0 --- !query 122 +-- !query SELECT cast(1 as double) % '1' FROM t --- !query 122 schema +-- !query schema struct<(CAST(1 AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 122 output +-- !query output 0.0 --- !query 123 +-- !query SELECT cast(1 as decimal(10, 0)) % '1' FROM t --- !query 123 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) % CAST(1 AS DOUBLE)):double> --- !query 123 output +-- !query output 0.0 --- !query 124 +-- !query SELECT cast('1' as binary) % '1' FROM t --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('1' AS BINARY) % CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('1' AS BINARY) % CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 125 +-- !query SELECT cast(1 as boolean) % '1' FROM t --- !query 125 schema +-- !query schema struct<> --- !query 125 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST(1 AS BOOLEAN) % CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST(1 AS BOOLEAN) % CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 126 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) % '1' FROM t --- !query 126 schema +-- !query schema struct<> --- !query 126 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST('1' AS DOUBLE))' due to data type 
mismatch: differing types in '(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) % CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 --- !query 127 +-- !query SELECT cast('2017-12-11 09:30:00' as date) % '1' FROM t --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(CAST('2017-12-11 09:30:00' AS DATE) % CAST('1' AS DOUBLE))' due to data type mismatch: differing types in '(CAST('2017-12-11 09:30:00' AS DATE) % CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 --- !query 128 +-- !query SELECT pmod(cast(1 as tinyint), '1') FROM t --- !query 128 schema +-- !query schema struct --- !query 128 output +-- !query output 0.0 --- !query 129 +-- !query SELECT pmod(cast(1 as smallint), '1') FROM t --- !query 129 schema +-- !query schema struct --- !query 129 output +-- !query output 0.0 --- !query 130 +-- !query SELECT pmod(cast(1 as int), '1') FROM t --- !query 130 schema +-- !query schema struct --- !query 130 output +-- !query output 0.0 --- !query 131 +-- !query SELECT pmod(cast(1 as bigint), '1') FROM t --- !query 131 schema +-- !query schema struct --- !query 131 output +-- !query output 0.0 --- !query 132 +-- !query SELECT pmod(cast(1 as float), '1') FROM t --- !query 132 schema +-- !query schema struct --- !query 132 output +-- !query output 0.0 --- !query 133 +-- !query SELECT pmod(cast(1 as double), '1') FROM t --- !query 133 schema +-- !query schema struct --- !query 133 output +-- !query output 0.0 --- !query 134 +-- !query SELECT pmod(cast(1 as decimal(10, 0)), '1') FROM t --- !query 134 schema +-- !query schema struct --- !query 134 output +-- !query output 0.0 --- !query 135 +-- !query SELECT pmod(cast('1' as binary), '1') FROM t --- !query 135 schema +-- !query schema struct<> --- !query 135 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('1' AS BINARY), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 
'pmod(CAST('1' AS BINARY), CAST('1' AS DOUBLE))' (binary and double).; line 1 pos 7 --- !query 136 +-- !query SELECT pmod(cast(1 as boolean), '1') FROM t --- !query 136 schema +-- !query schema struct<> --- !query 136 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST(1 AS BOOLEAN), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 'pmod(CAST(1 AS BOOLEAN), CAST('1' AS DOUBLE))' (boolean and double).; line 1 pos 7 --- !query 137 +-- !query SELECT pmod(cast('2017-12-11 09:30:00.0' as timestamp), '1') FROM t --- !query 137 schema +-- !query schema struct<> --- !query 137 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00.0' AS TIMESTAMP), CAST('1' AS DOUBLE))' (timestamp and double).; line 1 pos 7 --- !query 138 +-- !query SELECT pmod(cast('2017-12-11 09:30:00' as date), '1') FROM t --- !query 138 schema +-- !query schema struct<> --- !query 138 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST('1' AS DOUBLE))' due to data type mismatch: differing types in 'pmod(CAST('2017-12-11 09:30:00' AS DATE), CAST('1' AS DOUBLE))' (date and double).; line 1 pos 7 --- !query 139 +-- !query SELECT '1' = cast(1 as tinyint) FROM t --- !query 139 schema +-- !query schema struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> --- !query 139 output +-- !query output true --- !query 140 +-- !query SELECT '1' = cast(1 as smallint) FROM t --- !query 140 schema +-- !query schema struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> --- !query 140 output +-- !query output true --- !query 141 +-- !query SELECT '1' = cast(1 as int) FROM t --- !query 141 schema +-- !query schema struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> --- !query 141 output +-- !query output true --- 
!query 142 +-- !query SELECT '1' = cast(1 as bigint) FROM t --- !query 142 schema +-- !query schema struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> --- !query 142 output +-- !query output true --- !query 143 +-- !query SELECT '1' = cast(1 as float) FROM t --- !query 143 schema +-- !query schema struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> --- !query 143 output +-- !query output true --- !query 144 +-- !query SELECT '1' = cast(1 as double) FROM t --- !query 144 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 144 output +-- !query output true --- !query 145 +-- !query SELECT '1' = cast(1 as decimal(10, 0)) FROM t --- !query 145 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 145 output +-- !query output true --- !query 146 +-- !query SELECT '1' = '1' FROM t --- !query 146 schema +-- !query schema struct<(1 = 1):boolean> --- !query 146 output +-- !query output true --- !query 147 +-- !query SELECT '1' = cast('1' as binary) FROM t --- !query 147 schema +-- !query schema struct<(CAST(1 AS BINARY) = CAST(1 AS BINARY)):boolean> --- !query 147 output +-- !query output true --- !query 148 +-- !query SELECT '1' = cast(1 as boolean) FROM t --- !query 148 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN)):boolean> --- !query 148 output +-- !query output true --- !query 149 +-- !query SELECT '1' = cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 149 schema +-- !query schema struct<(CAST(1 AS TIMESTAMP) = CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> --- !query 149 output +-- !query output NULL --- !query 150 +-- !query SELECT '1' = cast('2017-12-11 09:30:00' as date) FROM t --- !query 150 schema +-- !query schema struct<(CAST(1 AS DATE) = CAST(2017-12-11 09:30:00 AS DATE)):boolean> --- !query 150 output +-- !query output NULL --- !query 151 +-- !query SELECT cast(1 as tinyint) = '1' FROM t --- !query 151 schema 
+-- !query schema struct<(CAST(1 AS TINYINT) = CAST(1 AS TINYINT)):boolean> --- !query 151 output +-- !query output true --- !query 152 +-- !query SELECT cast(1 as smallint) = '1' FROM t --- !query 152 schema +-- !query schema struct<(CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT)):boolean> --- !query 152 output +-- !query output true --- !query 153 +-- !query SELECT cast(1 as int) = '1' FROM t --- !query 153 schema +-- !query schema struct<(CAST(1 AS INT) = CAST(1 AS INT)):boolean> --- !query 153 output +-- !query output true --- !query 154 +-- !query SELECT cast(1 as bigint) = '1' FROM t --- !query 154 schema +-- !query schema struct<(CAST(1 AS BIGINT) = CAST(1 AS BIGINT)):boolean> --- !query 154 output +-- !query output true --- !query 155 +-- !query SELECT cast(1 as float) = '1' FROM t --- !query 155 schema +-- !query schema struct<(CAST(1 AS FLOAT) = CAST(1 AS FLOAT)):boolean> --- !query 155 output +-- !query output true --- !query 156 +-- !query SELECT cast(1 as double) = '1' FROM t --- !query 156 schema +-- !query schema struct<(CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 156 output +-- !query output true --- !query 157 +-- !query SELECT cast(1 as decimal(10, 0)) = '1' FROM t --- !query 157 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE)):boolean> --- !query 157 output +-- !query output true --- !query 158 +-- !query SELECT cast('1' as binary) = '1' FROM t --- !query 158 schema +-- !query schema struct<(CAST(1 AS BINARY) = CAST(1 AS BINARY)):boolean> --- !query 158 output +-- !query output true --- !query 159 +-- !query SELECT cast(1 as boolean) = '1' FROM t --- !query 159 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN)):boolean> --- !query 159 output +-- !query output true --- !query 160 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) = '1' FROM t --- !query 160 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) = CAST(1 AS 
TIMESTAMP)):boolean> --- !query 160 output +-- !query output NULL --- !query 161 +-- !query SELECT cast('2017-12-11 09:30:00' as date) = '1' FROM t --- !query 161 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00 AS DATE) = CAST(1 AS DATE)):boolean> --- !query 161 output +-- !query output NULL --- !query 162 +-- !query SELECT '1' <=> cast(1 as tinyint) FROM t --- !query 162 schema +-- !query schema struct<(CAST(1 AS TINYINT) <=> CAST(1 AS TINYINT)):boolean> --- !query 162 output +-- !query output true --- !query 163 +-- !query SELECT '1' <=> cast(1 as smallint) FROM t --- !query 163 schema +-- !query schema struct<(CAST(1 AS SMALLINT) <=> CAST(1 AS SMALLINT)):boolean> --- !query 163 output +-- !query output true --- !query 164 +-- !query SELECT '1' <=> cast(1 as int) FROM t --- !query 164 schema +-- !query schema struct<(CAST(1 AS INT) <=> CAST(1 AS INT)):boolean> --- !query 164 output +-- !query output true --- !query 165 +-- !query SELECT '1' <=> cast(1 as bigint) FROM t --- !query 165 schema +-- !query schema struct<(CAST(1 AS BIGINT) <=> CAST(1 AS BIGINT)):boolean> --- !query 165 output +-- !query output true --- !query 166 +-- !query SELECT '1' <=> cast(1 as float) FROM t --- !query 166 schema +-- !query schema struct<(CAST(1 AS FLOAT) <=> CAST(1 AS FLOAT)):boolean> --- !query 166 output +-- !query output true --- !query 167 +-- !query SELECT '1' <=> cast(1 as double) FROM t --- !query 167 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 167 output +-- !query output true --- !query 168 +-- !query SELECT '1' <=> cast(1 as decimal(10, 0)) FROM t --- !query 168 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 168 output +-- !query output true --- !query 169 +-- !query SELECT '1' <=> '1' FROM t --- !query 169 schema +-- !query schema struct<(1 <=> 1):boolean> --- !query 169 output +-- !query output true --- !query 170 +-- !query SELECT '1' <=> 
cast('1' as binary) FROM t --- !query 170 schema +-- !query schema struct<(CAST(1 AS BINARY) <=> CAST(1 AS BINARY)):boolean> --- !query 170 output +-- !query output true --- !query 171 +-- !query SELECT '1' <=> cast(1 as boolean) FROM t --- !query 171 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) <=> CAST(1 AS BOOLEAN)):boolean> --- !query 171 output +-- !query output true --- !query 172 +-- !query SELECT '1' <=> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 172 schema +-- !query schema struct<(CAST(1 AS TIMESTAMP) <=> CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> --- !query 172 output +-- !query output false --- !query 173 +-- !query SELECT '1' <=> cast('2017-12-11 09:30:00' as date) FROM t --- !query 173 schema +-- !query schema struct<(CAST(1 AS DATE) <=> CAST(2017-12-11 09:30:00 AS DATE)):boolean> --- !query 173 output +-- !query output false --- !query 174 +-- !query SELECT cast(1 as tinyint) <=> '1' FROM t --- !query 174 schema +-- !query schema struct<(CAST(1 AS TINYINT) <=> CAST(1 AS TINYINT)):boolean> --- !query 174 output +-- !query output true --- !query 175 +-- !query SELECT cast(1 as smallint) <=> '1' FROM t --- !query 175 schema +-- !query schema struct<(CAST(1 AS SMALLINT) <=> CAST(1 AS SMALLINT)):boolean> --- !query 175 output +-- !query output true --- !query 176 +-- !query SELECT cast(1 as int) <=> '1' FROM t --- !query 176 schema +-- !query schema struct<(CAST(1 AS INT) <=> CAST(1 AS INT)):boolean> --- !query 176 output +-- !query output true --- !query 177 +-- !query SELECT cast(1 as bigint) <=> '1' FROM t --- !query 177 schema +-- !query schema struct<(CAST(1 AS BIGINT) <=> CAST(1 AS BIGINT)):boolean> --- !query 177 output +-- !query output true --- !query 178 +-- !query SELECT cast(1 as float) <=> '1' FROM t --- !query 178 schema +-- !query schema struct<(CAST(1 AS FLOAT) <=> CAST(1 AS FLOAT)):boolean> --- !query 178 output +-- !query output true --- !query 179 +-- !query SELECT cast(1 as double) <=> '1' FROM t --- 
!query 179 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 179 output +-- !query output true --- !query 180 +-- !query SELECT cast(1 as decimal(10, 0)) <=> '1' FROM t --- !query 180 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <=> CAST(1 AS DOUBLE)):boolean> --- !query 180 output +-- !query output true --- !query 181 +-- !query SELECT cast('1' as binary) <=> '1' FROM t --- !query 181 schema +-- !query schema struct<(CAST(1 AS BINARY) <=> CAST(1 AS BINARY)):boolean> --- !query 181 output +-- !query output true --- !query 182 +-- !query SELECT cast(1 as boolean) <=> '1' FROM t --- !query 182 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) <=> CAST(1 AS BOOLEAN)):boolean> --- !query 182 output +-- !query output true --- !query 183 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <=> '1' FROM t --- !query 183 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) <=> CAST(1 AS TIMESTAMP)):boolean> --- !query 183 output +-- !query output false --- !query 184 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <=> '1' FROM t --- !query 184 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00 AS DATE) <=> CAST(1 AS DATE)):boolean> --- !query 184 output +-- !query output false --- !query 185 +-- !query SELECT '1' < cast(1 as tinyint) FROM t --- !query 185 schema +-- !query schema struct<(CAST(1 AS TINYINT) < CAST(1 AS TINYINT)):boolean> --- !query 185 output +-- !query output false --- !query 186 +-- !query SELECT '1' < cast(1 as smallint) FROM t --- !query 186 schema +-- !query schema struct<(CAST(1 AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> --- !query 186 output +-- !query output false --- !query 187 +-- !query SELECT '1' < cast(1 as int) FROM t --- !query 187 schema +-- !query schema struct<(CAST(1 AS INT) < CAST(1 AS INT)):boolean> --- !query 187 output +-- !query output false --- !query 188 +-- !query SELECT '1' < cast(1 as bigint) FROM t --- !query 
188 schema +-- !query schema struct<(CAST(1 AS BIGINT) < CAST(1 AS BIGINT)):boolean> --- !query 188 output +-- !query output false --- !query 189 +-- !query SELECT '1' < cast(1 as float) FROM t --- !query 189 schema +-- !query schema struct<(CAST(1 AS FLOAT) < CAST(1 AS FLOAT)):boolean> --- !query 189 output +-- !query output false --- !query 190 +-- !query SELECT '1' < cast(1 as double) FROM t --- !query 190 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 190 output +-- !query output false --- !query 191 +-- !query SELECT '1' < cast(1 as decimal(10, 0)) FROM t --- !query 191 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 191 output +-- !query output false --- !query 192 +-- !query SELECT '1' < '1' FROM t --- !query 192 schema +-- !query schema struct<(1 < 1):boolean> --- !query 192 output +-- !query output false --- !query 193 +-- !query SELECT '1' < cast('1' as binary) FROM t --- !query 193 schema +-- !query schema struct<(CAST(1 AS BINARY) < CAST(1 AS BINARY)):boolean> --- !query 193 output +-- !query output false --- !query 194 +-- !query SELECT '1' < cast(1 as boolean) FROM t --- !query 194 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) < CAST(1 AS BOOLEAN)):boolean> --- !query 194 output +-- !query output false --- !query 195 +-- !query SELECT '1' < cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 195 schema +-- !query schema struct<(CAST(1 AS TIMESTAMP) < CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> --- !query 195 output +-- !query output NULL --- !query 196 +-- !query SELECT '1' < cast('2017-12-11 09:30:00' as date) FROM t --- !query 196 schema +-- !query schema struct<(CAST(1 AS DATE) < CAST(2017-12-11 09:30:00 AS DATE)):boolean> --- !query 196 output +-- !query output NULL --- !query 197 +-- !query SELECT '1' <= cast(1 as tinyint) FROM t --- !query 197 schema +-- !query schema struct<(CAST(1 AS TINYINT) <= CAST(1 AS 
TINYINT)):boolean> --- !query 197 output +-- !query output true --- !query 198 +-- !query SELECT '1' <= cast(1 as smallint) FROM t --- !query 198 schema +-- !query schema struct<(CAST(1 AS SMALLINT) <= CAST(1 AS SMALLINT)):boolean> --- !query 198 output +-- !query output true --- !query 199 +-- !query SELECT '1' <= cast(1 as int) FROM t --- !query 199 schema +-- !query schema struct<(CAST(1 AS INT) <= CAST(1 AS INT)):boolean> --- !query 199 output +-- !query output true --- !query 200 +-- !query SELECT '1' <= cast(1 as bigint) FROM t --- !query 200 schema +-- !query schema struct<(CAST(1 AS BIGINT) <= CAST(1 AS BIGINT)):boolean> --- !query 200 output +-- !query output true --- !query 201 +-- !query SELECT '1' <= cast(1 as float) FROM t --- !query 201 schema +-- !query schema struct<(CAST(1 AS FLOAT) <= CAST(1 AS FLOAT)):boolean> --- !query 201 output +-- !query output true --- !query 202 +-- !query SELECT '1' <= cast(1 as double) FROM t --- !query 202 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 202 output +-- !query output true --- !query 203 +-- !query SELECT '1' <= cast(1 as decimal(10, 0)) FROM t --- !query 203 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 203 output +-- !query output true --- !query 204 +-- !query SELECT '1' <= '1' FROM t --- !query 204 schema +-- !query schema struct<(1 <= 1):boolean> --- !query 204 output +-- !query output true --- !query 205 +-- !query SELECT '1' <= cast('1' as binary) FROM t --- !query 205 schema +-- !query schema struct<(CAST(1 AS BINARY) <= CAST(1 AS BINARY)):boolean> --- !query 205 output +-- !query output true --- !query 206 +-- !query SELECT '1' <= cast(1 as boolean) FROM t --- !query 206 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) <= CAST(1 AS BOOLEAN)):boolean> --- !query 206 output +-- !query output true --- !query 207 +-- !query SELECT '1' <= cast('2017-12-11 09:30:00.0' as timestamp) FROM 
t --- !query 207 schema +-- !query schema struct<(CAST(1 AS TIMESTAMP) <= CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> --- !query 207 output +-- !query output NULL --- !query 208 +-- !query SELECT '1' <= cast('2017-12-11 09:30:00' as date) FROM t --- !query 208 schema +-- !query schema struct<(CAST(1 AS DATE) <= CAST(2017-12-11 09:30:00 AS DATE)):boolean> --- !query 208 output +-- !query output NULL --- !query 209 +-- !query SELECT '1' > cast(1 as tinyint) FROM t --- !query 209 schema +-- !query schema struct<(CAST(1 AS TINYINT) > CAST(1 AS TINYINT)):boolean> --- !query 209 output +-- !query output false --- !query 210 +-- !query SELECT '1' > cast(1 as smallint) FROM t --- !query 210 schema +-- !query schema struct<(CAST(1 AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> --- !query 210 output +-- !query output false --- !query 211 +-- !query SELECT '1' > cast(1 as int) FROM t --- !query 211 schema +-- !query schema struct<(CAST(1 AS INT) > CAST(1 AS INT)):boolean> --- !query 211 output +-- !query output false --- !query 212 +-- !query SELECT '1' > cast(1 as bigint) FROM t --- !query 212 schema +-- !query schema struct<(CAST(1 AS BIGINT) > CAST(1 AS BIGINT)):boolean> --- !query 212 output +-- !query output false --- !query 213 +-- !query SELECT '1' > cast(1 as float) FROM t --- !query 213 schema +-- !query schema struct<(CAST(1 AS FLOAT) > CAST(1 AS FLOAT)):boolean> --- !query 213 output +-- !query output false --- !query 214 +-- !query SELECT '1' > cast(1 as double) FROM t --- !query 214 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 214 output +-- !query output false --- !query 215 +-- !query SELECT '1' > cast(1 as decimal(10, 0)) FROM t --- !query 215 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 215 output +-- !query output false --- !query 216 +-- !query SELECT '1' > '1' FROM t --- !query 216 schema +-- !query schema struct<(1 > 1):boolean> --- 
!query 216 output +-- !query output false --- !query 217 +-- !query SELECT '1' > cast('1' as binary) FROM t --- !query 217 schema +-- !query schema struct<(CAST(1 AS BINARY) > CAST(1 AS BINARY)):boolean> --- !query 217 output +-- !query output false --- !query 218 +-- !query SELECT '1' > cast(1 as boolean) FROM t --- !query 218 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) > CAST(1 AS BOOLEAN)):boolean> --- !query 218 output +-- !query output false --- !query 219 +-- !query SELECT '1' > cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 219 schema +-- !query schema struct<(CAST(1 AS TIMESTAMP) > CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> --- !query 219 output +-- !query output NULL --- !query 220 +-- !query SELECT '1' > cast('2017-12-11 09:30:00' as date) FROM t --- !query 220 schema +-- !query schema struct<(CAST(1 AS DATE) > CAST(2017-12-11 09:30:00 AS DATE)):boolean> --- !query 220 output +-- !query output NULL --- !query 221 +-- !query SELECT '1' >= cast(1 as tinyint) FROM t --- !query 221 schema +-- !query schema struct<(CAST(1 AS TINYINT) >= CAST(1 AS TINYINT)):boolean> --- !query 221 output +-- !query output true --- !query 222 +-- !query SELECT '1' >= cast(1 as smallint) FROM t --- !query 222 schema +-- !query schema struct<(CAST(1 AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> --- !query 222 output +-- !query output true --- !query 223 +-- !query SELECT '1' >= cast(1 as int) FROM t --- !query 223 schema +-- !query schema struct<(CAST(1 AS INT) >= CAST(1 AS INT)):boolean> --- !query 223 output +-- !query output true --- !query 224 +-- !query SELECT '1' >= cast(1 as bigint) FROM t --- !query 224 schema +-- !query schema struct<(CAST(1 AS BIGINT) >= CAST(1 AS BIGINT)):boolean> --- !query 224 output +-- !query output true --- !query 225 +-- !query SELECT '1' >= cast(1 as float) FROM t --- !query 225 schema +-- !query schema struct<(CAST(1 AS FLOAT) >= CAST(1 AS FLOAT)):boolean> --- !query 225 output +-- !query output true --- !query 226 
+-- !query SELECT '1' >= cast(1 as double) FROM t --- !query 226 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 226 output +-- !query output true --- !query 227 +-- !query SELECT '1' >= cast(1 as decimal(10, 0)) FROM t --- !query 227 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE)):boolean> --- !query 227 output +-- !query output true --- !query 228 +-- !query SELECT '1' >= '1' FROM t --- !query 228 schema +-- !query schema struct<(1 >= 1):boolean> --- !query 228 output +-- !query output true --- !query 229 +-- !query SELECT '1' >= cast('1' as binary) FROM t --- !query 229 schema +-- !query schema struct<(CAST(1 AS BINARY) >= CAST(1 AS BINARY)):boolean> --- !query 229 output +-- !query output true --- !query 230 +-- !query SELECT '1' >= cast(1 as boolean) FROM t --- !query 230 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) >= CAST(1 AS BOOLEAN)):boolean> --- !query 230 output +-- !query output true --- !query 231 +-- !query SELECT '1' >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 231 schema +-- !query schema struct<(CAST(1 AS TIMESTAMP) >= CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> --- !query 231 output +-- !query output NULL --- !query 232 +-- !query SELECT '1' >= cast('2017-12-11 09:30:00' as date) FROM t --- !query 232 schema +-- !query schema struct<(CAST(1 AS DATE) >= CAST(2017-12-11 09:30:00 AS DATE)):boolean> --- !query 232 output +-- !query output NULL --- !query 233 +-- !query SELECT '1' <> cast(1 as tinyint) FROM t --- !query 233 schema +-- !query schema struct<(NOT (CAST(1 AS TINYINT) = CAST(1 AS TINYINT))):boolean> --- !query 233 output +-- !query output false --- !query 234 +-- !query SELECT '1' <> cast(1 as smallint) FROM t --- !query 234 schema +-- !query schema struct<(NOT (CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> --- !query 234 output +-- !query output false --- !query 235 +-- !query SELECT '1' <> cast(1 as int) 
FROM t --- !query 235 schema +-- !query schema struct<(NOT (CAST(1 AS INT) = CAST(1 AS INT))):boolean> --- !query 235 output +-- !query output false --- !query 236 +-- !query SELECT '1' <> cast(1 as bigint) FROM t --- !query 236 schema +-- !query schema struct<(NOT (CAST(1 AS BIGINT) = CAST(1 AS BIGINT))):boolean> --- !query 236 output +-- !query output false --- !query 237 +-- !query SELECT '1' <> cast(1 as float) FROM t --- !query 237 schema +-- !query schema struct<(NOT (CAST(1 AS FLOAT) = CAST(1 AS FLOAT))):boolean> --- !query 237 output +-- !query output false --- !query 238 +-- !query SELECT '1' <> cast(1 as double) FROM t --- !query 238 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 238 output +-- !query output false --- !query 239 +-- !query SELECT '1' <> cast(1 as decimal(10, 0)) FROM t --- !query 239 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE))):boolean> --- !query 239 output +-- !query output false --- !query 240 +-- !query SELECT '1' <> '1' FROM t --- !query 240 schema +-- !query schema struct<(NOT (1 = 1)):boolean> --- !query 240 output +-- !query output false --- !query 241 +-- !query SELECT '1' <> cast('1' as binary) FROM t --- !query 241 schema +-- !query schema struct<(NOT (CAST(1 AS BINARY) = CAST(1 AS BINARY))):boolean> --- !query 241 output +-- !query output false --- !query 242 +-- !query SELECT '1' <> cast(1 as boolean) FROM t --- !query 242 schema +-- !query schema struct<(NOT (CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN))):boolean> --- !query 242 output +-- !query output false --- !query 243 +-- !query SELECT '1' <> cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 243 schema +-- !query schema struct<(NOT (CAST(1 AS TIMESTAMP) = CAST(2017-12-11 09:30:00.0 AS TIMESTAMP))):boolean> --- !query 243 output +-- !query output NULL --- !query 244 +-- !query SELECT '1' <> cast('2017-12-11 09:30:00' as date) FROM t --- !query 244 schema +-- 
!query schema struct<(NOT (CAST(1 AS DATE) = CAST(2017-12-11 09:30:00 AS DATE))):boolean> --- !query 244 output +-- !query output NULL --- !query 245 +-- !query SELECT cast(1 as tinyint) < '1' FROM t --- !query 245 schema +-- !query schema struct<(CAST(1 AS TINYINT) < CAST(1 AS TINYINT)):boolean> --- !query 245 output +-- !query output false --- !query 246 +-- !query SELECT cast(1 as smallint) < '1' FROM t --- !query 246 schema +-- !query schema struct<(CAST(1 AS SMALLINT) < CAST(1 AS SMALLINT)):boolean> --- !query 246 output +-- !query output false --- !query 247 +-- !query SELECT cast(1 as int) < '1' FROM t --- !query 247 schema +-- !query schema struct<(CAST(1 AS INT) < CAST(1 AS INT)):boolean> --- !query 247 output +-- !query output false --- !query 248 +-- !query SELECT cast(1 as bigint) < '1' FROM t --- !query 248 schema +-- !query schema struct<(CAST(1 AS BIGINT) < CAST(1 AS BIGINT)):boolean> --- !query 248 output +-- !query output false --- !query 249 +-- !query SELECT cast(1 as float) < '1' FROM t --- !query 249 schema +-- !query schema struct<(CAST(1 AS FLOAT) < CAST(1 AS FLOAT)):boolean> --- !query 249 output +-- !query output false --- !query 250 +-- !query SELECT cast(1 as double) < '1' FROM t --- !query 250 schema +-- !query schema struct<(CAST(1 AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 250 output +-- !query output false --- !query 251 +-- !query SELECT cast(1 as decimal(10, 0)) < '1' FROM t --- !query 251 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) < CAST(1 AS DOUBLE)):boolean> --- !query 251 output +-- !query output false --- !query 252 +-- !query SELECT '1' < '1' FROM t --- !query 252 schema +-- !query schema struct<(1 < 1):boolean> --- !query 252 output +-- !query output false --- !query 253 +-- !query SELECT cast('1' as binary) < '1' FROM t --- !query 253 schema +-- !query schema struct<(CAST(1 AS BINARY) < CAST(1 AS BINARY)):boolean> --- !query 253 output +-- !query output false --- !query 254 +-- !query 
SELECT cast(1 as boolean) < '1' FROM t --- !query 254 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) < CAST(1 AS BOOLEAN)):boolean> --- !query 254 output +-- !query output false --- !query 255 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) < '1' FROM t --- !query 255 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) < CAST(1 AS TIMESTAMP)):boolean> --- !query 255 output +-- !query output NULL --- !query 256 +-- !query SELECT cast('2017-12-11 09:30:00' as date) < '1' FROM t --- !query 256 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00 AS DATE) < CAST(1 AS DATE)):boolean> --- !query 256 output +-- !query output NULL --- !query 257 +-- !query SELECT cast(1 as tinyint) <= '1' FROM t --- !query 257 schema +-- !query schema struct<(CAST(1 AS TINYINT) <= CAST(1 AS TINYINT)):boolean> --- !query 257 output +-- !query output true --- !query 258 +-- !query SELECT cast(1 as smallint) <= '1' FROM t --- !query 258 schema +-- !query schema struct<(CAST(1 AS SMALLINT) <= CAST(1 AS SMALLINT)):boolean> --- !query 258 output +-- !query output true --- !query 259 +-- !query SELECT cast(1 as int) <= '1' FROM t --- !query 259 schema +-- !query schema struct<(CAST(1 AS INT) <= CAST(1 AS INT)):boolean> --- !query 259 output +-- !query output true --- !query 260 +-- !query SELECT cast(1 as bigint) <= '1' FROM t --- !query 260 schema +-- !query schema struct<(CAST(1 AS BIGINT) <= CAST(1 AS BIGINT)):boolean> --- !query 260 output +-- !query output true --- !query 261 +-- !query SELECT cast(1 as float) <= '1' FROM t --- !query 261 schema +-- !query schema struct<(CAST(1 AS FLOAT) <= CAST(1 AS FLOAT)):boolean> --- !query 261 output +-- !query output true --- !query 262 +-- !query SELECT cast(1 as double) <= '1' FROM t --- !query 262 schema +-- !query schema struct<(CAST(1 AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 262 output +-- !query output true --- !query 263 +-- !query SELECT cast(1 as decimal(10, 0)) <= '1' FROM t --- !query 
263 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) <= CAST(1 AS DOUBLE)):boolean> --- !query 263 output +-- !query output true --- !query 264 +-- !query SELECT '1' <= '1' FROM t --- !query 264 schema +-- !query schema struct<(1 <= 1):boolean> --- !query 264 output +-- !query output true --- !query 265 +-- !query SELECT cast('1' as binary) <= '1' FROM t --- !query 265 schema +-- !query schema struct<(CAST(1 AS BINARY) <= CAST(1 AS BINARY)):boolean> --- !query 265 output +-- !query output true --- !query 266 +-- !query SELECT cast(1 as boolean) <= '1' FROM t --- !query 266 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) <= CAST(1 AS BOOLEAN)):boolean> --- !query 266 output +-- !query output true --- !query 267 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= '1' FROM t --- !query 267 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) <= CAST(1 AS TIMESTAMP)):boolean> --- !query 267 output +-- !query output NULL --- !query 268 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <= '1' FROM t --- !query 268 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00 AS DATE) <= CAST(1 AS DATE)):boolean> --- !query 268 output +-- !query output NULL --- !query 269 +-- !query SELECT cast(1 as tinyint) > '1' FROM t --- !query 269 schema +-- !query schema struct<(CAST(1 AS TINYINT) > CAST(1 AS TINYINT)):boolean> --- !query 269 output +-- !query output false --- !query 270 +-- !query SELECT cast(1 as smallint) > '1' FROM t --- !query 270 schema +-- !query schema struct<(CAST(1 AS SMALLINT) > CAST(1 AS SMALLINT)):boolean> --- !query 270 output +-- !query output false --- !query 271 +-- !query SELECT cast(1 as int) > '1' FROM t --- !query 271 schema +-- !query schema struct<(CAST(1 AS INT) > CAST(1 AS INT)):boolean> --- !query 271 output +-- !query output false --- !query 272 +-- !query SELECT cast(1 as bigint) > '1' FROM t --- !query 272 schema +-- !query schema struct<(CAST(1 AS BIGINT) > CAST(1 AS 
BIGINT)):boolean> --- !query 272 output +-- !query output false --- !query 273 +-- !query SELECT cast(1 as float) > '1' FROM t --- !query 273 schema +-- !query schema struct<(CAST(1 AS FLOAT) > CAST(1 AS FLOAT)):boolean> --- !query 273 output +-- !query output false --- !query 274 +-- !query SELECT cast(1 as double) > '1' FROM t --- !query 274 schema +-- !query schema struct<(CAST(1 AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 274 output +-- !query output false --- !query 275 +-- !query SELECT cast(1 as decimal(10, 0)) > '1' FROM t --- !query 275 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) > CAST(1 AS DOUBLE)):boolean> --- !query 275 output +-- !query output false --- !query 276 +-- !query SELECT '1' > '1' FROM t --- !query 276 schema +-- !query schema struct<(1 > 1):boolean> --- !query 276 output +-- !query output false --- !query 277 +-- !query SELECT cast('1' as binary) > '1' FROM t --- !query 277 schema +-- !query schema struct<(CAST(1 AS BINARY) > CAST(1 AS BINARY)):boolean> --- !query 277 output +-- !query output false --- !query 278 +-- !query SELECT cast(1 as boolean) > '1' FROM t --- !query 278 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) > CAST(1 AS BOOLEAN)):boolean> --- !query 278 output +-- !query output false --- !query 279 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) > '1' FROM t --- !query 279 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) > CAST(1 AS TIMESTAMP)):boolean> --- !query 279 output +-- !query output NULL --- !query 280 +-- !query SELECT cast('2017-12-11 09:30:00' as date) > '1' FROM t --- !query 280 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00 AS DATE) > CAST(1 AS DATE)):boolean> --- !query 280 output +-- !query output NULL --- !query 281 +-- !query SELECT cast(1 as tinyint) >= '1' FROM t --- !query 281 schema +-- !query schema struct<(CAST(1 AS TINYINT) >= CAST(1 AS TINYINT)):boolean> --- !query 281 output +-- !query output true --- 
!query 282 +-- !query SELECT cast(1 as smallint) >= '1' FROM t --- !query 282 schema +-- !query schema struct<(CAST(1 AS SMALLINT) >= CAST(1 AS SMALLINT)):boolean> --- !query 282 output +-- !query output true --- !query 283 +-- !query SELECT cast(1 as int) >= '1' FROM t --- !query 283 schema +-- !query schema struct<(CAST(1 AS INT) >= CAST(1 AS INT)):boolean> --- !query 283 output +-- !query output true --- !query 284 +-- !query SELECT cast(1 as bigint) >= '1' FROM t --- !query 284 schema +-- !query schema struct<(CAST(1 AS BIGINT) >= CAST(1 AS BIGINT)):boolean> --- !query 284 output +-- !query output true --- !query 285 +-- !query SELECT cast(1 as float) >= '1' FROM t --- !query 285 schema +-- !query schema struct<(CAST(1 AS FLOAT) >= CAST(1 AS FLOAT)):boolean> --- !query 285 output +-- !query output true --- !query 286 +-- !query SELECT cast(1 as double) >= '1' FROM t --- !query 286 schema +-- !query schema struct<(CAST(1 AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 286 output +-- !query output true --- !query 287 +-- !query SELECT cast(1 as decimal(10, 0)) >= '1' FROM t --- !query 287 schema +-- !query schema struct<(CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) >= CAST(1 AS DOUBLE)):boolean> --- !query 287 output +-- !query output true --- !query 288 +-- !query SELECT '1' >= '1' FROM t --- !query 288 schema +-- !query schema struct<(1 >= 1):boolean> --- !query 288 output +-- !query output true --- !query 289 +-- !query SELECT cast('1' as binary) >= '1' FROM t --- !query 289 schema +-- !query schema struct<(CAST(1 AS BINARY) >= CAST(1 AS BINARY)):boolean> --- !query 289 output +-- !query output true --- !query 290 +-- !query SELECT cast(1 as boolean) >= '1' FROM t --- !query 290 schema +-- !query schema struct<(CAST(1 AS BOOLEAN) >= CAST(1 AS BOOLEAN)):boolean> --- !query 290 output +-- !query output true --- !query 291 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= '1' FROM t --- !query 291 schema +-- !query schema struct<(CAST(2017-12-11 
09:30:00.0 AS TIMESTAMP) >= CAST(1 AS TIMESTAMP)):boolean> --- !query 291 output +-- !query output NULL --- !query 292 +-- !query SELECT cast('2017-12-11 09:30:00' as date) >= '1' FROM t --- !query 292 schema +-- !query schema struct<(CAST(2017-12-11 09:30:00 AS DATE) >= CAST(1 AS DATE)):boolean> --- !query 292 output +-- !query output NULL --- !query 293 +-- !query SELECT cast(1 as tinyint) <> '1' FROM t --- !query 293 schema +-- !query schema struct<(NOT (CAST(1 AS TINYINT) = CAST(1 AS TINYINT))):boolean> --- !query 293 output +-- !query output false --- !query 294 +-- !query SELECT cast(1 as smallint) <> '1' FROM t --- !query 294 schema +-- !query schema struct<(NOT (CAST(1 AS SMALLINT) = CAST(1 AS SMALLINT))):boolean> --- !query 294 output +-- !query output false --- !query 295 +-- !query SELECT cast(1 as int) <> '1' FROM t --- !query 295 schema +-- !query schema struct<(NOT (CAST(1 AS INT) = CAST(1 AS INT))):boolean> --- !query 295 output +-- !query output false --- !query 296 +-- !query SELECT cast(1 as bigint) <> '1' FROM t --- !query 296 schema +-- !query schema struct<(NOT (CAST(1 AS BIGINT) = CAST(1 AS BIGINT))):boolean> --- !query 296 output +-- !query output false --- !query 297 +-- !query SELECT cast(1 as float) <> '1' FROM t --- !query 297 schema +-- !query schema struct<(NOT (CAST(1 AS FLOAT) = CAST(1 AS FLOAT))):boolean> --- !query 297 output +-- !query output false --- !query 298 +-- !query SELECT cast(1 as double) <> '1' FROM t --- !query 298 schema +-- !query schema struct<(NOT (CAST(1 AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 298 output +-- !query output false --- !query 299 +-- !query SELECT cast(1 as decimal(10, 0)) <> '1' FROM t --- !query 299 schema +-- !query schema struct<(NOT (CAST(CAST(1 AS DECIMAL(10,0)) AS DOUBLE) = CAST(1 AS DOUBLE))):boolean> --- !query 299 output +-- !query output false --- !query 300 +-- !query SELECT '1' <> '1' FROM t --- !query 300 schema +-- !query schema struct<(NOT (1 = 1)):boolean> --- !query 300 
output +-- !query output false --- !query 301 +-- !query SELECT cast('1' as binary) <> '1' FROM t --- !query 301 schema +-- !query schema struct<(NOT (CAST(1 AS BINARY) = CAST(1 AS BINARY))):boolean> --- !query 301 output +-- !query output false --- !query 302 +-- !query SELECT cast(1 as boolean) <> '1' FROM t --- !query 302 schema +-- !query schema struct<(NOT (CAST(1 AS BOOLEAN) = CAST(1 AS BOOLEAN))):boolean> --- !query 302 output +-- !query output false --- !query 303 +-- !query SELECT cast('2017-12-11 09:30:00.0' as timestamp) <> '1' FROM t --- !query 303 schema +-- !query schema struct<(NOT (CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) = CAST(1 AS TIMESTAMP))):boolean> --- !query 303 output +-- !query output NULL --- !query 304 +-- !query SELECT cast('2017-12-11 09:30:00' as date) <> '1' FROM t --- !query 304 schema +-- !query schema struct<(NOT (CAST(2017-12-11 09:30:00 AS DATE) = CAST(1 AS DATE))):boolean> --- !query 304 output +-- !query output NULL --- !query 305 +-- !query SELECT abs('1') FROM t --- !query 305 schema +-- !query schema struct --- !query 305 output +-- !query output 1.0 --- !query 306 +-- !query SELECT sum('1') FROM t --- !query 306 schema +-- !query schema struct --- !query 306 output +-- !query output 1.0 --- !query 307 +-- !query SELECT avg('1') FROM t --- !query 307 schema +-- !query schema struct --- !query 307 output +-- !query output 1.0 --- !query 308 +-- !query SELECT stddev_pop('1') FROM t --- !query 308 schema +-- !query schema struct --- !query 308 output +-- !query output 0.0 --- !query 309 +-- !query SELECT stddev_samp('1') FROM t --- !query 309 schema +-- !query schema struct --- !query 309 output +-- !query output NaN --- !query 310 +-- !query SELECT - '1' FROM t --- !query 310 schema +-- !query schema struct<(- CAST(1 AS DOUBLE)):double> --- !query 310 output +-- !query output -1.0 --- !query 311 +-- !query SELECT + '1' FROM t --- !query 311 schema -struct<1:string> --- !query 311 output -1 +-- !query schema +struct<(+ CAST(1 
AS DOUBLE)):double> +-- !query output +1.0 --- !query 312 +-- !query SELECT var_pop('1') FROM t --- !query 312 schema +-- !query schema struct --- !query 312 output +-- !query output 0.0 --- !query 313 +-- !query SELECT var_samp('1') FROM t --- !query 313 schema +-- !query schema struct --- !query 313 output +-- !query output NaN --- !query 314 +-- !query SELECT skewness('1') FROM t --- !query 314 schema +-- !query schema struct --- !query 314 output +-- !query output NaN --- !query 315 +-- !query SELECT kurtosis('1') FROM t --- !query 315 schema +-- !query schema struct --- !query 315 output +-- !query output NaN diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out index 850cf9171a2fd..7b419c6702586 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out @@ -2,260 +2,260 @@ -- Number of queries: 32 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 'aa' as a --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query select cast(a as byte) from t --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output NULL --- !query 2 +-- !query select cast(a as short) from t --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output NULL --- !query 3 +-- !query select cast(a as int) from t --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output NULL --- !query 4 +-- !query select cast(a as long) from t --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output NULL --- !query 5 +-- !query select cast(a as float) from t --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output NULL --- 
!query 6 +-- !query select cast(a as double) from t --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL --- !query 7 +-- !query select cast(a as decimal) from t --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL --- !query 8 +-- !query select cast(a as boolean) from t --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output NULL --- !query 9 +-- !query select cast(a as timestamp) from t --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output NULL --- !query 10 +-- !query select cast(a as date) from t --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NULL --- !query 11 +-- !query select cast(a as binary) from t --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output aa --- !query 12 +-- !query select cast(a as array) from t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 't.`a`' due to data type mismatch: cannot cast string to array; line 1 pos 7 --- !query 13 +-- !query select cast(a as struct) from t --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 't.`a`' due to data type mismatch: cannot cast string to struct; line 1 pos 7 --- !query 14 +-- !query select cast(a as map) from t --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 't.`a`' due to data type mismatch: cannot cast string to map; line 1 pos 7 --- !query 15 +-- !query select to_timestamp(a) from t --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output NULL --- !query 16 +-- !query select to_timestamp('2018-01-01', a) from t --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output NULL --- 
!query 17 +-- !query select to_unix_timestamp(a) from t --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output NULL --- !query 18 +-- !query select to_unix_timestamp('2018-01-01', a) from t --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output NULL --- !query 19 +-- !query select unix_timestamp(a) from t --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output NULL --- !query 20 +-- !query select unix_timestamp('2018-01-01', a) from t --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output NULL --- !query 21 +-- !query select from_unixtime(a) from t --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL --- !query 22 +-- !query select from_unixtime('2018-01-01', a) from t --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output NULL --- !query 23 +-- !query select next_day(a, 'MO') from t --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output NULL --- !query 24 +-- !query select next_day('2018-01-01', a) from t --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output NULL --- !query 25 +-- !query select trunc(a, 'MM') from t --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output NULL --- !query 26 +-- !query select trunc('2018-01-01', a) from t --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output NULL --- !query 27 +-- !query select unhex('-123') --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output NULL --- !query 28 +-- !query select sha2(a, a) from t --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output NULL --- !query 29 +-- !query select get_json_object(a, a) from t --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output NULL --- !query 30 +-- !query select json_tuple(a, a) 
from t --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output NULL --- !query 31 +-- !query select from_json(a, 'a INT') from t --- !query 31 schema +-- !query schema struct> --- !query 31 output +-- !query output {"a":null} diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out index 20a9e47217238..89b1cdb3e353d 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out @@ -2,1304 +2,1304 @@ -- Number of queries: 145 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 2 --- !query 2 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 2 --- !query 3 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as int) FROM t --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 --- !query 4 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 2 --- !query 5 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as float) FROM t --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1.0 2.0 --- !query 6 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as double) FROM t --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1.0 
2.0 --- !query 7 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 2 --- !query 8 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as string) FROM t --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 2 --- !query 9 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. binary <> tinyint at the first column of the second table; --- !query 10 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> tinyint at the first column of the second table; --- !query 11 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> tinyint at the first column of the second table; --- !query 12 +-- !query SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
date <> tinyint at the first column of the second table; --- !query 13 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 2 --- !query 14 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 2 --- !query 15 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as int) FROM t --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 2 --- !query 16 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 1 2 --- !query 17 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as float) FROM t --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1.0 2.0 --- !query 18 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as double) FROM t --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1.0 2.0 --- !query 19 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 1 2 --- !query 20 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as string) FROM t --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 1 2 --- !query 21 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> smallint at the first column of the second table; --- !query 22 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> smallint at the first column of the second table; --- !query 23 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> smallint at the first column of the second table; --- !query 24 +-- !query SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
date <> smallint at the first column of the second table; --- !query 25 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1 2 --- !query 26 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 1 2 --- !query 27 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as int) FROM t --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 1 2 --- !query 28 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 1 2 --- !query 29 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as float) FROM t --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output 1.0 2.0 --- !query 30 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as double) FROM t --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1.0 2.0 --- !query 31 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 1 2 --- !query 32 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as string) FROM t --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 1 2 --- !query 33 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> int at the first column of the second table; --- !query 34 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> int at the first column of the second table; --- !query 35 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 35 schema +-- !query schema struct<> --- !query 35 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> int at the first column of the second table; --- !query 36 +-- !query SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
date <> int at the first column of the second table; --- !query 37 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 1 2 --- !query 38 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 1 2 --- !query 39 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as int) FROM t --- !query 39 schema +-- !query schema struct --- !query 39 output +-- !query output 1 2 --- !query 40 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output 1 2 --- !query 41 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as float) FROM t --- !query 41 schema +-- !query schema struct --- !query 41 output +-- !query output 1.0 2.0 --- !query 42 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as double) FROM t --- !query 42 schema +-- !query schema struct --- !query 42 output +-- !query output 1.0 2.0 --- !query 43 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 1 2 --- !query 44 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as string) FROM t --- !query 44 schema +-- !query schema struct --- !query 44 output +-- !query output 1 2 --- !query 45 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 45 schema +-- !query schema struct<> --- !query 45 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> bigint at the first column of the second table; --- !query 46 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 46 schema +-- !query schema struct<> --- !query 46 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> bigint at the first column of the second table; --- !query 47 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 47 schema +-- !query schema struct<> --- !query 47 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> bigint at the first column of the second table; --- !query 48 +-- !query SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 48 schema +-- !query schema struct<> --- !query 48 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
date <> bigint at the first column of the second table; --- !query 49 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 49 schema +-- !query schema struct --- !query 49 output +-- !query output 1.0 2.0 --- !query 50 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 50 schema +-- !query schema struct --- !query 50 output +-- !query output 1.0 2.0 --- !query 51 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as int) FROM t --- !query 51 schema +-- !query schema struct --- !query 51 output +-- !query output 1.0 2.0 --- !query 52 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 52 schema +-- !query schema struct --- !query 52 output +-- !query output 1.0 2.0 --- !query 53 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as float) FROM t --- !query 53 schema +-- !query schema struct --- !query 53 output +-- !query output 1.0 2.0 --- !query 54 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as double) FROM t --- !query 54 schema +-- !query schema struct --- !query 54 output +-- !query output 1.0 2.0 --- !query 55 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 55 schema +-- !query schema struct --- !query 55 output +-- !query output 1.0 2.0 --- !query 56 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as string) FROM t --- !query 56 schema +-- !query schema struct --- !query 56 output +-- !query output 1.0 2 --- !query 57 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> float at the first column of the second table; --- !query 58 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 58 schema +-- !query schema struct<> --- !query 58 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> float at the first column of the second table; --- !query 59 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> float at the first column of the second table; --- !query 60 +-- !query SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
date <> float at the first column of the second table; --- !query 61 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 61 schema +-- !query schema struct --- !query 61 output +-- !query output 1.0 2.0 --- !query 62 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 62 schema +-- !query schema struct --- !query 62 output +-- !query output 1.0 2.0 --- !query 63 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as int) FROM t --- !query 63 schema +-- !query schema struct --- !query 63 output +-- !query output 1.0 2.0 --- !query 64 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output 1.0 2.0 --- !query 65 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as float) FROM t --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output 1.0 2.0 --- !query 66 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as double) FROM t --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output 1.0 2.0 --- !query 67 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output 1.0 2.0 --- !query 68 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as string) FROM t --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output 1.0 2 --- !query 69 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 69 schema +-- !query schema struct<> --- !query 69 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> double at the first column of the second table; --- !query 70 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 70 schema +-- !query schema struct<> --- !query 70 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> double at the first column of the second table; --- !query 71 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 71 schema +-- !query schema struct<> --- !query 71 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> double at the first column of the second table; --- !query 72 +-- !query SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 72 schema +-- !query schema struct<> --- !query 72 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
date <> double at the first column of the second table; --- !query 73 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 73 schema +-- !query schema struct --- !query 73 output +-- !query output 1 2 --- !query 74 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 74 schema +-- !query schema struct --- !query 74 output +-- !query output 1 2 --- !query 75 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as int) FROM t --- !query 75 schema +-- !query schema struct --- !query 75 output +-- !query output 1 2 --- !query 76 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 76 schema +-- !query schema struct --- !query 76 output +-- !query output 1 2 --- !query 77 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as float) FROM t --- !query 77 schema +-- !query schema struct --- !query 77 output +-- !query output 1.0 2.0 --- !query 78 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as double) FROM t --- !query 78 schema +-- !query schema struct --- !query 78 output +-- !query output 1.0 2.0 --- !query 79 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 79 schema +-- !query schema struct --- !query 79 output +-- !query output 1 2 --- !query 80 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as string) FROM t --- !query 80 schema +-- !query schema struct --- !query 80 output +-- !query output 1 2 --- !query 81 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 81 schema +-- !query schema struct<> --- !query 81 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> decimal(10,0) at the first column of the second table; --- !query 82 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 82 schema +-- !query schema struct<> --- !query 82 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> decimal(10,0) at the first column of the second table; --- !query 83 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 83 schema +-- !query schema struct<> --- !query 83 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> decimal(10,0) at the first column of the second table; --- !query 84 +-- !query SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 84 schema +-- !query schema struct<> --- !query 84 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
date <> decimal(10,0) at the first column of the second table; --- !query 85 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 85 schema +-- !query schema struct --- !query 85 output +-- !query output 1 2 --- !query 86 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 86 schema +-- !query schema struct --- !query 86 output +-- !query output 1 2 --- !query 87 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as int) FROM t --- !query 87 schema +-- !query schema struct --- !query 87 output +-- !query output 1 2 --- !query 88 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 88 schema +-- !query schema struct --- !query 88 output +-- !query output 1 2 --- !query 89 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as float) FROM t --- !query 89 schema +-- !query schema struct --- !query 89 output +-- !query output 1 2.0 --- !query 90 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as double) FROM t --- !query 90 schema +-- !query schema struct --- !query 90 output +-- !query output 1 2.0 --- !query 91 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 91 schema +-- !query schema struct --- !query 91 output +-- !query output 1 2 --- !query 92 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as string) FROM t --- !query 92 schema +-- !query schema struct --- !query 92 output +-- !query output 1 2 --- !query 93 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 93 schema +-- !query schema struct<> --- !query 93 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> string at the first column of the second table; --- !query 94 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 94 schema +-- !query schema struct<> --- !query 94 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> string at the first column of the second table; --- !query 95 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 95 schema +-- !query schema struct --- !query 95 output +-- !query output 1 2017-12-11 09:30:00 --- !query 96 +-- !query SELECT cast(1 as string) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 96 schema +-- !query schema struct --- !query 96 output +-- !query output 1 2017-12-11 --- !query 97 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 97 schema +-- !query schema struct<> --- !query 97 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. tinyint <> binary at the first column of the second table; --- !query 98 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 98 schema +-- !query schema struct<> --- !query 98 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. smallint <> binary at the first column of the second table; --- !query 99 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as int) FROM t --- !query 99 schema +-- !query schema struct<> --- !query 99 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
int <> binary at the first column of the second table; --- !query 100 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 100 schema +-- !query schema struct<> --- !query 100 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. bigint <> binary at the first column of the second table; --- !query 101 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as float) FROM t --- !query 101 schema +-- !query schema struct<> --- !query 101 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. float <> binary at the first column of the second table; --- !query 102 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as double) FROM t --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. double <> binary at the first column of the second table; --- !query 103 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. decimal(10,0) <> binary at the first column of the second table; --- !query 104 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as string) FROM t --- !query 104 schema +-- !query schema struct<> --- !query 104 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
string <> binary at the first column of the second table; --- !query 105 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 105 schema +-- !query schema struct --- !query 105 output +-- !query output 1 2 --- !query 106 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 106 schema +-- !query schema struct<> --- !query 106 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> binary at the first column of the second table; --- !query 107 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 107 schema +-- !query schema struct<> --- !query 107 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. timestamp <> binary at the first column of the second table; --- !query 108 +-- !query SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 108 schema +-- !query schema struct<> --- !query 108 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. date <> binary at the first column of the second table; --- !query 109 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 109 schema +-- !query schema struct<> --- !query 109 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
tinyint <> boolean at the first column of the second table; --- !query 110 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 110 schema +-- !query schema struct<> --- !query 110 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. smallint <> boolean at the first column of the second table; --- !query 111 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as int) FROM t --- !query 111 schema +-- !query schema struct<> --- !query 111 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. int <> boolean at the first column of the second table; --- !query 112 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 112 schema +-- !query schema struct<> --- !query 112 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. bigint <> boolean at the first column of the second table; --- !query 113 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as float) FROM t --- !query 113 schema +-- !query schema struct<> --- !query 113 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. float <> boolean at the first column of the second table; --- !query 114 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as double) FROM t --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
double <> boolean at the first column of the second table; --- !query 115 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. decimal(10,0) <> boolean at the first column of the second table; --- !query 116 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as string) FROM t --- !query 116 schema +-- !query schema struct<> --- !query 116 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. string <> boolean at the first column of the second table; --- !query 117 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. binary <> boolean at the first column of the second table; --- !query 118 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 118 schema +-- !query schema struct --- !query 118 output +-- !query output true --- !query 119 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 119 schema +-- !query schema struct<> --- !query 119 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
timestamp <> boolean at the first column of the second table; --- !query 120 +-- !query SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. date <> boolean at the first column of the second table; --- !query 121 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. tinyint <> timestamp at the first column of the second table; --- !query 122 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. smallint <> timestamp at the first column of the second table; --- !query 123 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as int) FROM t --- !query 123 schema +-- !query schema struct<> --- !query 123 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. int <> timestamp at the first column of the second table; --- !query 124 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
bigint <> timestamp at the first column of the second table; --- !query 125 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as float) FROM t --- !query 125 schema +-- !query schema struct<> --- !query 125 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. float <> timestamp at the first column of the second table; --- !query 126 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as double) FROM t --- !query 126 schema +-- !query schema struct<> --- !query 126 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. double <> timestamp at the first column of the second table; --- !query 127 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. decimal(10,0) <> timestamp at the first column of the second table; --- !query 128 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as string) FROM t --- !query 128 schema +-- !query schema struct --- !query 128 output +-- !query output 2 2017-12-12 09:30:00 --- !query 129 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 129 schema +-- !query schema struct<> --- !query 129 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
binary <> timestamp at the first column of the second table; --- !query 130 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 130 schema +-- !query schema struct<> --- !query 130 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> timestamp at the first column of the second table; --- !query 131 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 131 schema +-- !query schema struct --- !query 131 output +-- !query output 2017-12-11 09:30:00 2017-12-12 09:30:00 --- !query 132 +-- !query SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 132 schema +-- !query schema struct --- !query 132 output +-- !query output 2017-12-11 00:00:00 2017-12-12 09:30:00 --- !query 133 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as tinyint) FROM t --- !query 133 schema +-- !query schema struct<> --- !query 133 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. tinyint <> date at the first column of the second table; --- !query 134 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as smallint) FROM t --- !query 134 schema +-- !query schema struct<> --- !query 134 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
smallint <> date at the first column of the second table; --- !query 135 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as int) FROM t --- !query 135 schema +-- !query schema struct<> --- !query 135 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. int <> date at the first column of the second table; --- !query 136 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as bigint) FROM t --- !query 136 schema +-- !query schema struct<> --- !query 136 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. bigint <> date at the first column of the second table; --- !query 137 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as float) FROM t --- !query 137 schema +-- !query schema struct<> --- !query 137 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. float <> date at the first column of the second table; --- !query 138 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as double) FROM t --- !query 138 schema +-- !query schema struct<> --- !query 138 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. double <> date at the first column of the second table; --- !query 139 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t --- !query 139 schema +-- !query schema struct<> --- !query 139 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. 
decimal(10,0) <> date at the first column of the second table; --- !query 140 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as string) FROM t --- !query 140 schema +-- !query schema struct --- !query 140 output +-- !query output 2 2017-12-12 --- !query 141 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2' as binary) FROM t --- !query 141 schema +-- !query schema struct<> --- !query 141 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. binary <> date at the first column of the second table; --- !query 142 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as boolean) FROM t --- !query 142 schema +-- !query schema struct<> --- !query 142 output +-- !query output org.apache.spark.sql.AnalysisException Union can only be performed on tables with the compatible column types. boolean <> date at the first column of the second table; --- !query 143 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timestamp) FROM t --- !query 143 schema +-- !query schema struct --- !query 143 output +-- !query output 2017-12-11 09:30:00 2017-12-12 00:00:00 --- !query 144 +-- !query SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FROM t --- !query 144 schema +-- !query schema struct --- !query 144 output +-- !query output 2017-12-11 2017-12-12 diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out index 5b77bf9f35f25..12af1b7d034da 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out @@ -2,205 +2,205 @@ -- 
Number of queries: 25 --- !query 0 +-- !query CREATE TEMPORARY VIEW t AS SELECT 1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint)) FROM t --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 --- !query 2 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint)) FROM t --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 --- !query 3 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as int)) FROM t --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 --- !query 4 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as bigint)) FROM t --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 --- !query 5 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as float)) FROM t --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 --- !query 6 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as double)) FROM t --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 --- !query 7 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as decimal(10, 0))) FROM t --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 --- !query 8 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as string)) FROM t --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 --- !query 9 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('1' as binary)) FROM t --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 --- !query 10 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as boolean)) FROM t --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 --- 
!query 11 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00.0' as timestamp)) FROM t --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 --- !query 12 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00' as date)) FROM t --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 1 --- !query 13 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 --- !query 14 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 --- !query 15 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as int) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 --- !query 16 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as bigint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 1 --- !query 17 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as float) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 --- !query 18 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as double) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 --- !query 19 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as decimal(10, 0)) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 19 schema +-- !query schema struct --- 
!query 19 output +-- !query output 1 --- !query 20 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as string) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'RANGE BETWEEN CURRENT ROW AND CAST(1 AS STRING) FOLLOWING' due to data type mismatch: The data type of the upper bound 'string' does not match the expected data type '(numeric or interval)'.; line 1 pos 21 --- !query 21 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('1' as binary) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'RANGE BETWEEN CURRENT ROW AND CAST(1 AS BINARY) FOLLOWING' due to data type mismatch: The data type of the upper bound 'binary' does not match the expected data type '(numeric or interval)'.; line 1 pos 21 --- !query 22 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as boolean) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'RANGE BETWEEN CURRENT ROW AND CAST(1 AS BOOLEAN) FOLLOWING' due to data type mismatch: The data type of the upper bound 'boolean' does not match the expected data type '(numeric or interval)'.; line 1 pos 21 --- !query 23 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00.0' as timestamp) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(PARTITION BY 1 ORDER BY CAST('2017-12-11 09:30:00.0' AS TIMESTAMP) DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: The data type 
'timestamp' used in the order specification does not match the data type 'int' which is used in the range frame.; line 1 pos 21 --- !query 24 +-- !query SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00' as date) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 1 diff --git a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out index f4455bb717578..9f4229a11b65d 100644 --- a/sql/core/src/test/resources/sql-tests/results/udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udaf.sql.out @@ -2,69 +2,69 @@ -- Number of queries: 8 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1), (2), (3), (4) as t1(int_col1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE FUNCTION myDoubleAvg AS 'test.org.apache.spark.sql.MyDoubleAvg' --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT default.myDoubleAvg(int_col1) as my_avg from t1 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 102.5 --- !query 3 +-- !query SELECT default.myDoubleAvg(int_col1, 3) as my_avg from t1 --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException Invalid number of arguments for function default.myDoubleAvg. 
Expected: 1; Found: 2; line 1 pos 7 --- !query 4 +-- !query CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT default.udaf1(int_col1) as udaf1 from t1 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 7 --- !query 6 +-- !query DROP FUNCTION myDoubleAvg --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query DROP FUNCTION udaf1 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part2.sql.out deleted file mode 100644 index ad2f1bdf77d7a..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part2.sql.out +++ /dev/null @@ -1,156 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 - - --- !query 0 -create temporary view int4_tbl as select * from values - (0), - (123456), - (-123456), - (2147483647), - (-2147483647) - as int4_tbl(f1) --- !query 0 schema -struct<> --- !query 0 output - - - --- !query 1 -SELECT - (NULL AND NULL) IS NULL AS `t`, - (TRUE AND NULL) IS NULL AS `t`, - (FALSE AND NULL) IS NULL AS `t`, - (NULL AND TRUE) IS NULL AS `t`, - (NULL AND FALSE) IS NULL AS `t`, - (TRUE AND TRUE) AS `t`, - NOT (TRUE AND FALSE) AS `t`, - NOT (FALSE AND TRUE) AS `t`, - NOT (FALSE AND FALSE) AS `t` --- !query 1 schema -struct --- !query 1 output -true true false true false true true true true - - --- !query 2 -SELECT - (NULL OR NULL) IS NULL AS `t`, - (TRUE OR NULL) IS NULL AS `t`, - (FALSE OR NULL) IS NULL AS `t`, - (NULL OR 
TRUE) IS NULL AS `t`, - (NULL OR FALSE) IS NULL AS `t`, - (TRUE OR TRUE) AS `t`, - (TRUE OR FALSE) AS `t`, - (FALSE OR TRUE) AS `t`, - NOT (FALSE OR FALSE) AS `t` --- !query 2 schema -struct --- !query 2 output -true false true false true true true true true - - --- !query 3 -select min(udf(unique1)) from tenk1 --- !query 3 schema -struct --- !query 3 output -0 - - --- !query 4 -select udf(max(unique1)) from tenk1 --- !query 4 schema -struct --- !query 4 output -9999 - - --- !query 5 -select max(unique1) from tenk1 where udf(unique1) < 42 --- !query 5 schema -struct --- !query 5 output -41 - - --- !query 6 -select max(unique1) from tenk1 where unique1 > udf(42) --- !query 6 schema -struct --- !query 6 output -9999 - - --- !query 7 -select max(unique1) from tenk1 where udf(unique1) > 42000 --- !query 7 schema -struct --- !query 7 output -NULL - - --- !query 8 -select max(tenthous) from tenk1 where udf(thousand) = 33 --- !query 8 schema -struct --- !query 8 output -9033 - - --- !query 9 -select min(tenthous) from tenk1 where udf(thousand) = 33 --- !query 9 schema -struct --- !query 9 output -33 - - --- !query 10 -select distinct max(udf(unique2)) from tenk1 --- !query 10 schema -struct --- !query 10 output -9999 - - --- !query 11 -select max(unique2) from tenk1 order by udf(1) --- !query 11 schema -struct --- !query 11 output -9999 - - --- !query 12 -select max(unique2) from tenk1 order by max(udf(unique2)) --- !query 12 schema -struct --- !query 12 output -9999 - - --- !query 13 -select udf(max(udf(unique2))) from tenk1 order by udf(max(unique2))+1 --- !query 13 schema -struct --- !query 13 output -9999 - - --- !query 14 -select t1.max_unique2, udf(g) from (select max(udf(unique2)) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc --- !query 14 schema -struct --- !query 14 output -9999 3 -9999 2 -9999 1 - - --- !query 15 -select udf(max(100)) from tenk1 --- !query 15 schema -struct --- !query 15 output -100 diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out similarity index 77% rename from sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out rename to sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out index a2f64717d73a1..d65c56774eafd 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -2,321 +2,321 @@ -- Number of queries: 43 --- !query 0 +-- !query SELECT avg(udf(four)) AS avg_1 FROM onek --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1.5 --- !query 1 +-- !query SELECT udf(avg(a)) AS avg_32 FROM aggtest WHERE a < 100 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 32.666666666666664 --- !query 2 +-- !query select CAST(avg(udf(b)) AS Decimal(10,3)) AS avg_107_943 FROM aggtest --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 107.943 --- !query 3 +-- !query SELECT sum(udf(four)) AS sum_1500 FROM onek --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1500 --- !query 4 +-- !query SELECT udf(sum(a)) AS sum_198 FROM aggtest --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 198 --- !query 5 +-- !query SELECT udf(udf(sum(b))) AS avg_431_773 FROM aggtest --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 431.77260909229517 --- !query 6 +-- !query SELECT udf(max(four)) AS max_3 FROM onek --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 3 --- !query 7 +-- !query SELECT max(udf(a)) AS max_100 FROM aggtest --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 100 --- !query 8 +-- 
!query SELECT udf(udf(max(aggtest.b))) AS max_324_78 FROM aggtest --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 324.78 --- !query 9 +-- !query SELECT stddev_pop(udf(b)) FROM aggtest --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 131.10703231895047 --- !query 10 +-- !query SELECT udf(stddev_samp(b)) FROM aggtest --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 151.38936080399804 --- !query 11 +-- !query SELECT var_pop(udf(b)) FROM aggtest --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 17189.053923482323 --- !query 12 +-- !query SELECT udf(var_samp(b)) FROM aggtest --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 22918.738564643096 --- !query 13 +-- !query SELECT udf(stddev_pop(CAST(b AS Decimal(38,0)))) FROM aggtest --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 131.18117242958306 --- !query 14 +-- !query SELECT stddev_samp(CAST(udf(b) AS Decimal(38,0))) FROM aggtest --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 151.47497042966097 --- !query 15 +-- !query SELECT udf(var_pop(CAST(b AS Decimal(38,0)))) FROM aggtest --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 17208.5 --- !query 16 +-- !query SELECT var_samp(udf(CAST(b AS Decimal(38,0)))) FROM aggtest --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 22944.666666666668 --- !query 17 +-- !query SELECT udf(var_pop(1.0)), var_samp(udf(2.0)) --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 0.0 NaN --- !query 18 +-- !query SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS Decimal(38,0))) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 0.0 NaN --- !query 19 +-- !query select sum(udf(CAST(null 
AS int))) from range(1,4) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output NULL --- !query 20 +-- !query select sum(udf(CAST(null AS long))) from range(1,4) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output NULL --- !query 21 +-- !query select sum(udf(CAST(null AS Decimal(38,0)))) from range(1,4) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL --- !query 22 +-- !query select sum(udf(CAST(null AS DOUBLE))) from range(1,4) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output NULL --- !query 23 +-- !query select avg(udf(CAST(null AS int))) from range(1,4) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output NULL --- !query 24 +-- !query select avg(udf(CAST(null AS long))) from range(1,4) --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output NULL --- !query 25 +-- !query select avg(udf(CAST(null AS Decimal(38,0)))) from range(1,4) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output NULL --- !query 26 +-- !query select avg(udf(CAST(null AS DOUBLE))) from range(1,4) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output NULL --- !query 27 +-- !query select sum(CAST(udf('NaN') AS DOUBLE)) from range(1,4) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output NaN --- !query 28 +-- !query select avg(CAST(udf('NaN') AS DOUBLE)) from range(1,4) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output NaN --- !query 29 +-- !query SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE)) FROM (VALUES ('Infinity'), ('1')) v(x) --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output Infinity NaN --- !query 30 +-- !query SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE)) FROM (VALUES ('Infinity'), 
('Infinity')) v(x) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output Infinity NaN --- !query 31 +-- !query SELECT avg(CAST(udf(x) AS DOUBLE)), var_pop(CAST(udf(x) AS DOUBLE)) FROM (VALUES ('-Infinity'), ('Infinity')) v(x) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output NaN NaN --- !query 32 +-- !query SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) FROM (VALUES (100000003), (100000004), (100000006), (100000007)) v(x) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 1.00000005E8 2.5 --- !query 33 +-- !query SELECT avg(udf(CAST(x AS DOUBLE))), udf(var_pop(CAST(x AS DOUBLE))) FROM (VALUES (7000000000005), (7000000000007)) v(x) --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output 7.000000000006E12 1.0 --- !query 34 +-- !query SELECT udf(covar_pop(b, udf(a))), covar_samp(udf(b), a) FROM aggtest --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 653.6289553875104 871.5052738500139 --- !query 35 +-- !query SELECT corr(b, udf(a)) FROM aggtest --- !query 35 schema +-- !query schema struct --- !query 35 output +-- !query output 0.1396345165178734 --- !query 36 +-- !query SELECT count(udf(four)) AS cnt_1000 FROM onek --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 1000 --- !query 37 +-- !query SELECT udf(count(DISTINCT four)) AS cnt_4 FROM onek --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 4 --- !query 38 +-- !query select ten, udf(count(*)), sum(udf(four)) from onek group by ten order by ten --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output 0 100 100 1 100 200 2 100 100 @@ -329,12 +329,12 @@ struct --- !query 39 output +-- !query output 0 100 2 1 100 4 2 100 2 @@ -347,13 +347,13 @@ struct --- !query 40 output +-- !query output 0 2 2 2 4 2 @@ -361,14 +361,14 @@ struct --- 
!query 41 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. @@ -376,12 +376,12 @@ Expression in where clause: [(sum(DISTINCT CAST((outer() + b.`four`) AS BIGINT)) Invalid expressions: [sum(DISTINCT CAST((outer() + b.`four`) AS BIGINT))]; --- !query 42 +-- !query select (select udf(max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))) from tenk1 o --- !query 42 schema +-- !query schema struct<> --- !query 42 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`o.unique1`' given input columns: [i.even, i.fivethous, i.four, i.hundred, i.odd, i.string4, i.stringu1, i.stringu2, i.ten, i.tenthous, i.thousand, i.twenty, i.two, i.twothousand, i.unique1, i.unique2]; line 2 pos 67 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out new file mode 100644 index 0000000000000..c10fe9b51dd72 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part2.sql.out @@ -0,0 +1,264 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 25 + + +-- !query +create temporary view int4_tbl as select * from values + (0), + (123456), + (-123456), + (2147483647), + (-2147483647) + as int4_tbl(f1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES + (1, 1, 1, 1L), + (3, 3, 3, null), + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0 +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query 
+SELECT + BIT_AND(cast(b1 as tinyint)) AS a1, + BIT_AND(cast(b2 as smallint)) AS b1, + BIT_AND(b3) AS c1, + BIT_AND(b4) AS d1, + BIT_OR(cast(b1 as tinyint)) AS e7, + BIT_OR(cast(b2 as smallint)) AS f7, + BIT_OR(b3) AS g7, + BIT_OR(b4) AS h3 +FROM bitwise_test +-- !query schema +struct +-- !query output +1 1 1 1 7 7 7 3 + + +-- !query +SELECT + (NULL AND NULL) IS NULL AS `t`, + (TRUE AND NULL) IS NULL AS `t`, + (FALSE AND NULL) IS NULL AS `t`, + (NULL AND TRUE) IS NULL AS `t`, + (NULL AND FALSE) IS NULL AS `t`, + (TRUE AND TRUE) AS `t`, + NOT (TRUE AND FALSE) AS `t`, + NOT (FALSE AND TRUE) AS `t`, + NOT (FALSE AND FALSE) AS `t` +-- !query schema +struct +-- !query output +true true false true false true true true true + + +-- !query +SELECT + (NULL OR NULL) IS NULL AS `t`, + (TRUE OR NULL) IS NULL AS `t`, + (FALSE OR NULL) IS NULL AS `t`, + (NULL OR TRUE) IS NULL AS `t`, + (NULL OR FALSE) IS NULL AS `t`, + (TRUE OR TRUE) AS `t`, + (TRUE OR FALSE) AS `t`, + (FALSE OR TRUE) AS `t`, + NOT (FALSE OR FALSE) AS `t` +-- !query schema +struct +-- !query output +true false true false true true true true true + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES + (TRUE, null, FALSE, null), + (FALSE, TRUE, null, null), + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT BOOL_AND(b1) AS n1, BOOL_OR(b3) AS n2 FROM bool_test WHERE 1 = 0 +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT + BOOL_AND(b1) AS f1, + BOOL_AND(b2) AS t2, + BOOL_AND(b3) AS f3, + BOOL_AND(b4) AS n4, + BOOL_AND(NOT b2) AS f5, + BOOL_AND(NOT b3) AS t6 +FROM bool_test +-- !query schema +struct +-- !query output +false true false NULL false true + + +-- !query +SELECT + EVERY(b1) AS f1, + EVERY(b2) AS t2, + EVERY(b3) AS f3, + EVERY(b4) AS n4, + EVERY(NOT b2) AS f5, + EVERY(NOT b3) AS t6 +FROM bool_test +-- !query schema +struct +-- !query output +false true false NULL false true 
+ + +-- !query +SELECT + BOOL_OR(b1) AS t1, + BOOL_OR(b2) AS t2, + BOOL_OR(b3) AS f3, + BOOL_OR(b4) AS n4, + BOOL_OR(NOT b2) AS f5, + BOOL_OR(NOT b3) AS t6 +FROM bool_test +-- !query schema +struct +-- !query output +true true false NULL false true + + +-- !query +select min(udf(unique1)) from tenk1 +-- !query schema +struct +-- !query output +0 + + +-- !query +select udf(max(unique1)) from tenk1 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique1) from tenk1 where udf(unique1) < 42 +-- !query schema +struct +-- !query output +41 + + +-- !query +select max(unique1) from tenk1 where unique1 > udf(42) +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique1) from tenk1 where udf(unique1) > 42000 +-- !query schema +struct +-- !query output +NULL + + +-- !query +select max(tenthous) from tenk1 where udf(thousand) = 33 +-- !query schema +struct +-- !query output +9033 + + +-- !query +select min(tenthous) from tenk1 where udf(thousand) = 33 +-- !query schema +struct +-- !query output +33 + + +-- !query +select distinct max(udf(unique2)) from tenk1 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique2) from tenk1 order by udf(1) +-- !query schema +struct +-- !query output +9999 + + +-- !query +select max(unique2) from tenk1 order by max(udf(unique2)) +-- !query schema +struct +-- !query output +9999 + + +-- !query +select udf(max(udf(unique2))) from tenk1 order by udf(max(unique2))+1 +-- !query schema +struct +-- !query output +9999 + + +-- !query +select t1.max_unique2, udf(g) from (select max(udf(unique2)) as max_unique2 FROM tenk1) t1 LATERAL VIEW explode(array(1,2,3)) t2 AS g order by g desc +-- !query schema +struct +-- !query output +9999 3 +9999 2 +9999 1 + + +-- !query +select udf(max(100)) from tenk1 +-- !query schema +struct +-- !query output +100 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part3.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out similarity index 81% rename from sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part3.sql.out rename to sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out index eff33f280cff4..f491d9b9ba3a8 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out @@ -2,21 +2,21 @@ -- Number of queries: 2 --- !query 0 +-- !query select udf(max(min(unique1))) from tenk1 --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output org.apache.spark.sql.AnalysisException It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; --- !query 1 +-- !query select udf((select udf(count(*)) from (values (1)) t0(inner_c))) as col from (values (2),(3)) t1(outer_c) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 1 1 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out similarity index 100% rename from sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-aggregates_part4.sql.out rename to sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out old mode 100644 new mode 100755 similarity index 67% rename from sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out rename to sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out index 44a764ce4e6dd..04c4f54b02a3e --- 
a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out @@ -2,243 +2,243 @@ -- Number of queries: 35 --- !query 0 +-- !query CREATE TABLE CASE_TBL ( i integer, f double ) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TABLE CASE2_TBL ( i integer, j integer ) USING parquet --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query INSERT INTO CASE_TBL VALUES (1, 10.1) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO CASE_TBL VALUES (2, 20.2) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO CASE_TBL VALUES (3, -30.3) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query INSERT INTO CASE_TBL VALUES (4, NULL) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query INSERT INTO CASE2_TBL VALUES (1, -1) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO CASE2_TBL VALUES (2, -2) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO CASE2_TBL VALUES (3, -3) --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO CASE2_TBL VALUES (2, -4) --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO CASE2_TBL VALUES (1, NULL) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query INSERT INTO CASE2_TBL VALUES (NULL, -6) --- !query 11 schema +-- !query schema struct<> --- !query 11 
output +-- !query output --- !query 12 +-- !query SELECT '3' AS `One`, CASE WHEN udf(1 < 2) THEN 3 END AS `Simple WHEN` --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 3 3 --- !query 13 +-- !query SELECT '' AS `One`, CASE WHEN 1 > 2 THEN udf(3) END AS `Simple default` --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output NULL --- !query 14 +-- !query SELECT '3' AS `One`, CASE WHEN udf(1) < 2 THEN udf(3) ELSE udf(4) END AS `Simple ELSE` --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 3 3 --- !query 15 +-- !query SELECT udf('4') AS `One`, CASE WHEN 1 > 2 THEN 3 ELSE 4 END AS `ELSE default` --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 4 4 --- !query 16 +-- !query SELECT udf('6') AS `One`, CASE WHEN udf(1 > 2) THEN 3 WHEN udf(4) < 5 THEN 6 ELSE 7 END AS `Two WHEN with default` --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 6 6 --- !query 17 +-- !query SELECT '7' AS `None`, CASE WHEN rand() < udf(0) THEN 1 END AS `NULL on no matches` --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 7 NULL --- !query 18 +-- !query SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END --- !query 18 schema -struct --- !query 18 output -1 +-- !query schema +struct +-- !query output +1.0 --- !query 19 +-- !query SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END --- !query 19 schema -struct --- !query 19 output -1 +-- !query schema +struct +-- !query output +1.0 --- !query 20 +-- !query SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl --- !query 20 schema -struct 100) THEN CAST(udf(cast((1 div 0) as string)) AS INT) ELSE CAST(udf(cast(0 as string)) AS INT) END:int> --- !query 20 output -0 -0 -0 -0 +-- !query schema +struct 100) THEN CAST(udf(cast((cast(1 as double) / cast(0 as double)) as string)) AS DOUBLE) ELSE CAST(CAST(udf(cast(0 as 
string)) AS INT) AS DOUBLE) END:double> +-- !query output +0.0 +0.0 +0.0 +0.0 --- !query 21 +-- !query SELECT CASE 'a' WHEN 'a' THEN udf(1) ELSE udf(2) END --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 1 --- !query 22 +-- !query SELECT '' AS `Five`, CASE WHEN i >= 3 THEN i END AS `>= 3 or Null` FROM CASE_TBL --- !query 22 schema +-- !query schema struct= 3 or Null:int> --- !query 22 output +-- !query output 3 4 NULL NULL --- !query 23 +-- !query SELECT '' AS `Five`, CASE WHEN i >= 3 THEN (i + i) ELSE i END AS `Simplest Math` FROM CASE_TBL --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 1 2 6 8 --- !query 24 +-- !query SELECT '' AS `Five`, i AS `Value`, CASE WHEN (i < 0) THEN 'small' WHEN (i = 0) THEN 'zero' @@ -247,16 +247,16 @@ SELECT '' AS `Five`, i AS `Value`, ELSE 'big' END AS `Category` FROM CASE_TBL --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 1 one 2 two 3 big 4 big --- !query 25 +-- !query SELECT '' AS `Five`, CASE WHEN ((i < 0) or (i < 0)) THEN 'small' WHEN ((i = 0) or (i = 0)) THEN 'zero' @@ -265,37 +265,37 @@ SELECT '' AS `Five`, ELSE 'big' END AS `Category` FROM CASE_TBL --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output big big one two --- !query 26 +-- !query SELECT * FROM CASE_TBL WHERE udf(COALESCE(f,i)) = 4 --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 4 NULL --- !query 27 +-- !query SELECT * FROM CASE_TBL WHERE udf(NULLIF(f,i)) = 2 --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output --- !query 28 +-- !query SELECT udf(COALESCE(a.f, b.i, b.j)) FROM CASE_TBL a, CASE2_TBL b --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output -30.3 -30.3 -30.3 @@ -322,24 +322,24 @@ struct --- !query 29 output +-- !query output 4 NULL 2 -2 4 NULL 2 -4 --- !query 30 +-- !query SELECT udf('') AS Five, NULLIF(a.i,b.i) AS 
`NULLIF(a.i,b.i)`, NULLIF(b.i, 4) AS `NULLIF(b.i,4)` FROM CASE_TBL a, CASE2_TBL b --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1 2 1 2 1 3 @@ -366,18 +366,18 @@ struct NULL 3 --- !query 31 +-- !query SELECT '' AS `Two`, * FROM CASE_TBL a, CASE2_TBL b WHERE udf(COALESCE(f,b.i) = 2) --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output 4 NULL 2 -2 4 NULL 2 -4 --- !query 32 +-- !query SELECT CASE (CASE vol('bar') WHEN udf('foo') THEN 'it was foo!' @@ -387,23 +387,23 @@ SELECT CASE WHEN udf('it was foo!') THEN 'foo recognized' WHEN 'it was bar!' THEN udf('bar recognized') ELSE 'unrecognized' END AS col --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output bar recognized --- !query 33 +-- !query DROP TABLE CASE_TBL --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output --- !query 34 +-- !query DROP TABLE CASE2_TBL --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out similarity index 86% rename from sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-join.sql.out rename to sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out index 6fcff129d7568..f113aee6d3b51 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out @@ -2,17 +2,17 @@ -- Number of queries: 185 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) AS v(f1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM (VALUES (123, 456), @@ 
-21,230 +21,230 @@ CREATE OR REPLACE TEMPORARY VIEW INT8_TBL AS SELECT * FROM (4567890123456789, 4567890123456789), (4567890123456789, -4567890123456789)) AS v(q1, q2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE OR REPLACE TEMPORARY VIEW FLOAT8_TBL AS SELECT * FROM (VALUES (0.0), (1004.30), (-34.84), (cast('1.2345678901234e+200' as double)), (cast('1.2345678901234e-200' as double))) AS v(f1) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE OR REPLACE TEMPORARY VIEW TEXT_TBL AS SELECT * FROM (VALUES ('doh!'), ('hi de ho neighbor')) AS v(f1) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE OR REPLACE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TABLE J1_TBL ( i integer, j integer, t string ) USING parquet --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query CREATE TABLE J2_TBL ( i integer, k integer ) USING parquet --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO J1_TBL VALUES (1, 4, 'one') --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO J1_TBL VALUES (2, 3, 'two') --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO J1_TBL VALUES (3, 2, 'three') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO J1_TBL VALUES (4, 1, 'four') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query INSERT INTO J1_TBL VALUES (5, 0, 'five') --- !query 11 schema +-- !query schema 
struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query INSERT INTO J1_TBL VALUES (6, 6, 'six') --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query INSERT INTO J1_TBL VALUES (7, 7, 'seven') --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query INSERT INTO J1_TBL VALUES (8, 8, 'eight') --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output --- !query 15 +-- !query INSERT INTO J1_TBL VALUES (0, NULL, 'zero') --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output --- !query 16 +-- !query INSERT INTO J1_TBL VALUES (NULL, NULL, 'null') --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output --- !query 17 +-- !query INSERT INTO J1_TBL VALUES (NULL, 0, 'zero') --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output --- !query 18 +-- !query INSERT INTO J2_TBL VALUES (1, -1) --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query INSERT INTO J2_TBL VALUES (2, 2) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output --- !query 20 +-- !query INSERT INTO J2_TBL VALUES (3, -3) --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query INSERT INTO J2_TBL VALUES (2, 4) --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output --- !query 22 +-- !query INSERT INTO J2_TBL VALUES (5, -5) --- !query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output --- !query 23 +-- !query INSERT INTO J2_TBL VALUES (5, -5) --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query INSERT INTO J2_TBL VALUES (0, NULL) --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query 
output --- !query 25 +-- !query INSERT INTO J2_TBL VALUES (NULL, NULL) --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output --- !query 26 +-- !query INSERT INTO J2_TBL VALUES (NULL, 0) --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output --- !query 27 +-- !query SELECT udf('') AS `xxx`, udf(i), udf(j), udf(t) FROM J1_TBL AS tx --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -258,12 +258,12 @@ struct --- !query 28 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -277,12 +277,12 @@ struct --- !query 29 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -296,12 +296,12 @@ struct --- !query 30 output +-- !query output 0 NULL zero 1 4 one 2 3 two @@ -315,12 +315,12 @@ struct --- !query 31 output +-- !query output 0 NULL zero 0 NULL 0 NULL zero 1 -1 0 NULL zero 2 2 @@ -422,12 +422,12 @@ struct --- !query 32 output +-- !query output 0 NULL zero 0 NULL 0 NULL zero 1 -1 0 NULL zero 2 2 @@ -529,22 +529,22 @@ struct NULL NULL null NULL NULL --- !query 33 +-- !query SELECT udf('') AS `xxx`, udf(i) AS i, udf(k), udf(t) AS t FROM J1_TBL CROSS JOIN J2_TBL --- !query 33 schema +-- !query schema struct<> --- !query 33 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'i' is ambiguous, could be: default.j1_tbl.i, default.j2_tbl.i.; line 1 pos 29 --- !query 34 +-- !query SELECT udf('') AS `xxx`, udf(t1.i) AS i, udf(k), udf(t) FROM J1_TBL t1 CROSS JOIN J2_TBL t2 --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 0 -1 zero 0 -3 zero 0 -5 zero @@ -646,13 +646,13 @@ struct --- !query 35 output +-- !query output 0 zero -1 0 zero -3 0 zero -5 @@ -754,12 +754,12 @@ struct --- !query 36 output +-- !query output 0 NULL zero 0 NULL 0 NULL 0 NULL zero 0 NULL 1 -1 0 NULL zero 0 NULL 2 2 @@ -1653,12 +1653,12 @@ struct --- !query 37 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 
@@ -1668,12 +1668,12 @@ struct --- !query 38 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1683,13 +1683,13 @@ struct --- !query 39 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1699,12 +1699,12 @@ struct 5 0 five -5 --- !query 40 +-- !query SELECT udf(udf('')) AS `xxx`, udf(i), udf(j), udf(t), udf(k) FROM J1_TBL NATURAL JOIN J2_TBL --- !query 40 schema +-- !query schema struct --- !query 40 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1714,12 +1714,12 @@ struct --- !query 41 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1729,23 +1729,23 @@ struct --- !query 42 output +-- !query output 0 NULL zero NULL 2 3 two 2 4 1 four 2 --- !query 43 +-- !query SELECT udf('') AS `xxx`, udf(J1_TBL.i), udf(udf(J1_TBL.j)), udf(J1_TBL.t), udf(J2_TBL.i), udf(J2_TBL.k) FROM J1_TBL JOIN J2_TBL ON (udf(J1_TBL.i) = J2_TBL.i) --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 0 NULL zero 0 NULL 1 4 one 1 -1 2 3 two 2 2 @@ -1755,23 +1755,23 @@ struct --- !query 44 output +-- !query output 0 NULL zero NULL 0 2 3 two 2 2 4 1 four 2 4 --- !query 45 +-- !query SELECT udf('') AS `xxx`, udf(J1_TBL.i), udf(J1_TBL.j), udf(J1_TBL.t), udf(J2_TBL.i), udf(J2_TBL.k) FROM J1_TBL JOIN J2_TBL ON (udf(J1_TBL.i) <= udf(udf(J2_TBL.k))) --- !query 45 schema +-- !query schema struct --- !query 45 output +-- !query output 0 NULL zero 2 2 0 NULL zero 2 4 0 NULL zero NULL 0 @@ -1783,13 +1783,13 @@ struct --- !query 46 output +-- !query output NULL NULL null NULL NULL 0 zero NULL 0 NULL zero NULL @@ -1805,13 +1805,13 @@ struct --- !query 47 output +-- !query output NULL NULL null NULL NULL 0 zero NULL 0 NULL zero NULL @@ -1827,12 +1827,12 @@ struct --- !query 48 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1844,12 +1844,12 @@ struct --- !query 49 output +-- !query output 0 NULL zero NULL 1 4 one -1 2 3 two 2 @@ -1861,13 +1861,13 @@ struct --- !query 50 output +-- !query 
output NULL NULL NULL NULL NULL NULL null NULL NULL 0 zero NULL @@ -1885,13 +1885,13 @@ struct --- !query 51 output +-- !query output NULL NULL NULL NULL NULL NULL null NULL NULL 0 zero NULL @@ -1909,226 +1909,226 @@ struct --- !query 52 output +-- !query output --- !query 53 +-- !query SELECT udf('') AS `xxx`, udf(i), udf(j), udf(t), udf(k) FROM J1_TBL LEFT JOIN J2_TBL USING (i) WHERE (udf(udf(i)) = udf(1)) --- !query 53 schema +-- !query schema struct --- !query 53 output +-- !query output 1 4 one -1 --- !query 54 +-- !query CREATE TABLE t1 (name STRING, n INTEGER) USING parquet --- !query 54 schema +-- !query schema struct<> --- !query 54 output +-- !query output --- !query 55 +-- !query CREATE TABLE t2 (name STRING, n INTEGER) USING parquet --- !query 55 schema +-- !query schema struct<> --- !query 55 output +-- !query output --- !query 56 +-- !query CREATE TABLE t3 (name STRING, n INTEGER) USING parquet --- !query 56 schema +-- !query schema struct<> --- !query 56 output +-- !query output --- !query 57 +-- !query INSERT INTO t1 VALUES ( 'bb', 11 ) --- !query 57 schema +-- !query schema struct<> --- !query 57 output +-- !query output --- !query 58 +-- !query INSERT INTO t2 VALUES ( 'bb', 12 ) --- !query 58 schema +-- !query schema struct<> --- !query 58 output +-- !query output --- !query 59 +-- !query INSERT INTO t2 VALUES ( 'cc', 22 ) --- !query 59 schema +-- !query schema struct<> --- !query 59 output +-- !query output --- !query 60 +-- !query INSERT INTO t2 VALUES ( 'ee', 42 ) --- !query 60 schema +-- !query schema struct<> --- !query 60 output +-- !query output --- !query 61 +-- !query INSERT INTO t3 VALUES ( 'bb', 13 ) --- !query 61 schema +-- !query schema struct<> --- !query 61 output +-- !query output --- !query 62 +-- !query INSERT INTO t3 VALUES ( 'cc', 23 ) --- !query 62 schema +-- !query schema struct<> --- !query 62 output +-- !query output --- !query 63 +-- !query INSERT INTO t3 VALUES ( 'dd', 33 ) --- !query 63 schema +-- !query schema struct<> 
--- !query 63 output +-- !query output --- !query 64 +-- !query SELECT * FROM t1 FULL JOIN t2 USING (name) FULL JOIN t3 USING (name) --- !query 64 schema +-- !query schema struct --- !query 64 output +-- !query output bb 11 12 13 cc NULL 22 23 dd NULL NULL 33 ee NULL 42 NULL --- !query 65 +-- !query SELECT * FROM (SELECT udf(name) as name, t2.n FROM t2) as s2 INNER JOIN (SELECT udf(udf(name)) as name, t3.n FROM t3) s3 USING (name) --- !query 65 schema +-- !query schema struct --- !query 65 output +-- !query output bb 12 13 cc 22 23 --- !query 66 +-- !query SELECT * FROM (SELECT udf(udf(name)) as name, t2.n FROM t2) as s2 LEFT JOIN (SELECT udf(name) as name, t3.n FROM t3) s3 USING (name) --- !query 66 schema +-- !query schema struct --- !query 66 output +-- !query output bb 12 13 cc 22 23 ee 42 NULL --- !query 67 +-- !query SELECT udf(name), udf(udf(s2.n)), udf(s3.n) FROM (SELECT * FROM t2) as s2 FULL JOIN (SELECT * FROM t3) s3 USING (name) --- !query 67 schema +-- !query schema struct --- !query 67 output +-- !query output bb 12 13 cc 22 23 dd NULL 33 ee 42 NULL --- !query 68 +-- !query SELECT * FROM (SELECT udf(udf(name)) as name, udf(n) as s2_n, udf(2) as s2_2 FROM t2) as s2 NATURAL INNER JOIN (SELECT udf(name) as name, udf(udf(n)) as s3_n, udf(3) as s3_2 FROM t3) s3 --- !query 68 schema +-- !query schema struct --- !query 68 output +-- !query output bb 12 2 13 3 cc 22 2 23 3 --- !query 69 +-- !query SELECT * FROM (SELECT udf(name) as name, udf(udf(n)) as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL LEFT JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 --- !query 69 schema +-- !query schema struct --- !query 69 output +-- !query output bb 12 2 13 3 cc 22 2 23 3 ee 42 2 NULL NULL --- !query 70 +-- !query SELECT * FROM (SELECT udf(name) as name, udf(n) as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(udf(n)) as s3_n, 3 as s3_2 FROM t3) s3 --- !query 70 schema +-- !query schema struct --- !query 70 output 
+-- !query output bb 12 2 13 3 cc 22 2 23 3 dd NULL NULL 33 3 ee 42 2 NULL NULL --- !query 71 +-- !query SELECT * FROM (SELECT udf(udf(name)) as name, udf(n) as s1_n, 1 as s1_1 FROM t1) as s1 NATURAL INNER JOIN (SELECT udf(name) as name, udf(n) as s2_n, 2 as s2_2 FROM t2) as s2 NATURAL INNER JOIN (SELECT udf(udf(udf(name))) as name, udf(n) as s3_n, 3 as s3_2 FROM t3) s3 --- !query 71 schema +-- !query schema struct --- !query 71 output +-- !query output bb 11 1 12 2 13 3 --- !query 72 +-- !query SELECT * FROM (SELECT udf(name) as name, udf(n) as s1_n, udf(udf(1)) as s1_1 FROM t1) as s1 NATURAL FULL JOIN (SELECT udf(name) as name, udf(udf(n)) as s2_n, udf(2) as s2_2 FROM t2) as s2 NATURAL FULL JOIN (SELECT udf(udf(name)) as name, udf(n) as s3_n, udf(3) as s3_2 FROM t3) s3 --- !query 72 schema +-- !query schema struct --- !query 72 output +-- !query output bb 11 1 12 2 13 3 cc NULL NULL 22 2 23 3 dd NULL NULL NULL NULL 33 3 ee NULL NULL 42 2 NULL NULL --- !query 73 +-- !query SELECT name, udf(udf(s1_n)), udf(s2_n), udf(s3_n) FROM (SELECT name, udf(udf(n)) as s1_n FROM t1) as s1 NATURAL FULL JOIN @@ -2137,16 +2137,16 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, udf(udf(n)) as s3_n FROM t3) as s3 ) ss2 --- !query 73 schema +-- !query schema struct --- !query 73 output +-- !query output bb 11 12 13 cc NULL 22 23 dd NULL NULL 33 ee NULL 42 NULL --- !query 74 +-- !query SELECT * FROM (SELECT name, n as s1_n FROM t1) as s1 NATURAL FULL JOIN @@ -2155,55 +2155,55 @@ NATURAL FULL JOIN NATURAL FULL JOIN (SELECT name, udf(n) as s3_n FROM t3) as s3 ) ss2 --- !query 74 schema +-- !query schema struct --- !query 74 output +-- !query output bb 11 12 2 13 cc NULL 22 2 23 dd NULL NULL NULL 33 ee NULL 42 2 NULL --- !query 75 +-- !query SELECT s1.name, udf(s1_n), s2.name, udf(udf(s2_n)) FROM (SELECT name, udf(n) as s1_n FROM t1) as s1 FULL JOIN (SELECT name, 2 as s2_n FROM t2) as s2 ON (udf(udf(s1_n)) = udf(s2_n)) --- !query 75 schema +-- !query schema struct --- !query 75 
output +-- !query output NULL NULL bb 2 NULL NULL cc 2 NULL NULL ee 2 bb 11 NULL NULL --- !query 76 +-- !query create or replace temporary view x as select * from (values (1,11), (2,22), (3,null), (4,44), (5,null)) as v(x1, x2) --- !query 76 schema +-- !query schema struct<> --- !query 76 output +-- !query output --- !query 77 +-- !query create or replace temporary view y as select * from (values (1,111), (2,222), (3,333), (4,null)) as v(y1, y2) --- !query 77 schema +-- !query schema struct<> --- !query 77 output +-- !query output --- !query 78 +-- !query select udf(udf(x1)), udf(x2) from x --- !query 78 schema +-- !query schema struct --- !query 78 output +-- !query output 1 11 2 22 3 NULL @@ -2211,22 +2211,22 @@ struct --- !query 79 output +-- !query output 1 111 2 222 3 333 4 NULL --- !query 80 +-- !query select * from x left join y on (udf(x1) = udf(udf(y1)) and udf(x2) is not null) --- !query 80 schema +-- !query schema struct --- !query 80 output +-- !query output 1 11 1 111 2 22 2 222 3 NULL NULL NULL @@ -2234,11 +2234,11 @@ struct 5 NULL NULL NULL --- !query 81 +-- !query select * from x left join y on (udf(udf(x1)) = udf(y1) and udf(y2) is not null) --- !query 81 schema +-- !query schema struct --- !query 81 output +-- !query output 1 11 1 111 2 22 2 222 3 NULL 3 333 @@ -2246,12 +2246,12 @@ struct 5 NULL NULL NULL --- !query 82 +-- !query select * from (x left join y on (udf(x1) = udf(udf(y1)))) left join x xx(xx1,xx2) on (udf(udf(x1)) = udf(xx1)) --- !query 82 schema +-- !query schema struct --- !query 82 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 3 NULL @@ -2259,12 +2259,12 @@ struct 5 NULL NULL NULL 5 NULL --- !query 83 +-- !query select * from (x left join y on (udf(x1) = udf(y1))) left join x xx(xx1,xx2) on (udf(x1) = xx1 and udf(x2) is not null) --- !query 83 schema +-- !query schema struct --- !query 83 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 NULL NULL @@ -2272,12 +2272,12 @@ struct 5 NULL 
NULL NULL NULL NULL --- !query 84 +-- !query select * from (x left join y on (x1 = udf(y1))) left join x xx(xx1,xx2) on (udf(x1) = udf(udf(xx1)) and udf(y2) is not null) --- !query 84 schema +-- !query schema struct --- !query 84 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 3 NULL @@ -2285,12 +2285,12 @@ struct 5 NULL NULL NULL NULL NULL --- !query 85 +-- !query select * from (x left join y on (udf(x1) = y1)) left join x xx(xx1,xx2) on (udf(udf(x1)) = udf(xx1) and udf(udf(xx2)) is not null) --- !query 85 schema +-- !query schema struct --- !query 85 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 NULL NULL @@ -2298,78 +2298,78 @@ struct 5 NULL NULL NULL NULL NULL --- !query 86 +-- !query select * from (x left join y on (udf(udf(x1)) = udf(udf(y1)))) left join x xx(xx1,xx2) on (udf(x1) = udf(xx1)) where (udf(x2) is not null) --- !query 86 schema +-- !query schema struct --- !query 86 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 4 44 4 NULL 4 44 --- !query 87 +-- !query select * from (x left join y on (udf(x1) = udf(y1))) left join x xx(xx1,xx2) on (udf(x1) = xx1) where (udf(y2) is not null) --- !query 87 schema +-- !query schema struct --- !query 87 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 3 NULL 3 333 3 NULL --- !query 88 +-- !query select * from (x left join y on (udf(x1) = udf(y1))) left join x xx(xx1,xx2) on (x1 = udf(xx1)) where (xx2 is not null) --- !query 88 schema +-- !query schema struct --- !query 88 output +-- !query output 1 11 1 111 1 11 2 22 2 222 2 22 4 44 4 NULL 4 44 --- !query 89 +-- !query select udf(udf(count(*))) from tenk1 a where udf(udf(unique1)) in (select udf(unique1) from tenk1 b join tenk1 c using (unique1) where udf(udf(b.unique2)) = udf(42)) --- !query 89 schema +-- !query schema struct --- !query 89 output +-- !query output 1 --- !query 90 +-- !query select udf(count(*)) from tenk1 x where udf(x.unique1) in (select udf(a.f1) from int4_tbl a,float8_tbl b where 
udf(udf(a.f1))=b.f1) and udf(x.unique1) = 0 and udf(x.unique1) in (select aa.f1 from int4_tbl aa,float8_tbl bb where aa.f1=udf(udf(bb.f1))) --- !query 90 schema +-- !query schema struct --- !query 90 output +-- !query output 1 --- !query 91 +-- !query select udf(udf(count(*))) from tenk1 x where udf(x.unique1) in (select udf(a.f1) from int4_tbl a,float8_tbl b where udf(udf(a.f1))=b.f1) and udf(x.unique1) = 0 and udf(udf(x.unique1)) in (select udf(aa.f1) from int4_tbl aa,float8_tbl bb where udf(aa.f1)=udf(udf(bb.f1))) --- !query 91 schema +-- !query schema struct --- !query 91 output +-- !query output 1 --- !query 92 +-- !query select * from int8_tbl i1 left join (int8_tbl i2 join (select udf(123) as x) ss on udf(udf(i2.q1)) = udf(x)) on udf(udf(i1.q2)) = udf(udf(i2.q2)) order by udf(udf(1)), 2 --- !query 92 schema +-- !query schema struct --- !query 92 output +-- !query output 4567890123456789 -4567890123456789 NULL NULL NULL 4567890123456789 123 NULL NULL NULL 123 456 123 456 123 @@ -2377,7 +2377,7 @@ struct 4567890123456789 4567890123456789 123 4567890123456789 123 --- !query 93 +-- !query select udf(count(*)) from (select udf(t3.tenthous) as x1, udf(coalesce(udf(t1.stringu1), udf(t2.stringu1))) as x2 @@ -2387,32 +2387,32 @@ from tenk1 t4, tenk1 t5 where udf(t4.thousand) = udf(t5.unique1) and udf(udf(ss.x1)) = t4.tenthous and udf(ss.x2) = udf(udf(t5.stringu1)) --- !query 93 schema +-- !query schema struct --- !query 93 output +-- !query output 1000 --- !query 94 +-- !query select udf(a.f1), udf(b.f1), udf(t.thousand), udf(t.tenthous) from tenk1 t, (select udf(udf(sum(udf(f1))+1)) as f1 from int4_tbl i4a) a, (select udf(sum(udf(f1))) as f1 from int4_tbl i4b) b where b.f1 = udf(t.thousand) and udf(a.f1) = udf(b.f1) and udf((udf(a.f1)+udf(b.f1)+999)) = udf(udf(t.tenthous)) --- !query 94 schema +-- !query schema struct --- !query 94 output +-- !query output --- !query 95 +-- !query select * from j1_tbl full join (select * from j2_tbl order by udf(udf(j2_tbl.i)) desc, 
udf(j2_tbl.k) asc) j2_tbl on udf(j1_tbl.i) = udf(j2_tbl.i) and udf(j1_tbl.i) = udf(j2_tbl.k) --- !query 95 schema +-- !query schema struct --- !query 95 output +-- !query output 0 NULL zero NULL NULL 1 4 one NULL NULL 2 3 two 2 2 @@ -2434,156 +2434,156 @@ NULL NULL NULL NULL NULL NULL NULL null NULL NULL --- !query 96 +-- !query select udf(count(*)) from (select * from tenk1 x order by udf(x.thousand), udf(udf(x.twothousand)), x.fivethous) x left join (select * from tenk1 y order by udf(y.unique2)) y on udf(x.thousand) = y.unique2 and x.twothousand = udf(y.hundred) and x.fivethous = y.unique2 --- !query 96 schema +-- !query schema struct --- !query 96 output +-- !query output 10000 --- !query 97 +-- !query DROP TABLE t1 --- !query 97 schema +-- !query schema struct<> --- !query 97 output +-- !query output --- !query 98 +-- !query DROP TABLE t2 --- !query 98 schema +-- !query schema struct<> --- !query 98 output +-- !query output --- !query 99 +-- !query DROP TABLE t3 --- !query 99 schema +-- !query schema struct<> --- !query 99 output +-- !query output --- !query 100 +-- !query DROP TABLE J1_TBL --- !query 100 schema +-- !query schema struct<> --- !query 100 output +-- !query output --- !query 101 +-- !query DROP TABLE J2_TBL --- !query 101 schema +-- !query schema struct<> --- !query 101 output +-- !query output --- !query 102 +-- !query create or replace temporary view tt1 as select * from (values (1, 11), (2, NULL)) as v(tt1_id, joincol) --- !query 102 schema +-- !query schema struct<> --- !query 102 output +-- !query output --- !query 103 +-- !query create or replace temporary view tt2 as select * from (values (21, 11), (22, 11)) as v(tt2_id, joincol) --- !query 103 schema +-- !query schema struct<> --- !query 103 output +-- !query output --- !query 104 +-- !query select tt1.*, tt2.* from tt1 left join tt2 on udf(udf(tt1.joincol)) = udf(tt2.joincol) --- !query 104 schema +-- !query schema struct --- !query 104 output +-- !query output 1 11 21 11 1 11 22 11 2 
NULL NULL NULL --- !query 105 +-- !query select tt1.*, tt2.* from tt2 right join tt1 on udf(udf(tt1.joincol)) = udf(udf(tt2.joincol)) --- !query 105 schema +-- !query schema struct --- !query 105 output +-- !query output 1 11 21 11 1 11 22 11 2 NULL NULL NULL --- !query 106 +-- !query select udf(count(*)) from tenk1 a, tenk1 b where udf(a.hundred) = b.thousand and udf(udf((b.fivethous % 10)) < 10) --- !query 106 schema +-- !query schema struct --- !query 106 output +-- !query output 100000 --- !query 107 +-- !query DROP TABLE IF EXISTS tt3 --- !query 107 schema +-- !query schema struct<> --- !query 107 output +-- !query output --- !query 108 +-- !query CREATE TABLE tt3(f1 int, f2 string) USING parquet --- !query 108 schema +-- !query schema struct<> --- !query 108 output +-- !query output --- !query 109 +-- !query INSERT INTO tt3 SELECT x.id, repeat('xyzzy', 100) FROM range(1,10001) x --- !query 109 schema +-- !query schema struct<> --- !query 109 output +-- !query output --- !query 110 +-- !query DROP TABLE IF EXISTS tt4 --- !query 110 schema +-- !query schema struct<> --- !query 110 output +-- !query output --- !query 111 +-- !query CREATE TABLE tt4(f1 int) USING parquet --- !query 111 schema +-- !query schema struct<> --- !query 111 output +-- !query output --- !query 112 +-- !query INSERT INTO tt4 VALUES (0),(1),(9999) --- !query 112 schema +-- !query schema struct<> --- !query 112 output +-- !query output --- !query 113 +-- !query SELECT udf(udf(a.f1)) as f1 FROM tt4 a LEFT JOIN ( @@ -2592,242 +2592,242 @@ LEFT JOIN ( WHERE udf(c.f1) IS NULL ) AS d ON udf(a.f1) = d.f1 WHERE udf(udf(d.f1)) IS NULL --- !query 113 schema +-- !query schema struct --- !query 113 output +-- !query output 0 1 9999 --- !query 114 +-- !query create or replace temporary view tt5 as select * from (values (1, 10), (1, 11)) as v(f1, f2) --- !query 114 schema +-- !query schema struct<> --- !query 114 output +-- !query output --- !query 115 +-- !query create or replace temporary view tt6 as 
select * from (values (1, 9), (1, 2), (2, 9)) as v(f1, f2) --- !query 115 schema +-- !query schema struct<> --- !query 115 output +-- !query output --- !query 116 +-- !query select * from tt5,tt6 where udf(tt5.f1) = udf(tt6.f1) and udf(tt5.f1) = udf(udf(tt5.f2) - udf(tt6.f2)) --- !query 116 schema +-- !query schema struct --- !query 116 output +-- !query output 1 10 1 9 --- !query 117 +-- !query create or replace temporary view xx as select * from (values (1), (2), (3)) as v(pkxx) --- !query 117 schema +-- !query schema struct<> --- !query 117 output +-- !query output --- !query 118 +-- !query create or replace temporary view yy as select * from (values (101, 1), (201, 2), (301, NULL)) as v(pkyy, pkxx) --- !query 118 schema +-- !query schema struct<> --- !query 118 output +-- !query output --- !query 119 +-- !query select udf(udf(yy.pkyy)) as yy_pkyy, udf(yy.pkxx) as yy_pkxx, udf(yya.pkyy) as yya_pkyy, udf(xxa.pkxx) as xxa_pkxx, udf(xxb.pkxx) as xxb_pkxx from yy left join (SELECT * FROM yy where pkyy = 101) as yya ON udf(yy.pkyy) = udf(yya.pkyy) left join xx xxa on udf(yya.pkxx) = udf(udf(xxa.pkxx)) left join xx xxb on udf(udf(coalesce (xxa.pkxx, 1))) = udf(xxb.pkxx) --- !query 119 schema +-- !query schema struct --- !query 119 output +-- !query output 101 1 101 1 1 201 2 NULL NULL 1 301 NULL NULL NULL 1 --- !query 120 +-- !query create or replace temporary view zt1 as select * from (values (53)) as v(f1) --- !query 120 schema +-- !query schema struct<> --- !query 120 output +-- !query output --- !query 121 +-- !query create or replace temporary view zt2 as select * from (values (53)) as v(f2) --- !query 121 schema +-- !query schema struct<> --- !query 121 output +-- !query output --- !query 122 +-- !query create or replace temporary view zt3(f3 int) using parquet --- !query 122 schema +-- !query schema struct<> --- !query 122 output +-- !query output --- !query 123 +-- !query select * from zt2 left join zt3 on (udf(f2) = udf(udf(f3))) left join zt1 on 
(udf(udf(f3)) = udf(f1)) where udf(f2) = 53 --- !query 123 schema +-- !query schema struct --- !query 123 output +-- !query output 53 NULL NULL --- !query 124 +-- !query create temp view zv1 as select *,'dummy' AS junk from zt1 --- !query 124 schema +-- !query schema struct<> --- !query 124 output +-- !query output --- !query 125 +-- !query select * from zt2 left join zt3 on (f2 = udf(f3)) left join zv1 on (udf(f3) = f1) where udf(udf(f2)) = 53 --- !query 125 schema +-- !query schema struct --- !query 125 output +-- !query output 53 NULL NULL NULL --- !query 126 +-- !query select udf(a.unique2), udf(a.ten), udf(b.tenthous), udf(b.unique2), udf(b.hundred) from tenk1 a left join tenk1 b on a.unique2 = udf(b.tenthous) where udf(a.unique1) = 42 and ((udf(b.unique2) is null and udf(a.ten) = 2) or udf(udf(b.hundred)) = udf(udf(3))) --- !query 126 schema +-- !query schema struct --- !query 126 output +-- !query output --- !query 127 +-- !query create or replace temporary view a (i integer) using parquet --- !query 127 schema +-- !query schema struct<> --- !query 127 output +-- !query output --- !query 128 +-- !query create or replace temporary view b (x integer, y integer) using parquet --- !query 128 schema +-- !query schema struct<> --- !query 128 output +-- !query output --- !query 129 +-- !query select * from a left join b on udf(i) = x and i = udf(y) and udf(x) = udf(i) --- !query 129 schema +-- !query schema struct --- !query 129 output +-- !query output --- !query 130 +-- !query select udf(t1.q2), udf(count(t2.*)) from int8_tbl t1 left join int8_tbl t2 on (udf(udf(t1.q2)) = t2.q1) group by udf(t1.q2) order by 1 --- !query 130 schema +-- !query schema struct --- !query 130 output +-- !query output -4567890123456789 0 123 2 456 0 4567890123456789 6 --- !query 131 +-- !query select udf(udf(t1.q2)), udf(count(t2.*)) from int8_tbl t1 left join (select * from int8_tbl) t2 on (udf(udf(t1.q2)) = udf(t2.q1)) group by udf(udf(t1.q2)) order by 1 --- !query 131 schema +-- 
!query schema struct --- !query 131 output +-- !query output -4567890123456789 0 123 2 456 0 4567890123456789 6 --- !query 132 +-- !query select udf(t1.q2) as q2, udf(udf(count(t2.*))) from int8_tbl t1 left join (select udf(q1) as q1, case when q2=1 then 1 else q2 end as q2 from int8_tbl) t2 on (udf(t1.q2) = udf(t2.q1)) group by t1.q2 order by 1 --- !query 132 schema +-- !query schema struct --- !query 132 output +-- !query output -4567890123456789 0 123 2 456 0 4567890123456789 6 --- !query 133 +-- !query create or replace temporary view a as select * from (values ('p'), ('q')) as v(code) --- !query 133 schema +-- !query schema struct<> --- !query 133 output +-- !query output --- !query 134 +-- !query create or replace temporary view b as select * from (values ('p', 1), ('p', 2)) as v(a, num) --- !query 134 schema +-- !query schema struct<> --- !query 134 output +-- !query output --- !query 135 +-- !query create or replace temporary view c as select * from (values ('A', 'p'), ('B', 'q'), ('C', null)) as v(name, a) --- !query 135 schema +-- !query schema struct<> --- !query 135 output +-- !query output --- !query 136 +-- !query select udf(c.name), udf(ss.code), udf(ss.b_cnt), udf(ss.const) from c left join (select a.code, coalesce(b_grp.cnt, 0) as b_cnt, -1 as const @@ -2837,15 +2837,15 @@ from c left join ) as ss on (udf(udf(c.a)) = udf(ss.code)) order by c.name --- !query 136 schema +-- !query schema struct --- !query 136 output +-- !query output A p 2 -1 B q 0 -1 C NULL NULL NULL --- !query 137 +-- !query SELECT * FROM ( SELECT 1 as key1 ) sub1 LEFT JOIN @@ -2861,13 +2861,13 @@ LEFT JOIN ON udf(sub4.key5) = sub3.key3 ) sub2 ON udf(udf(sub1.key1)) = udf(udf(sub2.key3)) --- !query 137 schema +-- !query schema struct --- !query 137 output +-- !query output 1 1 1 1 --- !query 138 +-- !query SELECT * FROM ( SELECT 1 as key1 ) sub1 LEFT JOIN @@ -2883,13 +2883,13 @@ LEFT JOIN ON sub4.key5 = sub3.key3 ) sub2 ON sub1.key1 = udf(udf(sub2.key3)) --- !query 138 schema +-- 
!query schema struct --- !query 138 output +-- !query output 1 1 1 1 --- !query 139 +-- !query SELECT udf(qq), udf(udf(unique1)) FROM ( SELECT udf(COALESCE(q1, 0)) AS qq FROM int8_tbl a ) AS ss1 @@ -2897,45 +2897,45 @@ SELECT udf(qq), udf(udf(unique1)) ( SELECT udf(udf(COALESCE(q2, -1))) AS qq FROM int8_tbl b ) AS ss2 USING (qq) INNER JOIN tenk1 c ON udf(qq) = udf(unique2) --- !query 139 schema +-- !query schema struct --- !query 139 output +-- !query output 123 4596 123 4596 456 7318 --- !query 140 +-- !query create or replace temporary view nt1 as select * from (values(1,true,true), (2,true,false), (3,false,false)) as v(id, a1, a2) --- !query 140 schema +-- !query schema struct<> --- !query 140 output +-- !query output --- !query 141 +-- !query create or replace temporary view nt2 as select * from (values(1,1,true,true), (2,2,true,false), (3,3,false,false)) as v(id, nt1_id, b1, b2) --- !query 141 schema +-- !query schema struct<> --- !query 141 output +-- !query output --- !query 142 +-- !query create or replace temporary view nt3 as select * from (values(1,1,true), (2,2,false), (3,3,true)) as v(id, nt2_id, c1) --- !query 142 schema +-- !query schema struct<> --- !query 142 output +-- !query output --- !query 143 +-- !query select udf(nt3.id) from nt3 as nt3 left join @@ -2947,17 +2947,17 @@ from nt3 as nt3 ) as ss2 on udf(ss2.id) = nt3.nt2_id where udf(nt3.id) = 1 and udf(ss2.b3) --- !query 143 schema +-- !query schema struct --- !query 143 output +-- !query output 1 --- !query 144 +-- !query select * from int4_tbl a full join int4_tbl b on true --- !query 144 schema +-- !query schema struct --- !query 144 output +-- !query output -123456 -123456 -123456 -2147483647 -123456 0 @@ -2985,11 +2985,11 @@ struct 2147483647 2147483647 --- !query 145 +-- !query select * from int4_tbl a full join int4_tbl b on false --- !query 145 schema +-- !query schema struct --- !query 145 output +-- !query output -123456 NULL -2147483647 NULL 0 NULL @@ -3002,27 +3002,27 @@ NULL 
123456 NULL 2147483647 --- !query 146 +-- !query select udf(count(*)) from tenk1 a join tenk1 b on udf(a.unique1) = udf(b.unique2) left join tenk1 c on udf(a.unique2) = udf(b.unique1) and udf(c.thousand) = udf(udf(a.thousand)) join int4_tbl on udf(b.thousand) = f1 --- !query 146 schema +-- !query schema struct --- !query 146 output +-- !query output 10 --- !query 147 +-- !query select udf(b.unique1) from tenk1 a join tenk1 b on udf(a.unique1) = udf(b.unique2) left join tenk1 c on udf(b.unique1) = 42 and c.thousand = udf(a.thousand) join int4_tbl i1 on udf(b.thousand) = udf(udf(f1)) right join int4_tbl i2 on udf(udf(i2.f1)) = udf(b.tenthous) order by udf(1) --- !query 147 schema +-- !query schema struct --- !query 147 output +-- !query output NULL NULL 0 @@ -3030,7 +3030,7 @@ NULL NULL --- !query 148 +-- !query select * from ( select udf(unique1), udf(q1), udf(udf(coalesce(unique1, -1)) + udf(q1)) as fault @@ -3038,43 +3038,43 @@ select * from ) ss where udf(fault) = udf(122) order by udf(fault) --- !query 148 schema +-- !query schema struct --- !query 148 output +-- !query output NULL 123 122 --- !query 149 +-- !query select udf(q1), udf(unique2), udf(thousand), udf(hundred) from int8_tbl a left join tenk1 b on udf(q1) = udf(unique2) where udf(coalesce(thousand,123)) = udf(q1) and udf(q1) = udf(udf(coalesce(hundred,123))) --- !query 149 schema +-- !query schema struct --- !query 149 output +-- !query output --- !query 150 +-- !query select udf(f1), udf(unique2), case when udf(udf(unique2)) is null then udf(f1) else 0 end from int4_tbl a left join tenk1 b on udf(f1) = udf(udf(unique2)) where (case when udf(unique2) is null then udf(f1) else 0 end) = 0 --- !query 150 schema +-- !query schema struct --- !query 150 output +-- !query output 0 0 0 --- !query 151 +-- !query select udf(a.unique1), udf(b.unique1), udf(c.unique1), udf(coalesce(b.twothousand, a.twothousand)) from tenk1 a left join tenk1 b on udf(b.thousand) = a.unique1 left join tenk1 c on udf(c.unique2) = 
udf(coalesce(b.twothousand, a.twothousand)) where a.unique2 < udf(10) and udf(udf(coalesce(b.twothousand, a.twothousand))) = udf(44) --- !query 151 schema +-- !query schema struct --- !query 151 output +-- !query output --- !query 152 +-- !query select * from text_tbl t1 inner join int8_tbl i8 @@ -3083,32 +3083,32 @@ select * from on udf(t1.f1) = udf(udf('doh!')) left join int4_tbl i4 on udf(udf(i8.q1)) = i4.f1 --- !query 152 schema +-- !query schema struct --- !query 152 output +-- !query output doh! 123 456 doh! NULL doh! 123 456 hi de ho neighbor NULL --- !query 153 +-- !query select * from (select udf(udf(1)) as id) as xx left join (tenk1 as a1 full join (select udf(1) as id) as yy on (udf(a1.unique1) = udf(yy.id))) on (xx.id = udf(udf(coalesce(yy.id)))) --- !query 153 schema +-- !query schema struct --- !query 153 output +-- !query output 1 1 2838 1 1 1 1 1 1 1 1 1 2 3 BAAAAA EFEAAA OOOOxx 1 --- !query 154 +-- !query select udf(a.q2), udf(b.q1) from int8_tbl a left join int8_tbl b on udf(a.q2) = coalesce(b.q1, 1) where udf(udf(coalesce(b.q1, 1)) > 0) --- !query 154 schema +-- !query schema struct --- !query 154 output +-- !query output -4567890123456789 NULL 123 123 123 123 @@ -3121,124 +3121,124 @@ struct --- !query 155 output +-- !query output --- !query 156 +-- !query create or replace temporary view child as select * from (values (1, 100), (4, 400)) as v(k, cd) --- !query 156 schema +-- !query schema struct<> --- !query 156 output +-- !query output --- !query 157 +-- !query select p.* from parent p left join child c on (udf(p.k) = udf(c.k)) --- !query 157 schema +-- !query schema struct --- !query 157 output +-- !query output 1 10 2 20 3 30 --- !query 158 +-- !query select p.*, linked from parent p left join (select c.*, udf(udf(true)) as linked from child c) as ss on (udf(p.k) = udf(udf(ss.k))) --- !query 158 schema +-- !query schema struct --- !query 158 output +-- !query output 1 10 true 2 20 NULL 3 30 NULL --- !query 159 +-- !query select p.* from 
parent p left join child c on (udf(p.k) = c.k) where p.k = udf(1) and udf(udf(p.k)) = udf(udf(2)) --- !query 159 schema +-- !query schema struct --- !query 159 output +-- !query output --- !query 160 +-- !query select p.* from (parent p left join child c on (udf(p.k) = c.k)) join parent x on p.k = udf(x.k) where udf(p.k) = udf(1) and udf(udf(p.k)) = udf(udf(2)) --- !query 160 schema +-- !query schema struct --- !query 160 output +-- !query output --- !query 161 +-- !query create or replace temporary view a as select * from (values (0), (1)) as v(id) --- !query 161 schema +-- !query schema struct<> --- !query 161 output +-- !query output --- !query 162 +-- !query create or replace temporary view b as select * from (values (0, 0), (1, NULL)) as v(id, a_id) --- !query 162 schema +-- !query schema struct<> --- !query 162 output +-- !query output --- !query 163 +-- !query SELECT * FROM b LEFT JOIN a ON (udf(b.a_id) = udf(a.id)) WHERE (udf(udf(a.id)) IS NULL OR udf(a.id) > 0) --- !query 163 schema +-- !query schema struct --- !query 163 output +-- !query output 1 NULL NULL --- !query 164 +-- !query SELECT b.* FROM b LEFT JOIN a ON (udf(b.a_id) = udf(a.id)) WHERE (udf(a.id) IS NULL OR udf(udf(a.id)) > 0) --- !query 164 schema +-- !query schema struct --- !query 164 output +-- !query output 1 NULL --- !query 165 +-- !query create or replace temporary view innertab as select * from (values (123L, 42L)) as v(id, dat1) --- !query 165 schema +-- !query schema struct<> --- !query 165 output +-- !query output --- !query 166 +-- !query SELECT * FROM (SELECT udf(1) AS x) ss1 LEFT JOIN (SELECT udf(q1), udf(q2), udf(COALESCE(dat1, q1)) AS y FROM int8_tbl LEFT JOIN innertab ON udf(udf(q2)) = id) ss2 ON true --- !query 166 schema +-- !query schema struct --- !query 166 output +-- !query output 1 123 456 123 1 123 4567890123456789 123 1 4567890123456789 -4567890123456789 4567890123456789 @@ -3246,163 +3246,163 @@ struct --- !query 167 output +-- !query output 
org.apache.spark.sql.AnalysisException Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 72 --- !query 168 +-- !query select * from int8_tbl x join (int4_tbl x cross join int4_tbl y) j on udf(q1) = udf(y.f1) --- !query 168 schema +-- !query schema struct<> --- !query 168 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`y.f1`' given input columns: [j.f1, j.f1, x.q1, x.q2]; line 2 pos 72 --- !query 169 +-- !query select * from int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on udf(q1) = udf(udf(f1)) --- !query 169 schema +-- !query schema struct --- !query 169 output +-- !query output --- !query 170 +-- !query select udf(t1.uunique1) from tenk1 t1 join tenk2 t2 on t1.two = udf(t2.two) --- !query 170 schema +-- !query schema struct<> --- !query 170 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`t1.uunique1`' given input columns: [t1.even, t2.even, t1.fivethous, t2.fivethous, t1.four, t2.four, t1.hundred, t2.hundred, t1.odd, t2.odd, t1.string4, t2.string4, t1.stringu1, t2.stringu1, t1.stringu2, t2.stringu2, t1.ten, t2.ten, t1.tenthous, t2.tenthous, t1.thousand, t2.thousand, t1.twenty, t2.twenty, t1.two, t2.two, t1.twothousand, t2.twothousand, t1.unique1, t2.unique1, t1.unique2, t2.unique2]; line 1 pos 11 --- !query 171 +-- !query select udf(udf(t2.uunique1)) from tenk1 t1 join tenk2 t2 on udf(t1.two) = t2.two --- !query 171 schema +-- !query schema struct<> --- !query 171 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`t2.uunique1`' given input columns: [t1.even, t2.even, t1.fivethous, t2.fivethous, t1.four, t2.four, t1.hundred, t2.hundred, t1.odd, t2.odd, t1.string4, t2.string4, t1.stringu1, t2.stringu1, t1.stringu2, t2.stringu2, t1.ten, t2.ten, t1.tenthous, t2.tenthous, t1.thousand, t2.thousand, t1.twenty, t2.twenty, t1.two, t2.two, t1.twothousand, t2.twothousand, t1.unique1, t2.unique1, t1.unique2, t2.unique2]; line 1 pos 15 --- !query 172 +-- 
!query select udf(uunique1) from tenk1 t1 join tenk2 t2 on udf(t1.two) = udf(t2.two) --- !query 172 schema +-- !query schema struct<> --- !query 172 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`uunique1`' given input columns: [t1.even, t2.even, t1.fivethous, t2.fivethous, t1.four, t2.four, t1.hundred, t2.hundred, t1.odd, t2.odd, t1.string4, t2.string4, t1.stringu1, t2.stringu1, t1.stringu2, t2.stringu2, t1.ten, t2.ten, t1.tenthous, t2.tenthous, t1.thousand, t2.thousand, t1.twenty, t2.twenty, t1.two, t2.two, t1.twothousand, t2.twothousand, t1.unique1, t2.unique1, t1.unique2, t2.unique2]; line 1 pos 11 --- !query 173 +-- !query select udf(udf(f1,g)) from int4_tbl a, (select udf(udf(f1)) as g) ss --- !query 173 schema +-- !query schema struct<> --- !query 173 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`f1`' given input columns: []; line 1 pos 55 --- !query 174 +-- !query select udf(f1,g) from int4_tbl a, (select a.f1 as g) ss --- !query 174 schema +-- !query schema struct<> --- !query 174 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`a.f1`' given input columns: []; line 1 pos 42 --- !query 175 +-- !query select udf(udf(f1,g)) from int4_tbl a cross join (select udf(f1) as g) ss --- !query 175 schema +-- !query schema struct<> --- !query 175 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`f1`' given input columns: []; line 1 pos 61 --- !query 176 +-- !query select udf(f1,g) from int4_tbl a cross join (select udf(udf(a.f1)) as g) ss --- !query 176 schema +-- !query schema struct<> --- !query 176 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`a.f1`' given input columns: []; line 1 pos 60 --- !query 177 +-- !query CREATE TABLE j1 (id1 int, id2 int) USING parquet --- !query 177 schema +-- !query schema struct<> --- !query 177 output +-- !query output --- !query 178 +-- !query CREATE TABLE j2 (id1 int, 
id2 int) USING parquet --- !query 178 schema +-- !query schema struct<> --- !query 178 output +-- !query output --- !query 179 +-- !query INSERT INTO j1 values(1,1),(1,2) --- !query 179 schema +-- !query schema struct<> --- !query 179 output +-- !query output --- !query 180 +-- !query INSERT INTO j2 values(1,1) --- !query 180 schema +-- !query schema struct<> --- !query 180 output +-- !query output --- !query 181 +-- !query INSERT INTO j2 values(1,2) --- !query 181 schema +-- !query schema struct<> --- !query 181 output +-- !query output --- !query 182 +-- !query select * from j1 inner join j2 on udf(j1.id1) = udf(j2.id1) and udf(udf(j1.id2)) = udf(j2.id2) where udf(j1.id1) % 1000 = 1 and udf(udf(j2.id1) % 1000) = 1 --- !query 182 schema +-- !query schema struct --- !query 182 output +-- !query output 1 1 1 1 1 2 1 2 --- !query 183 +-- !query drop table j1 --- !query 183 schema +-- !query schema struct<> --- !query 183 output +-- !query output --- !query 184 +-- !query drop table j2 --- !query 184 schema +-- !query schema struct<> --- !query 184 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out similarity index 72% rename from sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_having.sql.out rename to sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index f731d11c6d3da..68113afdfae30 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -2,186 +2,186 @@ -- Number of queries: 22 --- !query 0 +-- !query CREATE TABLE test_having (a int, b int, c string, d string) USING parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query INSERT INTO test_having VALUES (0, 1, 
'XXXX', 'A') --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b') --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c') --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D') --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e') --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F') --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query INSERT INTO test_having VALUES (6, 4, 'cccc', 'g') --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO test_having VALUES (7, 4, 'cccc', 'h') --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT udf(b), udf(c) FROM test_having GROUP BY b, c HAVING udf(count(*)) = 1 ORDER BY udf(b), udf(c) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 XXXX 3 bbbb --- !query 12 +-- !query SELECT udf(b), udf(c) FROM test_having GROUP BY b, c HAVING udf(b) = 3 ORDER BY udf(b), udf(c) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 3 BBBB 3 bbbb --- !query 13 
+-- !query SELECT udf(c), max(udf(a)) FROM test_having GROUP BY c HAVING udf(count(*)) > 2 OR udf(min(a)) = udf(max(a)) ORDER BY c --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output XXXX 0 bbbb 5 --- !query 14 +-- !query SELECT udf(udf(min(udf(a)))), udf(udf(max(udf(a)))) FROM test_having HAVING udf(udf(min(udf(a)))) = udf(udf(max(udf(a)))) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output --- !query 15 +-- !query SELECT udf(min(udf(a))), udf(udf(max(a))) FROM test_having HAVING udf(min(a)) < udf(max(udf(a))) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 0 9 --- !query 16 +-- !query SELECT udf(a) FROM test_having HAVING udf(min(a)) < udf(max(a)) --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and 'default.test_having.`a`' is not an aggregate function. Wrap '(min(default.test_having.`a`) AS `min(a#x)`, max(default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'default.test_having.`a`' in first() (or first_value) if you don't care which value you get.; --- !query 17 +-- !query SELECT 1 AS one FROM test_having HAVING udf(a) > 1 --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`a`' given input columns: [one]; line 1 pos 44 --- !query 18 +-- !query SELECT 1 AS one FROM test_having HAVING udf(udf(1) > udf(2)) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output --- !query 19 +-- !query SELECT 1 AS one FROM test_having HAVING udf(udf(1) < udf(2)) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 1 --- !query 20 +-- !query SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output 1 
--- !query 21 +-- !query DROP TABLE test_having --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out similarity index 71% rename from sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out rename to sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out index a60cbf33b9b24..11cb682ee1494 100755 --- a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_implicit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out @@ -2,101 +2,101 @@ -- Number of queries: 38 --- !query 0 +-- !query CREATE TABLE test_missing_target (a int, b int, c string, d string) using parquet --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query INSERT INTO test_missing_target VALUES (0, 1, 'XXXX', 'A') --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query INSERT INTO test_missing_target VALUES (1, 2, 'ABAB', 'b') --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query INSERT INTO test_missing_target VALUES (2, 2, 'ABAB', 'c') --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query INSERT INTO test_missing_target VALUES (3, 3, 'BBBB', 'D') --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query INSERT INTO test_missing_target VALUES (4, 3, 'BBBB', 'e') --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query INSERT INTO test_missing_target VALUES (5, 3, 'bbbb', 'F') --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- 
!query output --- !query 7 +-- !query INSERT INTO test_missing_target VALUES (6, 4, 'cccc', 'g') --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query INSERT INTO test_missing_target VALUES (7, 4, 'cccc', 'h') --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query INSERT INTO test_missing_target VALUES (8, 4, 'CCCC', 'I') --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query INSERT INTO test_missing_target VALUES (9, 4, 'CCCC', 'j') --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY udf(test_missing_target.c) ORDER BY udf(c) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output ABAB 2 BBBB 2 CCCC 2 @@ -105,12 +105,12 @@ bbbb 1 cccc 2 --- !query 12 +-- !query SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(test_missing_target.c) ORDER BY udf(c) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 2 2 2 @@ -119,43 +119,43 @@ struct 2 --- !query 13 +-- !query SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`b`' given input columns: [CAST(udf(cast(count(1) as string)) AS BIGINT)]; line 1 pos 75 --- !query 14 +-- !query SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 2 3 4 --- !query 15 +-- !query SELECT udf(test_missing_target.b), udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b) --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 1 2 2 3 3 4 4 --- !query 16 +-- !query 
SELECT udf(c) FROM test_missing_target ORDER BY udf(a) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output XXXX ABAB ABAB @@ -168,30 +168,30 @@ CCCC CCCC --- !query 17 +-- !query SELECT udf(count(*)) FROM test_missing_target GROUP BY udf(b) ORDER BY udf(b) desc --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 4 3 2 1 --- !query 18 +-- !query SELECT udf(count(*)) FROM test_missing_target ORDER BY udf(1) desc --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 10 --- !query 19 +-- !query SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 1 ORDER BY 1 --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output ABAB 2 BBBB 2 CCCC 2 @@ -200,32 +200,32 @@ bbbb 1 cccc 2 --- !query 20 +-- !query SELECT udf(c), udf(count(*)) FROM test_missing_target GROUP BY 3 --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output org.apache.spark.sql.AnalysisException GROUP BY position 3 is not in select list (valid range is [1, 2]); line 1 pos 63 --- !query 21 +-- !query SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y WHERE udf(x.a) = udf(y.a) GROUP BY udf(b) ORDER BY udf(b) --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14 --- !query 22 +-- !query SELECT udf(a), udf(a) FROM test_missing_target ORDER BY udf(a) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output 0 0 1 1 2 2 @@ -238,123 +238,129 @@ struct --- !query 23 output -0 0 -0 0 -1 1 -1 1 -2 2 -2 2 -3 3 -3 3 -4 4 -4 4 - - --- !query 24 +-- !query schema +struct +-- !query output +0.0 0.0 +0.5 0.5 +1.0 1.0 +1.5 1.5 +2.0 2.0 +2.5 2.5 +3.0 3.0 +3.5 3.5 +4.0 4.0 +4.5 4.5 + + +-- !query SELECT udf(a/2), udf(a/2) FROM test_missing_target GROUP BY udf(a/2) ORDER BY udf(a/2) --- 
!query 24 schema -struct --- !query 24 output -0 0 -1 1 -2 2 -3 3 -4 4 - - --- !query 25 +-- !query schema +struct +-- !query output +0.0 0.0 +0.5 0.5 +1.0 1.0 +1.5 1.5 +2.0 2.0 +2.5 2.5 +3.0 3.0 +3.5 3.5 +4.0 4.0 +4.5 4.5 + + +-- !query SELECT udf(x.b), udf(count(*)) FROM test_missing_target x, test_missing_target y WHERE udf(x.a) = udf(y.a) GROUP BY udf(x.b) ORDER BY udf(x.b) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 1 1 2 2 3 3 4 4 --- !query 26 +-- !query SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y WHERE udf(x.a) = udf(y.a) GROUP BY udf(x.b) ORDER BY udf(x.b) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 1 2 3 4 --- !query 27 +-- !query SELECT udf(a%2), udf(count(udf(b))) FROM test_missing_target GROUP BY udf(test_missing_target.a%2) ORDER BY udf(test_missing_target.a%2) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 0 5 1 5 --- !query 28 +-- !query SELECT udf(count(c)) FROM test_missing_target GROUP BY udf(lower(test_missing_target.c)) ORDER BY udf(lower(test_missing_target.c)) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 2 3 4 1 --- !query 29 +-- !query SELECT udf(count(udf(a))) FROM test_missing_target GROUP BY udf(a) ORDER BY udf(b) --- !query 29 schema +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`b`' given input columns: [CAST(udf(cast(count(cast(udf(cast(a as string)) as int)) as string)) AS BIGINT)]; line 1 pos 80 --- !query 30 +-- !query SELECT udf(count(b)) FROM test_missing_target GROUP BY udf(b/2) ORDER BY udf(b/2) --- !query 30 schema +-- !query schema struct --- !query 30 output +-- !query output 1 -5 +2 +3 4 --- !query 31 +-- !query SELECT udf(lower(test_missing_target.c)), udf(count(udf(c))) FROM test_missing_target GROUP BY udf(lower(c)) ORDER BY udf(lower(c)) --- !query 31 schema +-- 
!query schema struct --- !query 31 output +-- !query output abab 2 bbbb 3 cccc 4 xxxx 1 --- !query 32 +-- !query SELECT udf(a) FROM test_missing_target ORDER BY udf(upper(udf(d))) --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output 0 1 2 @@ -367,54 +373,57 @@ struct 9 --- !query 33 +-- !query SELECT udf(count(b)) FROM test_missing_target GROUP BY udf((b + 1) / 2) ORDER BY udf((b + 1) / 2) desc --- !query 33 schema +-- !query schema struct --- !query 33 output -7 +-- !query output +4 3 +2 +1 --- !query 34 +-- !query SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y WHERE udf(x.a) = udf(y.a) GROUP BY udf(b/2) ORDER BY udf(b/2) --- !query 34 schema +-- !query schema struct<> --- !query 34 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14 --- !query 35 +-- !query SELECT udf(x.b/2), udf(count(udf(x.b))) FROM test_missing_target x, test_missing_target y WHERE udf(x.a) = udf(y.a) GROUP BY udf(x.b/2) ORDER BY udf(x.b/2) --- !query 35 schema -struct --- !query 35 output -0 1 -1 5 -2 4 +-- !query schema +struct +-- !query output +0.5 1 +1.0 2 +1.5 3 +2.0 4 --- !query 36 +-- !query SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y WHERE udf(x.a) = udf(y.a) GROUP BY udf(x.b/2) --- !query 36 schema +-- !query schema struct<> --- !query 36 output +-- !query output org.apache.spark.sql.AnalysisException Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21 --- !query 37 +-- !query DROP TABLE test_missing_target --- !query 37 schema +-- !query schema struct<> --- !query 37 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out index 3d7c64054a6ac..e66948dcdea34 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/udf/udf-count.sql.out @@ -2,27 +2,27 @@ -- Number of queries: 5 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) AS testData(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT udf(count(*)), udf(count(1)), udf(count(null)), udf(count(a)), udf(count(b)), udf(count(a + b)), udf(count((a, b))) FROM testData --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 7 7 0 5 5 4 7 --- !query 2 +-- !query SELECT udf(count(DISTINCT 1)), udf(count(DISTINCT null)), @@ -31,25 +31,25 @@ SELECT udf(count(DISTINCT (a + b))), udf(count(DISTINCT (a, b))) FROM testData --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 0 2 2 2 6 --- !query 3 +-- !query SELECT udf(count(a, b)), udf(count(b, a)), udf(count(testData.*)) FROM testData --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 4 4 4 --- !query 4 +-- !query SELECT udf(count(DISTINCT a, b)), udf(count(DISTINCT b, a)), udf(count(DISTINCT *)), udf(count(DISTINCT testData.*)) FROM testData --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 3 3 3 3 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out index 98d3ad37a8dfa..fdddfc55978b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out @@ -2,35 +2,35 @@ -- Number of queries: 13 --- !query 0 +-- !query create temporary view nt1 as select * from values ("one", 1), ("two", 2), ("three", 3) as nt1(k, v1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary 
view nt2 as select * from values ("one", 1), ("two", 22), ("one", 5) as nt2(k, v2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM nt1 cross join nt2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output one 1 one 1 one 1 one 5 one 1 two 22 @@ -42,82 +42,82 @@ two 2 one 5 two 2 two 22 --- !query 3 +-- !query SELECT * FROM nt1 cross join nt2 where udf(nt1.k) = udf(nt2.k) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output one 1 one 1 one 1 one 5 two 2 two 22 --- !query 4 +-- !query SELECT * FROM nt1 cross join nt2 on (udf(nt1.k) = udf(nt2.k)) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output one 1 one 1 one 1 one 5 two 2 two 22 --- !query 5 +-- !query SELECT * FROM nt1 cross join nt2 where udf(nt1.v1) = "1" and udf(nt2.v2) = "22" --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output one 1 two 22 --- !query 6 +-- !query SELECT udf(a.key), udf(b.key) FROM (SELECT udf(k) key FROM nt1 WHERE v1 < 2) a CROSS JOIN (SELECT udf(k) key FROM nt2 WHERE v2 = 22) b --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output one two --- !query 7 +-- !query create temporary view A(a, va) as select * from nt1 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query create temporary view B(b, vb) as select * from nt1 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output --- !query 9 +-- !query create temporary view C(c, vc) as select * from nt1 --- !query 9 schema +-- !query schema struct<> --- !query 9 output +-- !query output --- !query 10 +-- !query create temporary view D(d, vd) as select * from nt1 --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output --- !query 11 +-- !query select * from ((A join B on (udf(a) = udf(b))) cross join C) join D on 
(udf(a) = udf(d)) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output one 1 one 1 one 1 one 1 one 1 one 1 three 3 one 1 one 1 one 1 two 2 one 1 @@ -129,11 +129,11 @@ two 2 two 2 three 3 two 2 two 2 two 2 two 2 two 2 --- !query 12 +-- !query SELECT * FROM nt1 CROSS JOIN nt2 ON (udf(nt1.k) > udf(nt2.k)) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output three 3 one 1 three 3 one 5 two 2 one 1 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out index b7bfad0e538ac..2613120e004df 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out @@ -2,25 +2,25 @@ -- Number of queries: 27 --- !query 0 +-- !query CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (1), (2), (2), (3), (5), (5), (null) AS tab2(c1) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW tab3 AS SELECT * FROM VALUES (1, 2), (1, 2), @@ -28,13 +28,13 @@ CREATE TEMPORARY VIEW tab3 AS SELECT * FROM VALUES (2, 3), (2, 2) AS tab3(k, v) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES (1, 2), (2, 3), @@ -42,45 +42,45 @@ CREATE TEMPORARY VIEW tab4 AS SELECT * FROM VALUES (2, 2), (2, 20) AS tab4(k, v) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query SELECT udf(c1) FROM tab1 EXCEPT ALL SELECT udf(c1) FROM tab2 --- !query 4 schema +-- !query schema struct --- !query 4 output 
+-- !query output 0 2 2 NULL --- !query 5 +-- !query SELECT udf(c1) FROM tab1 MINUS ALL SELECT udf(c1) FROM tab2 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 0 2 2 NULL --- !query 6 +-- !query SELECT udf(c1) FROM tab1 EXCEPT ALL SELECT udf(c1) FROM tab2 WHERE udf(c1) IS NOT NULL --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 0 2 2 @@ -88,23 +88,23 @@ NULL NULL --- !query 7 +-- !query SELECT udf(c1) FROM tab1 WHERE udf(c1) > 5 EXCEPT ALL SELECT udf(c1) FROM tab2 --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output --- !query 8 +-- !query SELECT udf(c1) FROM tab1 EXCEPT ALL SELECT udf(c1) FROM tab2 WHERE udf(c1 > udf(6)) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 0 1 2 @@ -116,13 +116,13 @@ NULL NULL --- !query 9 +-- !query SELECT udf(c1) FROM tab1 EXCEPT ALL SELECT CAST(udf(1) AS BIGINT) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 0 2 2 @@ -133,65 +133,65 @@ NULL NULL --- !query 10 +-- !query SELECT udf(c1) FROM tab1 EXCEPT ALL SELECT array(1) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException ExceptAll can only be performed on tables with the compatible column types. 
array <> int at the first column of the second table; --- !query 11 +-- !query SELECT udf(k), v FROM tab3 EXCEPT ALL SELECT k, udf(v) FROM tab4 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 2 1 3 --- !query 12 +-- !query SELECT k, udf(v) FROM tab4 EXCEPT ALL SELECT udf(k), v FROM tab3 --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 2 2 2 20 --- !query 13 +-- !query SELECT udf(k), udf(v) FROM tab4 EXCEPT ALL SELECT udf(k), udf(v) FROM tab3 INTERSECT DISTINCT SELECT udf(k), udf(v) FROM tab4 --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 2 2 2 20 --- !query 14 +-- !query SELECT udf(k), v FROM tab4 EXCEPT ALL SELECT k, udf(v) FROM tab3 EXCEPT DISTINCT SELECT udf(k), udf(v) FROM tab4 --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output --- !query 15 +-- !query SELECT k, udf(v) FROM tab3 EXCEPT ALL SELECT udf(k), udf(v) FROM tab4 @@ -199,24 +199,24 @@ UNION ALL SELECT udf(k), v FROM tab3 EXCEPT DISTINCT SELECT k, udf(v) FROM tab4 --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 3 --- !query 16 +-- !query SELECT k FROM tab3 EXCEPT ALL SELECT k, v FROM tab4 --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.AnalysisException ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; --- !query 17 +-- !query SELECT udf(k), udf(v) FROM tab3 EXCEPT ALL SELECT udf(k), udf(v) FROM tab4 @@ -224,13 +224,13 @@ UNION SELECT udf(k), udf(v) FROM tab3 EXCEPT DISTINCT SELECT udf(k), udf(v) FROM tab4 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 3 --- !query 18 +-- !query SELECT udf(k), udf(v) FROM tab3 MINUS ALL SELECT k, udf(v) FROM tab4 @@ -238,13 +238,13 @@ UNION SELECT udf(k), udf(v) FROM tab3 MINUS DISTINCT SELECT k, 
udf(v) FROM tab4 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 3 --- !query 19 +-- !query SELECT k, udf(v) FROM tab3 EXCEPT ALL SELECT udf(k), v FROM tab4 @@ -252,13 +252,13 @@ EXCEPT DISTINCT SELECT k, udf(v) FROM tab3 EXCEPT DISTINCT SELECT udf(k), v FROM tab4 --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT * FROM (SELECT tab3.k, udf(tab4.v) @@ -272,13 +272,13 @@ FROM (SELECT udf(tab3.k), FROM tab3 JOIN tab4 ON tab3.k = udf(tab4.k)) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output --- !query 21 +-- !query SELECT * FROM (SELECT udf(udf(tab3.k)), udf(tab4.v) @@ -292,9 +292,9 @@ FROM (SELECT udf(tab4.v) AS k, FROM tab3 JOIN tab4 ON udf(tab3.k) = udf(tab4.k)) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output 1 2 1 2 1 2 @@ -304,43 +304,43 @@ struct --- !query 22 output +-- !query output 3 --- !query 23 +-- !query DROP VIEW IF EXISTS tab1 --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output --- !query 24 +-- !query DROP VIEW IF EXISTS tab2 --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output --- !query 25 +-- !query DROP VIEW IF EXISTS tab3 --- !query 25 schema +-- !query schema struct<> --- !query 25 output +-- !query output --- !query 26 +-- !query DROP VIEW IF EXISTS tab4 --- !query 26 schema +-- !query schema struct<> --- !query 26 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out index 0badaf050e194..054ee00ecc2ae 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except.sql.out @@ -2,20 +2,20 @@ -- Number of queries: 9 --- !query 0 +-- !query create temporary view t1 as select * from values ("one", 1), 
("two", 2), ("three", 3), ("one", NULL) as t1(k, v) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view t2 as select * from values ("one", 1), ("two", 22), @@ -23,71 +23,71 @@ create temporary view t2 as select * from values ("one", NULL), (NULL, 5) as t2(k, v) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT udf(k), udf(v) FROM t1 EXCEPT SELECT udf(k), udf(v) FROM t2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output three 3 two 2 --- !query 3 +-- !query SELECT * FROM t1 EXCEPT SELECT * FROM t1 where udf(v) <> 1 and v <> udf(2) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output one 1 one NULL two 2 --- !query 4 +-- !query SELECT * FROM t1 where udf(v) <> 1 and v <> udf(22) EXCEPT SELECT * FROM t1 where udf(v) <> 2 and v >= udf(3) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output two 2 --- !query 5 +-- !query SELECT t1.* FROM t1, t2 where t1.k = t2.k EXCEPT SELECT t1.* FROM t1, t2 where t1.k = t2.k and t1.k != udf('one') --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output one 1 one NULL --- !query 6 +-- !query SELECT * FROM t2 where v >= udf(1) and udf(v) <> 22 EXCEPT SELECT * FROM t1 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL 5 one 5 --- !query 7 +-- !query SELECT (SELECT min(udf(k)) FROM t2 WHERE t2.k = t1.k) min_t2 FROM t1 MINUS SELECT (SELECT udf(min(k)) FROM t2) abs_min_t2 FROM t1 WHERE t1.k = udf('one') --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL two --- !query 8 +-- !query SELECT t1.k FROM t1 WHERE t1.v <= (SELECT udf(max(udf(t2.v))) @@ -99,7 +99,7 @@ FROM t1 WHERE udf(t1.v) >= (SELECT min(udf(t2.v)) FROM t2 WHERE t2.k = t1.k) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query 
output two diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out index de297ab166965..dc291a7696ea7 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out @@ -2,21 +2,21 @@ -- Number of queries: 29 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) AS testData(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT udf(a + b), b, udf(SUM(a - b)) FROM testData GROUP BY udf(a + b), b WITH CUBE --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -32,11 +32,11 @@ NULL 2 0 NULL NULL 3 --- !query 2 +-- !query SELECT udf(a), udf(b), SUM(b) FROM testData GROUP BY udf(a), b WITH CUBE --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 1 1 1 2 2 1 NULL 3 @@ -51,11 +51,11 @@ NULL 2 6 NULL NULL 9 --- !query 3 +-- !query SELECT udf(a + b), b, SUM(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -69,11 +69,11 @@ struct NULL NULL 3 --- !query 4 +-- !query SELECT udf(a), b, udf(SUM(b)) FROM testData GROUP BY udf(a), b WITH ROLLUP --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 1 1 1 1 2 2 1 NULL 3 @@ -86,21 +86,21 @@ struct --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY udf(course), year --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL NULL 113000 Java NULL 50000 Java 2012 20000 @@ -110,11 +110,11 @@ dotNET 2012 15000 dotNET 2013 
48000 --- !query 7 +-- !query SELECT course, year, SUM(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, udf(year) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL NULL 113000 NULL 2012 35000 NULL 2013 78000 @@ -126,41 +126,41 @@ dotNET 2012 15000 dotNET 2013 48000 --- !query 8 +-- !query SELECT course, udf(year), SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output Java NULL 50000 NULL 2012 35000 NULL 2013 78000 dotNET NULL 63000 --- !query 9 +-- !query SELECT course, year, udf(SUM(earnings)) FROM courseSales GROUP BY course, year GROUPING SETS(course) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output Java NULL 50000 dotNET NULL 63000 --- !query 10 +-- !query SELECT udf(course), year, SUM(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NULL 2012 35000 NULL 2013 78000 --- !query 11 +-- !query SELECT course, udf(SUM(earnings)) AS sum FROM courseSales GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, udf(sum) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output NULL 113000 Java 20000 Java 30000 @@ -171,12 +171,12 @@ dotNET 48000 dotNET 63000 --- !query 12 +-- !query SELECT course, SUM(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY udf(course), sum --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output NULL 113000 3 Java 20000 0 Java 30000 0 @@ -187,12 +187,12 @@ dotNET 48000 0 dotNET 63000 1 --- !query 13 +-- !query SELECT udf(course), udf(year), GROUPING(course), GROUPING(year), GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) --- !query 
13 schema +-- !query schema struct --- !query 13 output +-- !query output Java 2012 0 0 0 Java 2013 0 0 0 Java NULL 0 1 1 @@ -204,29 +204,29 @@ dotNET 2013 0 0 0 dotNET NULL 0 1 1 --- !query 14 +-- !query SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, udf(year) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException grouping() can only be used with GroupingSets/Cube/Rollup; --- !query 15 +-- !query SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY udf(course), year --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 16 +-- !query SELECT course, year, grouping__id FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, course, udf(year) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output Java 2012 0 Java 2013 0 dotNET 2012 0 @@ -238,40 +238,40 @@ NULL 2013 2 NULL NULL 3 --- !query 17 +-- !query SELECT course, year FROM courseSales GROUP BY CUBE(course, year) HAVING GROUPING(year) = 1 AND GROUPING_ID(course, year) > 0 ORDER BY course, udf(year) --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output NULL NULL Java NULL dotNET NULL --- !query 18 +-- !query SELECT course, udf(year) FROM courseSales GROUP BY udf(course), year HAVING GROUPING(course) > 0 --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 19 +-- !query SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROUPING_ID(course) > 0 --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only 
be used with GroupingSets/Cube/Rollup; --- !query 20 +-- !query SELECT udf(course), year FROM courseSales GROUP BY CUBE(course, year) HAVING grouping__id > 0 --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output Java NULL NULL 2012 NULL 2013 @@ -279,12 +279,12 @@ NULL NULL dotNET NULL --- !query 21 +-- !query SELECT course, year, GROUPING(course), GROUPING(year) FROM courseSales GROUP BY CUBE(course, year) ORDER BY GROUPING(course), GROUPING(year), course, udf(year) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output Java 2012 0 0 Java 2013 0 0 dotNET 2012 0 0 @@ -296,12 +296,12 @@ NULL 2013 1 0 NULL NULL 1 1 --- !query 22 +-- !query SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY CUBE(course, year) ORDER BY GROUPING(course), GROUPING(year), course, udf(year) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output Java 2012 0 Java 2013 0 dotNET 2012 0 @@ -313,29 +313,29 @@ NULL 2013 2 NULL NULL 3 --- !query 23 +-- !query SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING(course) --- !query 23 schema +-- !query schema struct<> --- !query 23 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 24 +-- !query SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GROUPING_ID(course) --- !query 24 schema +-- !query schema struct<> --- !query 24 output +-- !query output org.apache.spark.sql.AnalysisException grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; --- !query 25 +-- !query SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY grouping__id, udf(course), year --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output Java 2012 Java 2013 dotNET 2012 @@ -347,11 +347,11 @@ NULL 2013 NULL NULL --- !query 26 +-- !query SELECT udf(a + 
b) AS k1, udf(b) AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -367,11 +367,11 @@ NULL 2 0 NULL NULL 3 --- !query 27 +-- !query SELECT udf(udf(a + b)) AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b) --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 2 1 0 2 NULL 0 3 1 1 @@ -385,10 +385,10 @@ struct NULL NULL 3 --- !query 28 +-- !query SELECT udf(a + b), udf(udf(b)) AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output NULL 1 3 NULL 2 0 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index febe47b5ba84e..6403406413db9 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -2,101 +2,101 @@ -- Number of queries: 52 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) AS testData(a, b) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT udf(a), udf(COUNT(b)) FROM testData --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. 
Wrap '(CAST(udf(cast(count(b) as string)) AS BIGINT) AS `CAST(udf(cast(count(b) as string)) AS BIGINT)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; --- !query 2 +-- !query SELECT COUNT(udf(a)), udf(COUNT(b)) FROM testData --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 7 7 --- !query 3 +-- !query SELECT udf(a), COUNT(udf(b)) FROM testData GROUP BY a --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 2 2 3 2 NULL 1 --- !query 4 +-- !query SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output org.apache.spark.sql.AnalysisException expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; --- !query 5 +-- !query SELECT COUNT(udf(a)), COUNT(udf(b)) FROM testData GROUP BY udf(a) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 0 1 2 2 2 2 3 2 --- !query 6 +-- !query SELECT 'foo', COUNT(udf(a)) FROM testData GROUP BY 1 --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output foo 7 --- !query 7 +-- !query SELECT 'foo' FROM testData WHERE a = 0 GROUP BY udf(1) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output --- !query 8 +-- !query SELECT 'foo', udf(APPROX_COUNT_DISTINCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output --- !query 9 +-- !query SELECT 'foo', MAX(STRUCT(udf(a))) FROM testData WHERE a = 0 GROUP BY udf(1) --- !query 9 schema +-- !query schema struct> --- !query 9 output +-- !query output --- !query 10 +-- !query SELECT udf(a + b), udf(COUNT(b)) FROM testData GROUP BY a + b --- !query 10 schema +-- !query schema struct --- !query 
10 output +-- !query output 2 1 3 2 4 2 @@ -104,132 +104,132 @@ struct --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; --- !query 12 +-- !query SELECT udf(a + 1) + 1, udf(COUNT(b)) FROM testData GROUP BY udf(a + 1) --- !query 12 schema +-- !query schema struct<(CAST(udf(cast((a + 1) as string)) AS INT) + 1):int,CAST(udf(cast(count(b) as string)) AS BIGINT):bigint> --- !query 12 output +-- !query output 3 2 4 2 5 2 NULL 1 --- !query 13 +-- !query SELECT SKEWNESS(udf(a)), udf(KURTOSIS(a)), udf(MIN(a)), MAX(udf(a)), udf(AVG(udf(a))), udf(VARIANCE(a)), STDDEV(udf(a)), udf(SUM(a)), udf(COUNT(a)) FROM testData --- !query 13 schema -struct --- !query 13 output +-- !query schema +struct +-- !query output -0.2723801058145729 -1.5069204152249134 1 3 2.142857142857143 0.8095238095238094 0.8997354108424372 15 7 --- !query 14 +-- !query SELECT COUNT(DISTINCT udf(b)), udf(COUNT(DISTINCT b, c)) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY udf(a) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output 1 1 --- !query 15 +-- !query SELECT udf(a) AS k, COUNT(udf(b)) FROM testData GROUP BY k --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 1 2 2 2 3 2 NULL 1 --- !query 16 +-- !query SELECT a AS k, udf(COUNT(b)) FROM testData GROUP BY k HAVING k > 1 --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 2 2 3 2 --- !query 17 +-- !query SELECT udf(COUNT(b)) AS k FROM testData GROUP BY k --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException aggregate functions are not allowed in GROUP BY, but found CAST(udf(cast(count(b) as string)) AS BIGINT); --- !query 18 +-- !query CREATE OR REPLACE TEMPORARY VIEW 
testDataHasSameNameWithAlias AS SELECT * FROM VALUES (1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v) --- !query 18 schema +-- !query schema struct<> --- !query 18 output +-- !query output --- !query 19 +-- !query SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf(a) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; --- !query 20 +-- !query set spark.sql.groupByAliases=false --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output spark.sql.groupByAliases false --- !query 21 +-- !query SELECT a AS k, udf(COUNT(udf(b))) FROM testData GROUP BY k --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`k`' given input columns: [testdata.a, testdata.b]; line 1 pos 57 --- !query 22 +-- !query SELECT udf(a), COUNT(udf(1)) FROM testData WHERE false GROUP BY udf(a) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output --- !query 23 +-- !query SELECT udf(COUNT(1)) FROM testData WHERE false --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 0 --- !query 24 +-- !query SELECT 1 FROM (SELECT udf(COUNT(1)) FROM testData WHERE false) t --- !query 24 schema +-- !query schema struct<1:int> --- !query 24 output +-- !query output 1 --- !query 25 +-- !query SELECT 1 from ( SELECT 1 AS z, udf(MIN(a.x)) @@ -237,88 +237,88 @@ SELECT 1 from ( WHERE false ) b where b.z != b.z --- !query 25 schema +-- !query schema struct<1:int> --- !query 25 output +-- !query output --- !query 26 +-- !query SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, 
y) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 1.0 1.0 3 --- !query 27 +-- !query SELECT udf(1) FROM range(10) HAVING true --- !query 27 schema +-- !query schema struct --- !query 27 output +-- !query output 1 --- !query 28 +-- !query SELECT udf(udf(1)) FROM range(10) HAVING MAX(id) > 0 --- !query 28 schema +-- !query schema struct --- !query 28 output +-- !query output 1 --- !query 29 +-- !query SELECT udf(id) FROM range(10) HAVING id > 0 --- !query 29 schema +-- !query schema struct<> --- !query 29 output +-- !query output org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.; --- !query 30 +-- !query CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (1, true), (1, false), (2, true), (3, false), (3, null), (4, null), (4, null), (5, null), (5, true), (5, false) AS test_agg(k, v) --- !query 30 schema +-- !query schema struct<> --- !query 30 output +-- !query output --- !query 31 +-- !query SELECT udf(every(v)), udf(some(v)), any(v) FROM test_agg WHERE 1 = 0 --- !query 31 schema +-- !query schema struct --- !query 31 output +-- !query output NULL NULL NULL --- !query 32 +-- !query SELECT udf(every(udf(v))), some(v), any(v) FROM test_agg WHERE k = 4 --- !query 32 schema +-- !query schema struct --- !query 32 output +-- !query output NULL NULL NULL --- !query 33 +-- !query SELECT every(v), udf(some(v)), any(v) FROM test_agg WHERE k = 5 --- !query 33 schema +-- !query schema struct --- !query 33 output +-- !query output false true true --- !query 34 +-- !query SELECT udf(k), every(v), udf(some(v)), any(v) FROM test_agg GROUP BY udf(k) --- !query 34 schema +-- !query schema struct --- !query 34 output +-- !query output 1 false true true 2 true true true 3 false false false @@ -326,25 +326,25 @@ struct --- !query 35 output +-- !query 
output 1 false 3 false 5 false --- !query 36 +-- !query SELECT udf(k), udf(every(v)) FROM test_agg GROUP BY udf(k) HAVING every(v) IS NULL --- !query 36 schema +-- !query schema struct --- !query 36 output +-- !query output 4 NULL --- !query 37 +-- !query SELECT udf(k), udf(Every(v)) AS every FROM test_agg @@ -353,13 +353,13 @@ WHERE k = 2 FROM test_agg WHERE k = 1) GROUP BY udf(k) --- !query 37 schema +-- !query schema struct --- !query 37 output +-- !query output 2 true --- !query 38 +-- !query SELECT udf(udf(k)), Every(v) AS every FROM test_agg @@ -368,53 +368,53 @@ WHERE k = 2 FROM test_agg WHERE k = 1) GROUP BY udf(udf(k)) --- !query 38 schema +-- !query schema struct --- !query 38 output +-- !query output --- !query 39 +-- !query SELECT every(udf(1)) --- !query 39 schema +-- !query schema struct<> --- !query 39 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'every(CAST(udf(cast(1 as string)) AS INT))' due to data type mismatch: Input to function 'every' should have been boolean, but it's [int].; line 1 pos 7 --- !query 40 +-- !query SELECT some(udf(1S)) --- !query 40 schema +-- !query schema struct<> --- !query 40 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'some(CAST(udf(cast(1 as string)) AS SMALLINT))' due to data type mismatch: Input to function 'some' should have been boolean, but it's [smallint].; line 1 pos 7 --- !query 41 +-- !query SELECT any(udf(1L)) --- !query 41 schema +-- !query schema struct<> --- !query 41 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'any(CAST(udf(cast(1 as string)) AS BIGINT))' due to data type mismatch: Input to function 'any' should have been boolean, but it's [bigint].; line 1 pos 7 --- !query 42 +-- !query SELECT udf(every("true")) --- !query 42 schema +-- !query schema struct<> --- !query 42 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'every('true')' due to data type mismatch: Input to 
function 'every' should have been boolean, but it's [string].; line 1 pos 11 --- !query 43 +-- !query SELECT k, v, every(v) OVER (PARTITION BY k ORDER BY v) FROM test_agg --- !query 43 schema +-- !query schema struct --- !query 43 output +-- !query output 1 false false 1 true false 2 true true @@ -427,11 +427,11 @@ struct --- !query 44 output +-- !query output 1 false false 1 true true 2 true true @@ -444,11 +444,11 @@ struct --- !query 45 output +-- !query output 1 false false 1 true true 2 true true @@ -461,37 +461,37 @@ struct 1L --- !query 46 schema +-- !query schema struct --- !query 46 output +-- !query output 10 --- !query 47 +-- !query SELECT k, udf(max(v)) FROM test_agg GROUP BY k HAVING max(v) = true --- !query 47 schema +-- !query schema struct --- !query 47 output +-- !query output 1 true 2 true 5 true --- !query 48 +-- !query SELECT * FROM (SELECT udf(COUNT(*)) AS cnt FROM test_agg) WHERE cnt > 1L --- !query 48 schema +-- !query schema struct --- !query 48 output +-- !query output 10 --- !query 49 +-- !query SELECT udf(count(*)) FROM test_agg WHERE count(*) > 1L --- !query 49 schema +-- !query schema struct<> --- !query 49 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. @@ -499,11 +499,11 @@ Expression in where clause: [(count(1) > 1L)] Invalid expressions: [count(1)]; --- !query 50 +-- !query SELECT udf(count(*)) FROM test_agg WHERE count(*) + 1L > 1L --- !query 50 schema +-- !query schema struct<> --- !query 50 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
@@ -511,11 +511,11 @@ Expression in where clause: [((count(1) + 1L) > 1L)] Invalid expressions: [count(1)]; --- !query 51 +-- !query SELECT udf(count(*)) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1 --- !query 51 schema +-- !query schema struct<> --- !query 51 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out index 1effcc8470e19..9be27bb77f81a 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-having.sql.out @@ -2,48 +2,48 @@ -- Number of queries: 5 --- !query 0 +-- !query create temporary view hav as select * from values ("one", 1), ("two", 2), ("three", 3), ("one", 5) as hav(k, v) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT udf(k) AS k, udf(sum(v)) FROM hav GROUP BY k HAVING udf(sum(v)) > 2 --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output one 6 three 3 --- !query 2 +-- !query SELECT udf(count(udf(k))) FROM hav GROUP BY v + 1 HAVING v + 1 = udf(2) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 --- !query 3 +-- !query SELECT udf(MIN(t.v)) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(udf(COUNT(udf(1))) > 0) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 --- !query 4 +-- !query SELECT udf(a + b) FROM VALUES (1L, 2), (3L, 4) AS T(a, b) GROUP BY a + b HAVING a + b > udf(1) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 3 7 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out index 
2cf24e50c80a5..d78d347bc9802 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out @@ -2,152 +2,152 @@ -- Number of queries: 17 --- !query 0 +-- !query select udf(col1), udf(col2) from values ("one", 1) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output one 1 --- !query 1 +-- !query select udf(col1), udf(udf(col2)) from values ("one", 1) as data --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output one 1 --- !query 2 +-- !query select udf(a), b from values ("one", 1) as data(a, b) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output one 1 --- !query 3 +-- !query select udf(a) from values 1, 2, 3 as data(a) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 3 --- !query 4 +-- !query select udf(a), b from values ("one", 1), ("two", 2), ("three", null) as data(a, b) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output one 1 three NULL two 2 --- !query 5 +-- !query select udf(a), b from values ("one", null), ("two", null) as data(a, b) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output one NULL two NULL --- !query 6 +-- !query select udf(a), b from values ("one", 1), ("two", 2L) as data(a, b) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output one 1 two 2 --- !query 7 +-- !query select udf(udf(a)), udf(b) from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output one 1 two 4 --- !query 8 +-- !query select udf(a), b from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b) --- !query 8 schema +-- !query schema struct> --- !query 8 output +-- !query output one [0,1] two [2,3] --- !query 9 +-- !query select udf(a), b from values ("one", 2.0), ("two", 
3.0D) as data(a, b) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output one 2.0 two 3.0 --- !query 10 +-- !query select udf(a), b from values ("one", rand(5)), ("two", 3.0D) as data(a, b) --- !query 10 schema +-- !query schema struct<> --- !query 10 output +-- !query output org.apache.spark.sql.AnalysisException cannot evaluate expression rand(5) in inline table definition; line 1 pos 37 --- !query 11 +-- !query select udf(a), udf(b) from values ("one", 2.0), ("two") as data(a, b) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException expected 2 columns but found 1 columns in row 1; line 1 pos 27 --- !query 12 +-- !query select udf(a), udf(b) from values ("one", array(0, 1)), ("two", struct(1, 2)) as data(a, b) --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException incompatible types found in column b for inline table; line 1 pos 27 --- !query 13 +-- !query select udf(a), udf(b) from values ("one"), ("two") as data(a, b) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException expected 2 columns but found 1 columns in row 0; line 1 pos 27 --- !query 14 +-- !query select udf(a), udf(b) from values ("one", random_not_exist_func(1)), ("two", 2) as data(a, b) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException Undefined function: 'random_not_exist_func'. 
This function is neither a registered temporary function nor a permanent function registered in the database 'default'.; line 1 pos 42 --- !query 15 +-- !query select udf(a), udf(b) from values ("one", count(1)), ("two", 2) as data(a, b) --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException cannot evaluate expression count(1) in inline table definition; line 1 pos 42 --- !query 16 +-- !query select udf(a), b from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991-12-06 01:00:00.0'), timestamp('1991-12-06 12:00:00.0'))) as data(a, b) --- !query 16 schema +-- !query schema struct> --- !query 16 output -1991-12-06 00:00:00 [1991-12-06 01:00:00.0,1991-12-06 12:00:00.0] +-- !query output +1991-12-06 00:00:00 [1991-12-06 01:00:00,1991-12-06 12:00:00] diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-inner-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inner-join.sql.out index 120f2d39f73dc..107fe9eb2fe55 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inner-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inner-join.sql.out @@ -2,65 +2,65 @@ -- Number of queries: 7 --- !query 0 +-- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE 
TEMPORARY VIEW ta AS SELECT udf(a) AS a, udf('a') AS tag FROM t1 UNION ALL SELECT udf(a) AS a, udf('b') AS tag FROM t2 --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query CREATE TEMPORARY VIEW tb AS SELECT udf(a) AS a, udf('a') AS tag FROM t3 UNION ALL SELECT udf(a) AS a, udf('b') AS tag FROM t4 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT tb.* FROM ta INNER JOIN tb ON ta.a = tb.a AND ta.tag = tb.tag --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 a 1 a 1 b diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out index 0cb82be2da228..b3735ae153267 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 22 --- !query 0 +-- !query CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (1, 2), (1, 2), @@ -12,13 +12,13 @@ CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (null, null), (null, null) AS tab1(k, v) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (1, 2), (1, 2), @@ -27,19 +27,19 @@ CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (null, null), (null, null) AS tab2(k, v) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT udf(k), v FROM tab1 INTERSECT ALL SELECT k, udf(v) FROM tab2 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 2 1 2 2 3 @@ -47,80 +47,80 @@ NULL NULL NULL NULL --- !query 3 +-- !query SELECT k, udf(v) FROM tab1 INTERSECT ALL SELECT udf(k), v FROM tab1 WHERE udf(k) = 1 --- !query 3 schema +-- !query schema struct 
--- !query 3 output +-- !query output 1 2 1 2 1 3 1 3 --- !query 4 +-- !query SELECT udf(k), udf(v) FROM tab1 WHERE k > udf(2) INTERSECT ALL SELECT udf(k), udf(v) FROM tab2 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT udf(k), v FROM tab1 INTERSECT ALL SELECT udf(k), v FROM tab2 WHERE udf(udf(k)) > 3 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output --- !query 6 +-- !query SELECT udf(k), v FROM tab1 INTERSECT ALL SELECT CAST(udf(1) AS BIGINT), CAST(udf(2) AS BIGINT) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 1 2 --- !query 7 +-- !query SELECT k, udf(v) FROM tab1 INTERSECT ALL SELECT array(1), udf(2) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output org.apache.spark.sql.AnalysisException IntersectAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; --- !query 8 +-- !query SELECT udf(k) FROM tab1 INTERSECT ALL SELECT udf(k), udf(v) FROM tab2 --- !query 8 schema +-- !query schema struct<> --- !query 8 output +-- !query output org.apache.spark.sql.AnalysisException IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; --- !query 9 +-- !query SELECT udf(k), v FROM tab2 INTERSECT ALL SELECT k, udf(v) FROM tab1 INTERSECT ALL SELECT udf(k), udf(v) FROM tab2 --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 2 1 2 2 3 @@ -128,7 +128,7 @@ NULL NULL NULL NULL --- !query 10 +-- !query SELECT udf(k), v FROM tab1 EXCEPT SELECT k, udf(v) FROM tab2 @@ -136,9 +136,9 @@ UNION ALL SELECT k, udf(udf(v)) FROM tab1 INTERSECT ALL SELECT udf(k), v FROM tab2 --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 1 2 1 2 1 3 @@ -147,7 +147,7 @@ NULL NULL NULL NULL --- !query 11 +-- !query 
SELECT udf(k), udf(v) FROM tab1 EXCEPT SELECT udf(k), v FROM tab2 @@ -155,13 +155,13 @@ EXCEPT SELECT k, udf(v) FROM tab1 INTERSECT ALL SELECT udf(k), udf(udf(v)) FROM tab2 --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output 1 3 --- !query 12 +-- !query ( ( ( @@ -175,13 +175,13 @@ struct --- !query 12 output +-- !query output --- !query 13 +-- !query SELECT * FROM (SELECT udf(tab1.k), udf(tab2.v) @@ -195,9 +195,9 @@ FROM (SELECT udf(tab1.k), FROM tab1 JOIN tab2 ON udf(tab1.k) = udf(udf(tab2.k))) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output 1 2 1 2 1 2 @@ -209,7 +209,7 @@ struct --- !query 14 output +-- !query output --- !query 15 +-- !query SELECT udf(v) FROM tab1 GROUP BY v INTERSECT ALL SELECT udf(udf(k)) FROM tab2 GROUP BY k --- !query 15 schema +-- !query schema struct --- !query 15 output +-- !query output 2 3 NULL --- !query 16 +-- !query SET spark.sql.legacy.setopsPrecedence.enabled= true --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output spark.sql.legacy.setopsPrecedence.enabled true --- !query 17 +-- !query SELECT udf(k), v FROM tab1 EXCEPT SELECT k, udf(v) FROM tab2 @@ -257,9 +257,9 @@ UNION ALL SELECT udf(k), udf(v) FROM tab1 INTERSECT ALL SELECT udf(udf(k)), udf(v) FROM tab2 --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output 1 2 1 2 2 3 @@ -267,7 +267,7 @@ NULL NULL NULL NULL --- !query 18 +-- !query SELECT k, udf(v) FROM tab1 EXCEPT SELECT udf(k), v FROM tab2 @@ -275,33 +275,33 @@ UNION ALL SELECT udf(k), udf(v) FROM tab1 INTERSECT SELECT udf(k), udf(udf(v)) FROM tab2 --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 2 2 3 NULL NULL --- !query 19 +-- !query SET spark.sql.legacy.setopsPrecedence.enabled = false --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output spark.sql.legacy.setopsPrecedence.enabled false --- !query 20 +-- !query DROP VIEW IF 
EXISTS tab1 --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output --- !query 21 +-- !query DROP VIEW IF EXISTS tab2 --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out index e79d01fb14d60..0802eb9a9f62b 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-join-empty-relation.sql.out @@ -2,193 +2,193 @@ -- Number of queries: 24 --- !query 0 +-- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT udf(t1.a), udf(empty_table.a) FROM t1 INNER JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a))) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output --- !query 4 +-- !query SELECT udf(t1.a), udf(udf(empty_table.a)) FROM t1 CROSS JOIN empty_table ON (udf(udf(t1.a)) = udf(empty_table.a)) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT udf(udf(t1.a)), empty_table.a FROM t1 LEFT OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a)) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 1 NULL --- !query 6 +-- !query SELECT udf(t1.a), udf(empty_table.a) FROM t1 RIGHT OUTER JOIN empty_table ON (udf(t1.a) = 
udf(empty_table.a)) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output --- !query 7 +-- !query SELECT udf(t1.a), empty_table.a FROM t1 FULL OUTER JOIN empty_table ON (udf(t1.a) = udf(empty_table.a)) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 1 NULL --- !query 8 +-- !query SELECT udf(udf(t1.a)) FROM t1 LEFT SEMI JOIN empty_table ON (udf(t1.a) = udf(udf(empty_table.a))) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output --- !query 9 +-- !query SELECT udf(t1.a) FROM t1 LEFT ANTI JOIN empty_table ON (udf(t1.a) = udf(empty_table.a)) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 1 --- !query 10 +-- !query SELECT udf(empty_table.a), udf(t1.a) FROM empty_table INNER JOIN t1 ON (udf(udf(empty_table.a)) = udf(t1.a)) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output --- !query 11 +-- !query SELECT udf(empty_table.a), udf(udf(t1.a)) FROM empty_table CROSS JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a))) --- !query 11 schema +-- !query schema struct --- !query 11 output +-- !query output --- !query 12 +-- !query SELECT udf(udf(empty_table.a)), udf(t1.a) FROM empty_table LEFT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output --- !query 13 +-- !query SELECT empty_table.a, udf(t1.a) FROM empty_table RIGHT OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) --- !query 13 schema +-- !query schema struct --- !query 13 output +-- !query output NULL 1 --- !query 14 +-- !query SELECT empty_table.a, udf(udf(t1.a)) FROM empty_table FULL OUTER JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) --- !query 14 schema +-- !query schema struct --- !query 14 output +-- !query output NULL 1 --- !query 15 +-- !query SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN t1 ON (udf(empty_table.a) = udf(udf(t1.a))) --- !query 15 schema +-- 
!query schema struct --- !query 15 output +-- !query output --- !query 16 +-- !query SELECT empty_table.a FROM empty_table LEFT ANTI JOIN t1 ON (udf(empty_table.a) = udf(t1.a)) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output --- !query 17 +-- !query SELECT udf(empty_table.a) FROM empty_table INNER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a))) --- !query 17 schema +-- !query schema struct --- !query 17 output +-- !query output --- !query 18 +-- !query SELECT udf(udf(empty_table.a)) FROM empty_table CROSS JOIN empty_table AS empty_table2 ON (udf(udf(empty_table.a)) = udf(empty_table2.a)) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output --- !query 19 +-- !query SELECT udf(empty_table.a) FROM empty_table LEFT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output --- !query 20 +-- !query SELECT udf(udf(empty_table.a)) FROM empty_table RIGHT OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(udf(empty_table2.a))) --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output --- !query 21 +-- !query SELECT udf(empty_table.a) FROM empty_table FULL OUTER JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output --- !query 22 +-- !query SELECT udf(udf(empty_table.a)) FROM empty_table LEFT SEMI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output --- !query 23 +-- !query SELECT udf(empty_table.a) FROM empty_table LEFT ANTI JOIN empty_table AS empty_table2 ON (udf(empty_table.a) = udf(empty_table2.a)) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-natural-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-natural-join.sql.out index 950809ddcaf25..a8233a0e398b0 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-natural-join.sql.out @@ -2,63 +2,63 @@ -- Number of queries: 6 --- !query 0 +-- !query create temporary view nt1 as select * from values ("one", 1), ("two", 2), ("three", 3) as nt1(k, v1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view nt2 as select * from values ("one", 1), ("two", 22), ("one", 5) as nt2(k, v2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM nt1 natural join nt2 where udf(k) = "one" --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output one 1 1 one 1 5 --- !query 3 +-- !query SELECT * FROM nt1 natural left join nt2 where k <> udf("") order by v1, v2 --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output one 1 1 one 1 5 two 2 22 three 3 NULL --- !query 4 +-- !query SELECT * FROM nt1 natural right join nt2 where udf(k) <> udf("") order by v1, v2 --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output one 1 1 one 1 5 two 2 22 --- !query 5 +-- !query SELECT udf(count(*)) FROM nt1 natural full outer join nt2 --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 4 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out index 819f786070882..afebbb0c1da92 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out @@ -2,27 +2,27 @@ -- Number of queries: 
8 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (-234), (145), (367), (975), (298) as t1(int_col1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (-769, -244), (-800, -409), (940, 86), (-507, 304), (-367, 158) as t2(int_col0, int_col1) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT (udf(SUM(udf(COALESCE(t1.int_col1, t2.int_col0))))), (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2) @@ -33,40 +33,40 @@ GROUP BY udf(GREATEST(COALESCE(udf(t2.int_col1), 109), COALESCE(t1.int_col1, udf COALESCE(t1.int_col1, t2.int_col0) HAVING (udf(SUM(COALESCE(udf(t1.int_col1), udf(t2.int_col0))))) > (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output -367 -734 -507 -1014 -769 -1538 -800 -1600 --- !query 3 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (97) as t1(int_col1) --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output --- !query 4 +-- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (0) as t2(int_col1) --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query set spark.sql.crossJoin.enabled = true --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output spark.sql.crossJoin.enabled true --- !query 6 +-- !query SELECT * FROM ( SELECT @@ -74,15 +74,15 @@ SELECT FROM t1 LEFT JOIN t2 ON false ) t where (udf(t.int_col)) is not null --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 97 --- !query 7 +-- !query set spark.sql.crossJoin.enabled = false --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output spark.sql.crossJoin.enabled false diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out index cb9e4d736c9a0..087b4ed9302d8 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 30 --- !query 0 +-- !query create temporary view courseSales as select * from values ("dotNET", 2012, 10000), ("Java", 2012, 20000), @@ -10,35 +10,35 @@ create temporary view courseSales as select * from values ("dotNET", 2013, 48000), ("Java", 2013, 30000) as courseSales(course, year, earnings) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query create temporary view years as select * from values (2012, 1), (2013, 2) as years(y, s) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query create temporary view yearsWithComplexTypes as select * from values (2012, array(1, 1), map('1', 1), struct(1, 'a')), (2013, array(2, 2), map('2', 2), struct(2, 'b')) as yearsWithComplexTypes(y, a, m, s) --- !query 2 schema +-- !query schema struct<> --- !query 2 output +-- !query output --- !query 3 +-- !query SELECT * FROM ( SELECT udf(year), course, earnings FROM courseSales ) @@ -46,27 +46,27 @@ PIVOT ( udf(sum(earnings)) FOR course IN ('dotNET', 'Java') ) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 2012 15000 20000 2013 48000 30000 --- !query 4 +-- !query SELECT * FROM courseSales PIVOT ( udf(sum(earnings)) FOR year IN (2012, 2013) ) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output Java 20000 30000 dotNET 15000 48000 --- !query 5 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -74,14 +74,14 @@ PIVOT ( udf(sum(earnings)), udf(avg(earnings)) FOR course IN ('dotNET', 'Java') ) --- !query 5 schema +-- !query 
schema struct --- !query 5 output +-- !query output 2012 15000 7500.0 20000 20000.0 2013 48000 48000.0 30000 30000.0 --- !query 6 +-- !query SELECT * FROM ( SELECT udf(course) as course, earnings FROM courseSales ) @@ -89,13 +89,13 @@ PIVOT ( udf(sum(earnings)) FOR course IN ('dotNET', 'Java') ) --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output 63000 50000 --- !query 7 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -103,13 +103,13 @@ PIVOT ( udf(sum(udf(earnings))), udf(min(year)) FOR course IN ('dotNET', 'Java') ) --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output 63000 2012 50000 2012 --- !query 8 +-- !query SELECT * FROM ( SELECT course, year, earnings, udf(s) as s FROM courseSales @@ -119,16 +119,16 @@ PIVOT ( udf(sum(earnings)) FOR s IN (1, 2) ) --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output Java 2012 20000 NULL Java 2013 NULL 30000 dotNET 2012 15000 NULL dotNET 2013 NULL 48000 --- !query 9 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -138,14 +138,14 @@ PIVOT ( udf(sum(earnings)), udf(min(s)) FOR course IN ('dotNET', 'Java') ) --- !query 9 schema +-- !query schema struct --- !query 9 output +-- !query output 2012 15000 1 20000 1 2013 48000 2 30000 2 --- !query 10 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -155,14 +155,14 @@ PIVOT ( udf(sum(earnings * s)) FOR course IN ('dotNET', 'Java') ) --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output 2012 15000 20000 2013 96000 60000 --- !query 11 +-- !query SELECT 2012_s, 2013_s, 2012_a, 2013_a, c FROM ( SELECT year y, course c, earnings e FROM courseSales ) @@ -170,14 +170,14 @@ PIVOT ( udf(sum(e)) s, udf(avg(e)) a FOR y IN (2012, 2013) ) --- !query 11 schema +-- !query schema struct<2012_s:bigint,2013_s:bigint,2012_a:double,2013_a:double,c:string> --- !query 11 output +-- !query 
output 15000 48000 7500.0 48000.0 dotNET 20000 30000 20000.0 30000.0 Java --- !query 12 +-- !query SELECT firstYear_s, secondYear_s, firstYear_a, secondYear_a, c FROM ( SELECT year y, course c, earnings e FROM courseSales ) @@ -185,27 +185,27 @@ PIVOT ( udf(sum(e)) s, udf(avg(e)) a FOR y IN (2012 as firstYear, 2013 secondYear) ) --- !query 12 schema +-- !query schema struct --- !query 12 output +-- !query output 15000 48000 7500.0 48000.0 dotNET 20000 30000 20000.0 30000.0 Java --- !query 13 +-- !query SELECT * FROM courseSales PIVOT ( udf(abs(earnings)) FOR year IN (2012, 2013) ) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function.; --- !query 14 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -213,14 +213,14 @@ PIVOT ( udf(sum(earnings)), year FOR course IN ('dotNET', 'Java') ) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function.; --- !query 15 +-- !query SELECT * FROM ( SELECT course, earnings FROM courseSales ) @@ -228,14 +228,14 @@ PIVOT ( udf(sum(earnings)) FOR year IN (2012, 2013) ) --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`year`' given input columns: [__auto_generated_subquery_name.course, __auto_generated_subquery_name.earnings]; line 4 pos 0 --- !query 16 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -243,14 +243,14 @@ PIVOT ( udf(ceil(udf(sum(earnings)))), avg(earnings) + 1 as a1 FOR course IN ('dotNET', 'Java') ) --- !query 16 schema +-- !query schema struct --- !query 16 output +-- !query output 2012 15000 7501.0 
20000 20001.0 2013 48000 48001.0 30000 30001.0 --- !query 17 +-- !query SELECT * FROM ( SELECT year, course, earnings FROM courseSales ) @@ -258,14 +258,14 @@ PIVOT ( sum(udf(avg(earnings))) FOR course IN ('dotNET', 'Java') ) --- !query 17 schema +-- !query schema struct<> --- !query 17 output +-- !query output org.apache.spark.sql.AnalysisException It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; --- !query 18 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -275,14 +275,14 @@ PIVOT ( udf(sum(earnings)) FOR (course, year) IN (('dotNET', 2012), ('Java', 2013)) ) --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output 1 15000 NULL 2 NULL 30000 --- !query 19 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -292,14 +292,14 @@ PIVOT ( udf(sum(earnings)) FOR (course, s) IN (('dotNET', 2) as c1, ('Java', 1) as c2) ) --- !query 19 schema +-- !query schema struct --- !query 19 output +-- !query output 2012 NULL 20000 2013 48000 NULL --- !query 20 +-- !query SELECT * FROM ( SELECT course, year, earnings, s FROM courseSales @@ -309,40 +309,40 @@ PIVOT ( udf(sum(earnings)) FOR (course, year) IN ('dotNET', 'Java') ) --- !query 20 schema +-- !query schema struct<> --- !query 20 output +-- !query output org.apache.spark.sql.AnalysisException Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct; --- !query 21 +-- !query SELECT * FROM courseSales PIVOT ( udf(sum(earnings)) FOR year IN (s, 2013) ) --- !query 21 schema +-- !query schema struct<> --- !query 21 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '`s`' given input columns: [coursesales.course, coursesales.earnings, coursesales.year]; line 4 pos 15 --- !query 22 +-- !query SELECT * FROM courseSales PIVOT ( udf(sum(earnings)) FOR year IN (course, 2013) ) --- 
!query 22 schema +-- !query schema struct<> --- !query 22 output +-- !query output org.apache.spark.sql.AnalysisException Literal expressions required for pivot values, found 'course#x'; --- !query 23 +-- !query SELECT * FROM ( SELECT earnings, year, a FROM courseSales @@ -352,14 +352,14 @@ PIVOT ( udf(sum(earnings)) FOR a IN (array(1, 1), array(2, 2)) ) --- !query 23 schema +-- !query schema struct --- !query 23 output +-- !query output 2012 35000 NULL 2013 NULL 78000 --- !query 24 +-- !query SELECT * FROM ( SELECT course, earnings, udf(year) as year, a FROM courseSales @@ -369,14 +369,14 @@ PIVOT ( udf(sum(earnings)) FOR (course, a) IN (('dotNET', array(1, 1)), ('Java', array(2, 2))) ) --- !query 24 schema +-- !query schema struct --- !query 24 output +-- !query output 2012 15000 NULL 2013 NULL 30000 --- !query 25 +-- !query SELECT * FROM ( SELECT earnings, year, s FROM courseSales @@ -386,14 +386,14 @@ PIVOT ( udf(sum(earnings)) FOR s IN ((1, 'a'), (2, 'b')) ) --- !query 25 schema +-- !query schema struct --- !query 25 output +-- !query output 2012 35000 NULL 2013 NULL 78000 --- !query 26 +-- !query SELECT * FROM ( SELECT course, earnings, year, s FROM courseSales @@ -403,14 +403,14 @@ PIVOT ( udf(sum(earnings)) FOR (course, s) IN (('dotNET', (1, 'a')), ('Java', (2, 'b'))) ) --- !query 26 schema +-- !query schema struct --- !query 26 output +-- !query output 2012 15000 NULL 2013 NULL 30000 --- !query 27 +-- !query SELECT * FROM ( SELECT earnings, year, m FROM courseSales @@ -420,14 +420,14 @@ PIVOT ( udf(sum(earnings)) FOR m IN (map('1', 1), map('2', 2)) ) --- !query 27 schema +-- !query schema struct<> --- !query 27 output +-- !query output org.apache.spark.sql.AnalysisException Invalid pivot column 'm#x'. 
Pivot columns must be comparable.; --- !query 28 +-- !query SELECT * FROM ( SELECT course, earnings, year, m FROM courseSales @@ -437,14 +437,14 @@ PIVOT ( udf(sum(earnings)) FOR (course, m) IN (('dotNET', map('1', 1)), ('Java', map('2', 2))) ) --- !query 28 schema +-- !query schema struct<> --- !query 28 output +-- !query output org.apache.spark.sql.AnalysisException Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable.; --- !query 29 +-- !query SELECT * FROM ( SELECT course, earnings, udf("a") as a, udf("z") as z, udf("b") as b, udf("y") as y, udf("c") as c, udf("x") as x, udf("d") as d, udf("w") as w @@ -454,7 +454,7 @@ PIVOT ( udf(sum(Earnings)) FOR Course IN ('dotNET', 'Java') ) --- !query 29 schema +-- !query schema struct --- !query 29 output +-- !query output a z b y c x d w 63000 50000 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-special-values.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-special-values.sql.out index 7b2b5dbe578cc..5e5c79172bb7a 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-special-values.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-special-values.sql.out @@ -2,61 +2,61 @@ -- Number of queries: 6 --- !query 0 +-- !query SELECT udf(x) FROM (VALUES (1), (2), (NULL)) v(x) --- !query 0 schema +-- !query schema struct --- !query 0 output +-- !query output 1 2 NULL --- !query 1 +-- !query SELECT udf(x) FROM (VALUES ('A'), ('B'), (NULL)) v(x) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output A B NULL --- !query 2 +-- !query SELECT udf(x) FROM (VALUES ('NaN'), ('1'), ('2')) v(x) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 2 NaN --- !query 3 +-- !query SELECT udf(x) FROM (VALUES ('Infinity'), ('1'), ('2')) v(x) --- !query 3 schema +-- !query schema struct --- !query 3 output +-- !query output 1 2 Infinity --- !query 4 +-- !query SELECT udf(x) FROM 
(VALUES ('-Infinity'), ('1'), ('2')) v(x) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output -Infinity 1 2 --- !query 5 +-- !query SELECT udf(x) FROM (VALUES 0.00000001, 0.00000002, 0.00000003) v(x) --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output 0.00000001 0.00000002 0.00000003 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out index f8e5fe6a62f33..19221947b4a88 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-udaf.sql.out @@ -2,69 +2,69 @@ -- Number of queries: 8 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1), (2), (3), (4) as t1(int_col1) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE FUNCTION myDoubleAvg AS 'test.org.apache.spark.sql.MyDoubleAvg' --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT default.myDoubleAvg(udf(int_col1)) as my_avg, udf(default.myDoubleAvg(udf(int_col1))) as my_avg2, udf(default.myDoubleAvg(int_col1)) as my_avg3 from t1 --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 102.5 102.5 102.5 --- !query 3 +-- !query SELECT default.myDoubleAvg(udf(int_col1), udf(3)) as my_avg from t1 --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException Invalid number of arguments for function default.myDoubleAvg. 
Expected: 1; Found: 2; line 1 pos 7 --- !query 4 +-- !query CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' --- !query 4 schema +-- !query schema struct<> --- !query 4 output +-- !query output --- !query 5 +-- !query SELECT default.udaf1(udf(int_col1)) as udaf1, udf(default.udaf1(udf(int_col1))) as udaf2, udf(default.udaf1(int_col1)) as udaf3 from t1 --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output org.apache.spark.sql.AnalysisException Can not load class 'test.non.existent.udaf' when registering the function 'default.udaf1', please make sure it is on the classpath; line 1 pos 94 --- !query 6 +-- !query DROP FUNCTION myDoubleAvg --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query DROP FUNCTION udaf1 --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out index 84b5e10dbeb8e..c06c35d34cd74 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out @@ -2,93 +2,93 @@ -- Number of queries: 16 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (1, 'a'), (2, 'b') tbl(c1, c2) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (1.0, 1), (2.0, 4) tbl(c1, c2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT udf(c1) as c1, udf(c2) as c2 FROM (SELECT udf(c1) as c1, udf(c2) as c2 FROM t1 UNION ALL SELECT udf(c1) as c1, udf(c2) as c2 FROM t1) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 a 1 a 2 b 2 b --- !query 3 +-- !query SELECT udf(c1) as c1, udf(c2) as c2 FROM (SELECT udf(c1) as c1, 
udf(c2) as c2 FROM t1 UNION ALL SELECT udf(c1) as c1, udf(c2) as c2 FROM t2 UNION ALL SELECT udf(c1) as c1, udf(c2) as c2 FROM t2) --- !query 3 schema +-- !query schema struct --- !query 3 output -1 1 -1 1 -1 a -2 4 -2 4 -2 b +-- !query output +1.0 1 +1.0 1 +1.0 a +2.0 4 +2.0 4 +2.0 b --- !query 4 +-- !query SELECT udf(udf(a)) as a FROM (SELECT udf(0) a, udf(0) b UNION ALL SELECT udf(SUM(1)) a, udf(CAST(0 AS BIGINT)) b UNION ALL SELECT udf(0) a, udf(0) b) T --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 0 0 1 --- !query 5 +-- !query CREATE OR REPLACE TEMPORARY VIEW p1 AS VALUES 1 T(col) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query CREATE OR REPLACE TEMPORARY VIEW p2 AS VALUES 1 T(col) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query CREATE OR REPLACE TEMPORARY VIEW p3 AS VALUES 1 T(col) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query output --- !query 8 +-- !query SELECT udf(1) AS x, udf(col) as col FROM (SELECT udf(col) AS col @@ -97,70 +97,70 @@ FROM (SELECT udf(col) AS col UNION ALL SELECT udf(col) FROM p3) T1) T2 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 1 1 1 --- !query 9 +-- !query SELECT map(1, 2), udf('str') as str UNION ALL SELECT map(1, 2, 3, NULL), udf(1) --- !query 9 schema +-- !query schema struct,str:string> --- !query 9 output +-- !query output {1:2,3:null} 1 {1:2} str --- !query 10 +-- !query SELECT array(1, 2), udf('str') as str UNION ALL SELECT array(1, 2, 3, NULL), udf(1) --- !query 10 schema +-- !query schema struct,str:string> --- !query 10 output +-- !query output [1,2,3,null] 1 [1,2] str --- !query 11 +-- !query DROP VIEW IF EXISTS t1 --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query DROP VIEW IF EXISTS t2 --- !query 12 schema +-- !query schema struct<> 
--- !query 12 output +-- !query output --- !query 13 +-- !query DROP VIEW IF EXISTS p1 --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query DROP VIEW IF EXISTS p2 --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output --- !query 15 +-- !query DROP VIEW IF EXISTS p3 --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out index 9354d5e3117f4..a915c1bd6c717 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out @@ -2,7 +2,7 @@ -- Number of queries: 23 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (null, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"), (1, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"), @@ -14,18 +14,18 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (null, null, null, null, null, null), (3, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), null) AS testData(val, val_long, val_double, val_date, val_timestamp, cate) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY cate ORDER BY udf(val) ROWS CURRENT ROW) FROM testData ORDER BY cate, udf(val) --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output NULL NULL 0 3 NULL 1 NULL a 0 @@ -37,12 +37,12 @@ NULL a 0 3 b 1 --- !query 2 +-- !query SELECT udf(val), cate, sum(val) OVER(PARTITION BY cate ORDER BY udf(val) ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate, udf(val) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output NULL NULL 3 3 NULL 3 NULL a 1 @@ 
-54,22 +54,22 @@ NULL a 1 3 b 6 --- !query 3 +-- !query SELECT val_long, udf(cate), sum(val_long) OVER(PARTITION BY cate ORDER BY udf(val_long) ROWS BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY udf(cate), val_long --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'ROWS BETWEEN CURRENT ROW AND 2147483648L FOLLOWING' due to data type mismatch: The data type of the upper bound 'bigint' does not match the expected data type 'int'.; line 1 pos 46 --- !query 4 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY udf(cate) ORDER BY val RANGE 1 PRECEDING) FROM testData ORDER BY cate, udf(val) --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output NULL NULL 0 3 NULL 1 NULL a 0 @@ -81,12 +81,12 @@ NULL a 0 3 b 2 --- !query 5 +-- !query SELECT val, udf(cate), sum(val) OVER(PARTITION BY udf(cate) ORDER BY val RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY udf(cate), val --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output NULL NULL NULL 3 NULL 3 NULL a NULL @@ -98,12 +98,12 @@ NULL a NULL 3 b 3 --- !query 6 +-- !query SELECT val_long, udf(cate), sum(val_long) OVER(PARTITION BY udf(cate) ORDER BY val_long RANGE BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY udf(cate), val_long --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL NULL NULL 1 NULL 1 1 a 4 @@ -115,12 +115,12 @@ NULL b NULL 2147483650 b 2147483650 --- !query 7 +-- !query SELECT val_double, udf(cate), sum(val_double) OVER(PARTITION BY udf(cate) ORDER BY val_double RANGE BETWEEN CURRENT ROW AND 2.5 FOLLOWING) FROM testData ORDER BY udf(cate), val_double --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL NULL NULL 1.0 NULL 1.0 1.0 a 4.5 @@ -132,12 +132,12 @@ NULL NULL NULL 100.001 b 100.001 --- !query 8 +-- !query SELECT 
val_date, udf(cate), max(val_date) OVER(PARTITION BY udf(cate) ORDER BY val_date RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData ORDER BY udf(cate), val_date --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output NULL NULL NULL 2017-08-01 NULL 2017-08-01 2017-08-01 a 2017-08-02 @@ -149,13 +149,13 @@ NULL NULL NULL 2020-12-31 b 2020-12-31 --- !query 9 +-- !query SELECT val_timestamp, udf(cate), avg(val_timestamp) OVER(PARTITION BY udf(cate) ORDER BY val_timestamp RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData ORDER BY udf(cate), val_timestamp --- !query 9 schema -struct --- !query 9 output +-- !query schema +struct +-- !query output NULL NULL NULL 2017-07-31 17:00:00 NULL 1.5015456E9 2017-07-31 17:00:00 a 1.5016970666666667E9 @@ -167,12 +167,12 @@ NULL NULL NULL 2020-12-30 16:00:00 b 1.6093728E9 --- !query 10 +-- !query SELECT val, udf(cate), sum(val) OVER(PARTITION BY cate ORDER BY val DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NULL NULL NULL 3 NULL 3 NULL a NULL @@ -184,62 +184,62 @@ NULL a NULL 3 b 5 --- !query 11 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY udf(cate) ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING) FROM testData ORDER BY cate, udf(val) --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING' due to data type mismatch: Window frame upper bound '1' does not follow the lower bound 'unboundedfollowing$()'.; line 1 pos 38 --- !query 12 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY udf(cate) RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, udf(val) --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException 
cannot resolve '(PARTITION BY CAST(udf(cast(cate as string)) AS STRING) RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: A range window frame cannot be used in an unordered window specification.; line 1 pos 38 --- !query 13 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY udf(cate) ORDER BY udf(val), cate RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, udf(val) --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(PARTITION BY CAST(udf(cast(cate as string)) AS STRING) ORDER BY CAST(udf(cast(val as string)) AS INT) ASC NULLS FIRST, testdata.`cate` ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: A range window frame with value boundaries cannot be used in a window specification with multiple order by expressions: cast(udf(cast(val#x as string)) as int) ASC NULLS FIRST,cate#x ASC NULLS FIRST; line 1 pos 38 --- !query 14 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY udf(cate) ORDER BY current_timestamp RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, udf(val) --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(PARTITION BY CAST(udf(cast(cate as string)) AS STRING) ORDER BY current_timestamp() ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: The data type 'timestamp' used in the order specification does not match the data type 'int' which is used in the range frame.; line 1 pos 38 --- !query 15 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY udf(cate) ORDER BY val RANGE BETWEEN 1 FOLLOWING AND 1 PRECEDING) FROM testData ORDER BY udf(cate), val --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'RANGE BETWEEN 1 
FOLLOWING AND 1 PRECEDING' due to data type mismatch: The lower bound of a window frame must be less than or equal to the upper bound; line 1 pos 38 --- !query 16 +-- !query SELECT udf(val), cate, count(val) OVER(PARTITION BY udf(cate) ORDER BY udf(val) RANGE BETWEEN CURRENT ROW AND current_date PRECEDING) FROM testData ORDER BY cate, val(val) --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Frame bound value must be a literal.(line 2, pos 30) @@ -250,7 +250,7 @@ RANGE BETWEEN CURRENT ROW AND current_date PRECEDING) FROM testData ORDER BY cat ------------------------------^^^ --- !query 17 +-- !query SELECT udf(val), cate, max(udf(val)) OVER w AS max, min(udf(val)) OVER w AS min, @@ -285,9 +285,9 @@ kurtosis(udf(val_double)) OVER w AS kurtosis FROM testData WINDOW w AS (PARTITION BY udf(cate) ORDER BY udf(val)) ORDER BY cate, udf(val) --- !query 17 schema +-- !query schema struct,collect_set:array,skewness:double,kurtosis:double> --- !query 17 output +-- !query output NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL 3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1 0.0 NaN NaN 0.0 [3] [3] NaN NaN NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NaN NaN @@ -299,11 +299,11 @@ NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0. 
3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 --- !query 18 +-- !query SELECT udf(val), cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData ORDER BY cate, val --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output NULL NULL NULL 3 NULL NULL NULL a NULL @@ -315,20 +315,20 @@ NULL a NULL 3 b NULL --- !query 19 +-- !query SELECT udf(val), cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, udf(val) --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table; --- !query 20 +-- !query SELECT udf(val), cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output NULL NULL 13 1.8571428571428572 3 NULL 13 1.8571428571428572 NULL a 13 1.8571428571428572 @@ -340,7 +340,7 @@ NULL a 13 1.8571428571428572 3 b 13 1.8571428571428572 --- !query 21 +-- !query SELECT udf(val), cate, first_value(false) OVER w AS first_value, first_value(true, true) OVER w AS first_value_ignore_null, @@ -351,9 +351,9 @@ last_value(false, false) OVER w AS last_value_contain_null FROM testData WINDOW w AS () ORDER BY cate, val --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL NULL false true false false true false 3 NULL false true false false true false NULL a false true false false true false @@ -365,14 +365,14 @@ NULL a false true false false true false 3 b false true false false true false --- !query 22 +-- !query SELECT udf(cate), sum(val) OVER (w) FROM testData WHERE val is not null WINDOW w AS (PARTITION BY 
cate ORDER BY val) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output NULL 3 a 2 a 2 diff --git a/sql/core/src/test/resources/sql-tests/results/union.sql.out b/sql/core/src/test/resources/sql-tests/results/union.sql.out index b023df825d814..44002406836a4 100644 --- a/sql/core/src/test/resources/sql-tests/results/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/union.sql.out @@ -2,93 +2,93 @@ -- Number of queries: 16 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (1, 'a'), (2, 'b') tbl(c1, c2) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (1.0, 1), (2.0, 4) tbl(c1, c2) --- !query 1 schema +-- !query schema struct<> --- !query 1 output +-- !query output --- !query 2 +-- !query SELECT * FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t1) --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output 1 a 1 a 2 b 2 b --- !query 3 +-- !query SELECT * FROM (SELECT * FROM t1 UNION ALL SELECT * FROM t2 UNION ALL SELECT * FROM t2) --- !query 3 schema +-- !query schema struct --- !query 3 output -1 1 -1 1 -1 a -2 4 -2 4 -2 b +-- !query output +1.0 1 +1.0 1 +1.0 a +2.0 4 +2.0 4 +2.0 b --- !query 4 +-- !query SELECT a FROM (SELECT 0 a, 0 b UNION ALL SELECT SUM(1) a, CAST(0 AS BIGINT) b UNION ALL SELECT 0 a, 0 b) T --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output 0 0 1 --- !query 5 +-- !query CREATE OR REPLACE TEMPORARY VIEW p1 AS VALUES 1 T(col) --- !query 5 schema +-- !query schema struct<> --- !query 5 output +-- !query output --- !query 6 +-- !query CREATE OR REPLACE TEMPORARY VIEW p2 AS VALUES 1 T(col) --- !query 6 schema +-- !query schema struct<> --- !query 6 output +-- !query output --- !query 7 +-- !query CREATE OR REPLACE TEMPORARY VIEW p3 AS VALUES 1 T(col) --- !query 7 schema +-- !query schema struct<> --- !query 7 output +-- !query 
output --- !query 8 +-- !query SELECT 1 AS x, col FROM (SELECT col AS col @@ -97,70 +97,70 @@ FROM (SELECT col AS col UNION ALL SELECT col FROM p3) T1) T2 --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output 1 1 1 1 --- !query 9 +-- !query SELECT map(1, 2), 'str' UNION ALL SELECT map(1, 2, 3, NULL), 1 --- !query 9 schema +-- !query schema struct,str:string> --- !query 9 output +-- !query output {1:2,3:null} 1 {1:2} str --- !query 10 +-- !query SELECT array(1, 2), 'str' UNION ALL SELECT array(1, 2, 3, NULL), 1 --- !query 10 schema +-- !query schema struct,str:string> --- !query 10 output +-- !query output [1,2,3,null] 1 [1,2] str --- !query 11 +-- !query DROP VIEW IF EXISTS t1 --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output --- !query 12 +-- !query DROP VIEW IF EXISTS t2 --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output --- !query 13 +-- !query DROP VIEW IF EXISTS p1 --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output --- !query 14 +-- !query DROP VIEW IF EXISTS p2 --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output --- !query 15 +-- !query DROP VIEW IF EXISTS p3 --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 367dc4f513635..625088f90ced9 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,8 +1,8 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 24 --- !query 0 +-- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (null, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"), (1, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"), @@ -14,18 
+14,18 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (null, null, null, null, null, null), (3, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), null) AS testData(val, val_long, val_double, val_date, val_timestamp, cate) --- !query 0 schema +-- !query schema struct<> --- !query 0 output +-- !query output --- !query 1 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val --- !query 1 schema +-- !query schema struct --- !query 1 output +-- !query output NULL NULL 0 3 NULL 1 NULL a 0 @@ -37,12 +37,12 @@ NULL a 0 3 b 1 --- !query 2 +-- !query SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 2 schema +-- !query schema struct --- !query 2 output +-- !query output NULL NULL 3 3 NULL 3 NULL a 1 @@ -54,22 +54,22 @@ NULL a 1 3 b 6 --- !query 3 +-- !query SELECT val_long, cate, sum(val_long) OVER(PARTITION BY cate ORDER BY val_long ROWS BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY cate, val_long --- !query 3 schema +-- !query schema struct<> --- !query 3 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'ROWS BETWEEN CURRENT ROW AND 2147483648L FOLLOWING' due to data type mismatch: The data type of the upper bound 'bigint' does not match the expected data type 'int'.; line 1 pos 41 --- !query 4 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1 PRECEDING) FROM testData ORDER BY cate, val --- !query 4 schema +-- !query schema struct --- !query 4 output +-- !query output NULL NULL 0 3 NULL 1 NULL a 0 @@ -81,12 +81,12 @@ NULL a 0 3 b 2 --- !query 5 +-- !query SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 5 schema +-- !query schema struct --- !query 5 output +-- !query output NULL NULL NULL 
3 NULL 3 NULL a NULL @@ -98,12 +98,12 @@ NULL a NULL 3 b 3 --- !query 6 +-- !query SELECT val_long, cate, sum(val_long) OVER(PARTITION BY cate ORDER BY val_long RANGE BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY cate, val_long --- !query 6 schema +-- !query schema struct --- !query 6 output +-- !query output NULL NULL NULL 1 NULL 1 1 a 4 @@ -115,12 +115,12 @@ NULL b NULL 2147483650 b 2147483650 --- !query 7 +-- !query SELECT val_double, cate, sum(val_double) OVER(PARTITION BY cate ORDER BY val_double RANGE BETWEEN CURRENT ROW AND 2.5 FOLLOWING) FROM testData ORDER BY cate, val_double --- !query 7 schema +-- !query schema struct --- !query 7 output +-- !query output NULL NULL NULL 1.0 NULL 1.0 1.0 a 4.5 @@ -132,12 +132,12 @@ NULL NULL NULL 100.001 b 100.001 --- !query 8 +-- !query SELECT val_date, cate, max(val_date) OVER(PARTITION BY cate ORDER BY val_date RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData ORDER BY cate, val_date --- !query 8 schema +-- !query schema struct --- !query 8 output +-- !query output NULL NULL NULL 2017-08-01 NULL 2017-08-01 2017-08-01 a 2017-08-02 @@ -149,13 +149,13 @@ NULL NULL NULL 2020-12-31 b 2020-12-31 --- !query 9 +-- !query SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData ORDER BY cate, val_timestamp --- !query 9 schema -struct --- !query 9 output +-- !query schema +struct +-- !query output NULL NULL NULL 2017-07-31 17:00:00 NULL 1.5015456E9 2017-07-31 17:00:00 a 1.5016970666666667E9 @@ -167,12 +167,12 @@ NULL NULL NULL 2020-12-30 16:00:00 b 1.6093728E9 --- !query 10 +-- !query SELECT val, cate, sum(val) OVER(PARTITION BY cate ORDER BY val DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 10 schema +-- !query schema struct --- !query 10 output +-- !query output NULL NULL NULL 3 NULL 3 NULL a NULL @@ -184,62 +184,62 @@ NULL a NULL 3 b 5 
--- !query 11 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 11 schema +-- !query schema struct<> --- !query 11 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'ROWS BETWEEN UNBOUNDED FOLLOWING AND 1 FOLLOWING' due to data type mismatch: Window frame upper bound '1' does not follow the lower bound 'unboundedfollowing$()'.; line 1 pos 33 --- !query 12 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 12 schema +-- !query schema struct<> --- !query 12 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(PARTITION BY testdata.`cate` RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: A range window frame cannot be used in an unordered window specification.; line 1 pos 33 --- !query 13 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val, cate RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 13 schema +-- !query schema struct<> --- !query 13 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(PARTITION BY testdata.`cate` ORDER BY testdata.`val` ASC NULLS FIRST, testdata.`cate` ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: A range window frame with value boundaries cannot be used in a window specification with multiple order by expressions: val#x ASC NULLS FIRST,cate#x ASC NULLS FIRST; line 1 pos 33 --- !query 14 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY current_timestamp RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val --- !query 14 schema +-- !query schema struct<> --- !query 14 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve '(PARTITION BY testdata.`cate` ORDER BY 
current_timestamp() ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING)' due to data type mismatch: The data type 'timestamp' used in the order specification does not match the data type 'int' which is used in the range frame.; line 1 pos 33 --- !query 15 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE BETWEEN 1 FOLLOWING AND 1 PRECEDING) FROM testData ORDER BY cate, val --- !query 15 schema +-- !query schema struct<> --- !query 15 output +-- !query output org.apache.spark.sql.AnalysisException cannot resolve 'RANGE BETWEEN 1 FOLLOWING AND 1 PRECEDING' due to data type mismatch: The lower bound of a window frame must be less than or equal to the upper bound; line 1 pos 33 --- !query 16 +-- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE BETWEEN CURRENT ROW AND current_date PRECEDING) FROM testData ORDER BY cate, val --- !query 16 schema +-- !query schema struct<> --- !query 16 output +-- !query output org.apache.spark.sql.catalyst.parser.ParseException Frame bound value must be a literal.(line 2, pos 30) @@ -250,7 +250,7 @@ RANGE BETWEEN CURRENT ROW AND current_date PRECEDING) FROM testData ORDER BY cat ------------------------------^^^ --- !query 17 +-- !query SELECT val, cate, max(val) OVER w AS max, min(val) OVER w AS min, @@ -285,9 +285,9 @@ kurtosis(val_double) OVER w AS kurtosis FROM testData WINDOW w AS (PARTITION BY cate ORDER BY val) ORDER BY cate, val --- !query 17 schema +-- !query schema struct,collect_set:array,skewness:double,kurtosis:double> --- !query 17 output +-- !query output NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL 3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1 0.0 NaN NaN 0.0 [3] [3] NaN NaN NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NaN NaN @@ -299,11 +299,11 @@ NULL a NULL NULL 
NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0. 3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 --- !query 18 +-- !query SELECT val, cate, avg(null) OVER(PARTITION BY cate ORDER BY val) FROM testData ORDER BY cate, val --- !query 18 schema +-- !query schema struct --- !query 18 output +-- !query output NULL NULL NULL 3 NULL NULL NULL a NULL @@ -315,20 +315,20 @@ NULL a NULL 3 b NULL --- !query 19 +-- !query SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY cate, val --- !query 19 schema +-- !query schema struct<> --- !query 19 output +-- !query output org.apache.spark.sql.AnalysisException Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table; --- !query 20 +-- !query SELECT val, cate, sum(val) OVER(), avg(val) OVER() FROM testData ORDER BY cate, val --- !query 20 schema +-- !query schema struct --- !query 20 output +-- !query output NULL NULL 13 1.8571428571428572 3 NULL 13 1.8571428571428572 NULL a 13 1.8571428571428572 @@ -340,7 +340,7 @@ NULL a 13 1.8571428571428572 3 b 13 1.8571428571428572 --- !query 21 +-- !query SELECT val, cate, first_value(false) OVER w AS first_value, first_value(true, true) OVER w AS first_value_ignore_null, @@ -351,9 +351,9 @@ last_value(false, false) OVER w AS last_value_contain_null FROM testData WINDOW w AS () ORDER BY cate, val --- !query 21 schema +-- !query schema struct --- !query 21 output +-- !query output NULL NULL false true false false true false 3 NULL false true false false true false NULL a false true false false true false @@ -365,14 +365,14 @@ NULL a false true false false true false 3 b false true false false true false --- !query 22 +-- !query SELECT cate, sum(val) OVER (w) FROM testData WHERE val is not 
null WINDOW w AS (PARTITION BY cate ORDER BY val) --- !query 22 schema +-- !query schema struct --- !query 22 output +-- !query output NULL 3 a 2 a 2 @@ -380,3 +380,14 @@ a 4 b 1 b 3 b 6 + + +-- !query +SELECT val, cate, +count(val) FILTER (WHERE val > 1) OVER(PARTITION BY cate) +FROM testData ORDER BY cate, val +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +window aggregate function with filter predicate is not supported yet.; diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/metadata new file mode 100644 index 0000000000000..543f156048abe --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/metadata @@ -0,0 +1 @@ +{"id":"1ab1ee6f-993c-4a51-824c-1c7cc8202f62"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/offsets/0 new file mode 100644 index 0000000000000..63dba425b7e16 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/offsets/0 @@ -0,0 +1,4 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1548845804202,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/right-keyToNumValues/1.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/0/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..2cdf645d3a406 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..9c69d01231196 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/right-keyToNumValues/1.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/1/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..edc7a97408aaa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/right-keyToNumValues/1.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..edc7a97408aaa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/2/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..859c2b1315a5e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..7535621b3adb2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/right-keyToNumValues/1.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/3/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..f17037b3c5218 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/right-keyToNumValues/1.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..f17037b3c5218 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-2.4.0-streaming-join/state/0/4/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/test-data/bad_after_good.csv b/sql/core/src/test/resources/test-data/bad_after_good.csv index 4621a7d23714d..1a7c2651a11a7 100644 --- a/sql/core/src/test/resources/test-data/bad_after_good.csv +++ b/sql/core/src/test/resources/test-data/bad_after_good.csv @@ -1,2 +1,2 @@ "good record",1999-08-01 -"bad record",1999-088-01 +"bad record",1999-088_01 diff --git a/sql/core/src/test/resources/test-data/cars-multichar-delim-crazy.csv b/sql/core/src/test/resources/test-data/cars-multichar-delim-crazy.csv new file mode 100644 index 0000000000000..cabb50e9608e6 --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars-multichar-delim-crazy.csv @@ -0,0 +1,4 @@ +year_/-\_make_/-\_model_/-\_comment_/-\_blank +'2012'_/-\_'Tesla'_/-\_'S'_/-\_'No comment'_/-\_ +1997_/-\_Ford_/-\_E350_/-\_'Go get one now they are going fast'_/-\_ +2015_/-\_Chevy_/-\_Volt diff --git a/sql/core/src/test/resources/test-data/cars-multichar-delim.csv b/sql/core/src/test/resources/test-data/cars-multichar-delim.csv new file mode 100644 index 0000000000000..4309edbf04418 --- /dev/null +++ b/sql/core/src/test/resources/test-data/cars-multichar-delim.csv @@ -0,0 +1,4 @@ +year, make, model, comment, blank +'2012', 'Tesla', 'S', No comment, +1997, Ford, E350, 'Go get one now they are going fast', +2015, Chevy, Volt diff --git a/sql/core/src/test/resources/test-data/malformedRow.csv b/sql/core/src/test/resources/test-data/malformedRow.csv new file mode 100644 index 
0000000000000..8cfb3eefb982c --- /dev/null +++ b/sql/core/src/test/resources/test-data/malformedRow.csv @@ -0,0 +1,5 @@ +fruit,color,price,quantity +apple,red,1,3 +banana,yellow,2,4 +orange,orange,3,5 +malformedrow diff --git a/sql/core/src/test/resources/test-data/value-malformed.csv b/sql/core/src/test/resources/test-data/value-malformed.csv index 8945ed73d2e83..6e6f08fca6df8 100644 --- a/sql/core/src/test/resources/test-data/value-malformed.csv +++ b/sql/core/src/test/resources/test-data/value-malformed.csv @@ -1,2 +1,2 @@ -0,2013-111-11 12:13:14 +0,2013-111_11 12:13:14 1,1983-08-04 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala index a4b142b7ab78e..2b4abed645910 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala @@ -124,20 +124,24 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession test("percentile_approx, with different accuracies") { withTempView(table) { - (1 to 1000).toDF("col").createOrReplaceTempView(table) + val tableCount = 1000 + (1 to tableCount).toDF("col").createOrReplaceTempView(table) // With different accuracies - val expectedPercentile = 250D val accuracies = Array(1, 10, 100, 1000, 10000) - val errors = accuracies.map { accuracy => - val df = spark.sql(s"SELECT percentile_approx(col, 0.25, $accuracy) FROM $table") - val approximatePercentile = df.collect().head.getInt(0) - val error = Math.abs(approximatePercentile - expectedPercentile) - error + val expectedPercentiles = Array(100D, 200D, 250D, 314D, 777D) + for (accuracy <- accuracies) { + for (expectedPercentile <- expectedPercentiles) { + val df = spark.sql( + s"""SELECT + | percentile_approx(col, $expectedPercentile/$tableCount, $accuracy) + |FROM $table + """.stripMargin) + val 
approximatePercentile = df.collect().head.getInt(0) + val error = Math.abs(approximatePercentile - expectedPercentile) + assert(error <= math.floor(tableCount.toDouble / accuracy.toDouble)) + } } - - // The larger accuracy value we use, the smaller error we get - assert(errors.sorted.sameElements(errors.reverse)) } } @@ -145,7 +149,7 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession withTempView(table) { (1 to 1000).toDF("col").createOrReplaceTempView(table) checkAnswer( - spark.sql(s"SELECT percentile_approx(col, array(0.25 + 0.25D), 200 + 800D) FROM $table"), + spark.sql(s"SELECT percentile_approx(col, array(0.25 + 0.25D), 200 + 800) FROM $table"), Row(Seq(499)) ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala index 3fcb9892800b6..07afd4195c3d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import org.apache.spark.internal.config.Tests.IS_TESTING -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator} +import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeFormatter, CodeGenerator} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.test.SharedSparkSession @@ -43,12 +43,12 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { } } - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() RuleExecutor.resetMetrics() } - protected def checkGeneratedCode(plan: SparkPlan): Unit = { + protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = { val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() plan 
foreach { case s: WholeStageCodegenExec => @@ -57,7 +57,7 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { } codegenSubtrees.toSeq.foreach { subtree => val code = subtree.doCodeGen()._2 - try { + val (_, ByteCodeStats(maxMethodCodeSize, _, _)) = try { // Just check the generated code can be properly compiled CodeGenerator.compile(code) } catch { @@ -72,6 +72,11 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { """.stripMargin throw new Exception(msg, e) } + + assert(!checkMethodCodeSize || + maxMethodCodeSize <= CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT, + s"too long generated codes found in the WholeStageCodegenExec subtree (id=${subtree.id}) " + + s"and JIT optimization might not work:\n${subtree.treeString}") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 6e1ee6da9200d..cd2c681dd7e0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -27,7 +27,9 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, Join, JoinStrategyHint, SHUFFLE_HASH} -import org.apache.spark.sql.execution.{RDDScanExec, SparkPlan} +import org.apache.spark.sql.catalyst.util.DateTimeConstants +import org.apache.spark.sql.execution.{ExecSubqueryExpression, RDDScanExec, SparkPlan} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar._ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.functions._ @@ -36,11 +38,14 @@ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} import 
org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.storage.{RDDBlockId, StorageLevel} import org.apache.spark.storage.StorageLevel.{MEMORY_AND_DISK_2, MEMORY_ONLY} +import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.{AccumulatorContext, Utils} private case class BigData(s: String) -class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSession { +class CachedTableSuite extends QueryTest with SQLTestUtils + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ setupTestData() @@ -87,16 +92,25 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi sum } + private def getNumInMemoryTablesInSubquery(plan: SparkPlan): Int = { + plan.expressions.flatMap(_.collect { + case sub: ExecSubqueryExpression => getNumInMemoryTablesRecursively(sub.plan) + }).sum + } + private def getNumInMemoryTablesRecursively(plan: SparkPlan): Int = { - plan.collect { - case InMemoryTableScanExec(_, _, relation) => - getNumInMemoryTablesRecursively(relation.cachedPlan) + 1 + collect(plan) { + case inMemoryTable @ InMemoryTableScanExec(_, _, relation) => + getNumInMemoryTablesRecursively(relation.cachedPlan) + + getNumInMemoryTablesInSubquery(inMemoryTable) + 1 + case p => + getNumInMemoryTablesInSubquery(p) }.sum } test("cache temp table") { withTempView("tempTable") { - testData.select('key).createOrReplaceTempView("tempTable") + testData.select("key").createOrReplaceTempView("tempTable") assertCached(sql("SELECT COUNT(*) FROM tempTable"), 0) spark.catalog.cacheTable("tempTable") assertCached(sql("SELECT COUNT(*) FROM tempTable")) @@ -127,8 +141,8 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi } test("uncaching temp table") { - testData.select('key).createOrReplaceTempView("tempTable1") - testData.select('key).createOrReplaceTempView("tempTable2") + testData.select("key").createOrReplaceTempView("tempTable1") + 
testData.select("key").createOrReplaceTempView("tempTable2") spark.catalog.cacheTable("tempTable1") assertCached(sql("SELECT COUNT(*) FROM tempTable1")) @@ -361,15 +375,15 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi } test("Drops temporary table") { - testData.select('key).createOrReplaceTempView("t1") + testData.select("key").createOrReplaceTempView("t1") spark.table("t1") spark.catalog.dropTempView("t1") intercept[AnalysisException](spark.table("t1")) } test("Drops cached temporary table") { - testData.select('key).createOrReplaceTempView("t1") - testData.select('key).createOrReplaceTempView("t2") + testData.select("key").createOrReplaceTempView("t1") + testData.select("key").createOrReplaceTempView("t2") spark.catalog.cacheTable("t1") assert(spark.catalog.isCached("t1")) @@ -464,7 +478,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi */ private def verifyNumExchanges(df: DataFrame, expected: Int): Unit = { assert( - df.queryExecution.executedPlan.collect { case e: ShuffleExchangeExec => e }.size == expected) + collect(df.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.size == expected) } test("A cached table preserves the partitioning and ordering of its cached SparkPlan") { @@ -515,7 +529,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi val query = sql("SELECT key, value, a, b FROM t1 t1 JOIN t2 t2 ON t1.key = t2.a") verifyNumExchanges(query, 1) - assert(query.queryExecution.executedPlan.outputPartitioning.numPartitions === 6) + assert(stripAQEPlan(query.queryExecution.executedPlan).outputPartitioning.numPartitions === 6) checkAnswer( query, testData.join(testData2, $"key" === $"a").select($"key", $"value", $"a", $"b")) @@ -532,7 +546,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi val query = sql("SELECT key, value, a, b FROM t1 t1 JOIN t2 t2 ON t1.key = t2.a") verifyNumExchanges(query, 1) - 
assert(query.queryExecution.executedPlan.outputPartitioning.numPartitions === 6) + assert(stripAQEPlan(query.queryExecution.executedPlan).outputPartitioning.numPartitions === 6) checkAnswer( query, testData.join(testData2, $"key" === $"a").select($"key", $"value", $"a", $"b")) @@ -548,7 +562,8 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi val query = sql("SELECT key, value, a, b FROM t1 t1 JOIN t2 t2 ON t1.key = t2.a") verifyNumExchanges(query, 1) - assert(query.queryExecution.executedPlan.outputPartitioning.numPartitions === 12) + assert(stripAQEPlan(query.queryExecution.executedPlan). + outputPartitioning.numPartitions === 12) checkAnswer( query, testData.join(testData2, $"key" === $"a").select($"key", $"value", $"a", $"b")) @@ -603,7 +618,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi val query = sql("SELECT key, value, a, b FROM t1 t1 JOIN t2 t2 ON t1.key = t2.a and t1.value = t2.b") verifyNumExchanges(query, 1) - assert(query.queryExecution.executedPlan.outputPartitioning.numPartitions === 6) + assert(stripAQEPlan(query.queryExecution.executedPlan).outputPartitioning.numPartitions === 6) checkAnswer( query, df1.join(df2, $"key" === $"a" && $"value" === $"b").select($"key", $"value", $"a", $"b")) @@ -849,7 +864,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi sparkContext.addSparkListener(jobListener) try { val result = f - sparkContext.listenerBus.waitUntilEmpty(10000L) + sparkContext.listenerBus.waitUntilEmpty() assert(numJobTrigered === 0) result } finally { @@ -859,7 +874,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi test("SPARK-23880 table cache should be lazy and don't trigger any jobs") { val cachedData = checkIfNoJobTriggered { - spark.range(1002).filter('id > 1000).orderBy('id.desc).cache() + spark.range(1002).filter($"id" > 1000).orderBy($"id".desc).cache() } assert(cachedData.collect === Seq(1001)) 
} @@ -891,7 +906,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi test("SPARK-24596 Non-cascading Cache Invalidation - drop persistent view") { withTable("t") { - spark.range(1, 10).toDF("key").withColumn("value", 'key * 2) + spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2) .write.format("json").saveAsTable("t") withView("t1") { withTempView("t2") { @@ -911,7 +926,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi test("SPARK-24596 Non-cascading Cache Invalidation - uncache table") { withTable("t") { - spark.range(1, 10).toDF("key").withColumn("value", 'key * 2) + spark.range(1, 10).toDF("key").withColumn("value", $"key" * 2) .write.format("json").saveAsTable("t") withTempView("t1", "t2") { sql("CACHE TABLE t") @@ -1094,4 +1109,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSparkSessi } } } + + test("cache supports for intervals") { + withTable("interval_cache") { + Seq((1, "1 second"), (2, "2 seconds"), (2, null)) + .toDF("k", "v").write.saveAsTable("interval_cache") + sql("CACHE TABLE t1 AS SELECT k, cast(v as interval) FROM interval_cache") + assert(spark.catalog.isCached("t1")) + checkAnswer(sql("SELECT * FROM t1 WHERE k = 1"), + Row(1, new CalendarInterval(0, 0, DateTimeConstants.MICROS_PER_SECOND))) + sql("UNCACHE TABLE t1") + assert(!spark.catalog.isCached("t1")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index a52c6d503d147..a9ee25b10dc02 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import org.scalatest.Matchers._ -import 
org.apache.spark.sql.catalyst.expressions.NamedExpression +import org.apache.spark.sql.catalyst.expressions.{In, InSet, NamedExpression} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -454,25 +454,36 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("isInCollection: Scala Collection") { val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b") - // Test with different types of collections - checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))), - df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1)) - checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)), - df.collect().toSeq.filter(r => r.getInt(0) == 1 || r.getInt(0) == 2)) - checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)), - df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2)) - checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)), - df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1)) - val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b") + Seq(1, 2).foreach { conf => + withSQLConf(SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> conf.toString) { + if (conf <= 1) { + assert($"a".isInCollection(Seq(3, 1)).expr.isInstanceOf[InSet], "Expect expr to be InSet") + } else { + assert($"a".isInCollection(Seq(3, 1)).expr.isInstanceOf[In], "Expect expr to be In") + } - val e = intercept[AnalysisException] { - df2.filter($"a".isInCollection(Seq($"b"))) - } - Seq("cannot resolve", "due to data type mismatch: Arguments must be same type but were") - .foreach { s => - assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT))) + // Test with different types of collections + checkAnswer(df.filter($"a".isInCollection(Seq(3, 1))), + df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1)) + checkAnswer(df.filter($"a".isInCollection(Seq(1, 2).toSet)), + df.collect().toSeq.filter(r 
=> r.getInt(0) == 1 || r.getInt(0) == 2)) + checkAnswer(df.filter($"a".isInCollection(Seq(3, 2).toArray)), + df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 2)) + checkAnswer(df.filter($"a".isInCollection(Seq(3, 1).toList)), + df.collect().toSeq.filter(r => r.getInt(0) == 3 || r.getInt(0) == 1)) + + val df2 = Seq((1, Seq(1)), (2, Seq(2)), (3, Seq(3))).toDF("a", "b") + + val e = intercept[AnalysisException] { + df2.filter($"a".isInCollection(Seq($"b"))) + } + Seq("cannot resolve", + "due to data type mismatch: Arguments must be same type but were").foreach { s => + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(s.toLowerCase(Locale.ROOT))) + } } + } } test("&&") { @@ -526,12 +537,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("sqrt") { checkAnswer( - testData.select(sqrt('key)).orderBy('key.asc), + testData.select(sqrt($"key")).orderBy($"key".asc), (1 to 100).map(n => Row(math.sqrt(n))) ) checkAnswer( - testData.select(sqrt('value), 'key).orderBy('key.asc, 'value.asc), + testData.select(sqrt($"value"), $"key").orderBy($"key".asc, $"value".asc), (1 to 100).map(n => Row(math.sqrt(n), n)) ) @@ -543,12 +554,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("upper") { checkAnswer( - lowerCaseData.select(upper('l)), + lowerCaseData.select(upper($"l")), ('a' to 'd').map(c => Row(c.toString.toUpperCase(Locale.ROOT))) ) checkAnswer( - testData.select(upper('value), 'key), + testData.select(upper($"value"), $"key"), (1 to 100).map(n => Row(n.toString, n)) ) @@ -564,12 +575,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("lower") { checkAnswer( - upperCaseData.select(lower('L)), + upperCaseData.select(lower($"L")), ('A' to 'F').map(c => Row(c.toString.toLowerCase(Locale.ROOT))) ) checkAnswer( - testData.select(lower('value), 'key), + testData.select(lower($"value"), $"key"), (1 to 100).map(n => Row(n.toString, n)) ) @@ -742,8 +753,8 @@ class 
ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("columns can be compared") { - assert('key.desc == 'key.desc) - assert('key.desc != 'key.asc) + assert($"key".desc == $"key".desc) + assert($"key".desc != $"key".asc) } test("alias with metadata") { @@ -806,7 +817,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("randn") { - val randCol = testData.select('key, randn(5L).as("rand")) + val randCol = testData.select($"key", randn(5L).as("rand")) randCol.columns.length should be (2) val rows = randCol.collect() rows.foreach { row => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala index 4d0eb04be751b..6b503334f9f23 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ComplexTypesSuite.scala @@ -23,14 +23,14 @@ import org.apache.spark.sql.test.SharedSparkSession class ComplexTypesSuite extends QueryTest with SharedSparkSession { - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() spark.range(10).selectExpr( "id + 1 as i1", "id + 2 as i2", "id + 3 as i3", "id + 4 as i4", "id + 5 as i5") .write.saveAsTable("tab") } - override def afterAll() { + override def afterAll(): Unit = { try { spark.sql("DROP TABLE IF EXISTS tab") } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala index 431e797e1686e..c3dbbb325d842 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala @@ -41,7 +41,7 @@ class ConfigBehaviorSuite extends QueryTest with SharedSparkSession { // Trigger a sort // Range has range partitioning in its output now. To have a range shuffle, we // need to run a repartition first. 
- val data = spark.range(0, n, 1, 1).repartition(10).sort('id.desc) + val data = spark.range(0, n, 1, 1).repartition(10).sort($"id".desc) .selectExpr("SPARK_PARTITION_ID() pid", "id").as[(Int, Long)].collect() // Compute histogram for the number of records per partition post sort @@ -53,7 +53,11 @@ class ConfigBehaviorSuite extends QueryTest with SharedSparkSession { dist) } - withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> numPartitions.toString) { + // When enable AQE, the post partition number is changed. + // And the ChiSquareTest result is also need updated. So disable AQE. + withSQLConf( + SQLConf.SHUFFLE_PARTITIONS.key -> numPartitions.toString, + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { // The default chi-sq value should be low assert(computeChiSquareTest() < 100) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 52cf91cfade51..61f0e138cc358 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat import java.util.Locale @@ -152,7 +153,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { test("infers schemas of a CSV string and pass to to from_csv") { val in = Seq("""0.123456789,987654321,"San Francisco"""").toDS() val options = Map.empty[String, String].asJava - val out = in.select(from_csv('value, schema_of_csv("0.1,1,a"), options) as "parsed") + val out = in.select(from_csv($"value", schema_of_csv("0.1,1,a"), options) as "parsed") val expected = StructType(Seq(StructField( "parsed", StructType(Seq( @@ -181,4 +182,22 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df, Row(Row(java.sql.Timestamp.valueOf("2018-11-06 18:00:00.0")))) } } + + test("special timestamp values") { 
+ Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue => + val input = Seq(specialValue).toDS() + val readback = input.select(from_csv($"value", lit("t timestamp"), + Map.empty[String, String].asJava)).collect() + assert(readback(0).getAs[Row](0).getAs[Timestamp](0).getTime >= 0) + } + } + + test("special date values") { + Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue => + val input = Seq(specialValue).toDS() + val readback = input.select(from_csv($"value", lit("d date"), + Map.empty[String, String].asJava)).collect() + assert(readback(0).getAs[Row](0).getAs[Date](0).getTime >= 0) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index ec7b636c8f695..d7df75fd0e2c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -22,6 +22,7 @@ import scala.util.Random import org.scalatest.Matchers.the import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.expressions.Window @@ -29,11 +30,14 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.test.SQLTestData.DecimalData -import org.apache.spark.sql.types.DecimalType +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval case class Fact(date: Int, hour: Int, minute: Int, room_name: String, temp: Double) -class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { +class 
DataFrameAggregateSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ val absTol = 1e-8 @@ -44,7 +48,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { Seq(Row(1, 3), Row(2, 3), Row(3, 3)) ) checkAnswer( - testData2.groupBy("a").agg(sum($"b").as("totB")).agg(sum('totB)), + testData2.groupBy("a").agg(sum($"b").as("totB")).agg(sum($"totB")), Row(9) ) checkAnswer( @@ -110,7 +114,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { val df = Seq(("some[thing]", "random-string")).toDF("key", "val") checkAnswer( - df.groupBy(regexp_extract('key, "([a-z]+)\\[", 1)).count(), + df.groupBy(regexp_extract($"key", "([a-z]+)\\[", 1)).count(), Row("some", 1) :: Nil ) } @@ -276,7 +280,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { test("agg without groups") { checkAnswer( - testData2.agg(sum('b)), + testData2.agg(sum($"b")), Row(9) ) } @@ -290,52 +294,53 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { test("average") { checkAnswer( - testData2.agg(avg('a), mean('a)), + testData2.agg(avg($"a"), mean($"a")), Row(2.0, 2.0)) checkAnswer( - testData2.agg(avg('a), sumDistinct('a)), // non-partial + testData2.agg(avg($"a"), sumDistinct($"a")), // non-partial Row(2.0, 6.0) :: Nil) checkAnswer( - decimalData.agg(avg('a)), + decimalData.agg(avg($"a")), Row(new java.math.BigDecimal(2))) checkAnswer( - decimalData.agg(avg('a), sumDistinct('a)), // non-partial + decimalData.agg(avg($"a"), sumDistinct($"a")), // non-partial Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil) checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2))), + decimalData.agg(avg($"a" cast DecimalType(10, 2))), Row(new java.math.BigDecimal(2))) // non-partial checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), + decimalData.agg( + avg($"a" cast DecimalType(10, 2)), 
sumDistinct($"a" cast DecimalType(10, 2))), Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil) } test("null average") { checkAnswer( - testData3.agg(avg('b)), + testData3.agg(avg($"b")), Row(2.0)) checkAnswer( - testData3.agg(avg('b), countDistinct('b)), + testData3.agg(avg($"b"), countDistinct($"b")), Row(2.0, 1)) checkAnswer( - testData3.agg(avg('b), sumDistinct('b)), // non-partial + testData3.agg(avg($"b"), sumDistinct($"b")), // non-partial Row(2.0, 2.0)) } test("zero average") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( - emptyTableData.agg(avg('a)), + emptyTableData.agg(avg($"a")), Row(null)) checkAnswer( - emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial + emptyTableData.agg(avg($"a"), sumDistinct($"b")), // non-partial Row(null, null)) } @@ -343,28 +348,29 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { assert(testData2.count() === testData2.rdd.map(_ => 1).count()) checkAnswer( - testData2.agg(count('a), sumDistinct('a)), // non-partial + testData2.agg(count($"a"), sumDistinct($"a")), // non-partial Row(6, 6.0)) } test("null count") { checkAnswer( - testData3.groupBy('a).agg(count('b)), + testData3.groupBy($"a").agg(count($"b")), Seq(Row(1, 0), Row(2, 1)) ) checkAnswer( - testData3.groupBy('a).agg(count('a + 'b)), + testData3.groupBy($"a").agg(count($"a" + $"b")), Seq(Row(1, 0), Row(2, 1)) ) checkAnswer( - testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)), + testData3.agg( + count($"a"), count($"b"), count(lit(1)), countDistinct($"a"), countDistinct($"b")), Row(2, 1, 2, 2, 1) ) checkAnswer( - testData3.agg(count('b), countDistinct('b), sumDistinct('b)), // non-partial + testData3.agg(count($"b"), countDistinct($"b"), sumDistinct($"b")), // non-partial Row(1, 1, 2) ) } @@ -379,17 +385,17 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { .toDF("key1", "key2", "key3") checkAnswer( - 
df1.agg(countDistinct('key1, 'key2)), + df1.agg(countDistinct($"key1", $"key2")), Row(3) ) checkAnswer( - df1.agg(countDistinct('key1, 'key2, 'key3)), + df1.agg(countDistinct($"key1", $"key2", $"key3")), Row(3) ) checkAnswer( - df1.groupBy('key1).agg(countDistinct('key2, 'key3)), + df1.groupBy($"key1").agg(countDistinct($"key2", $"key3")), Seq(Row("a", 2), Row("x", 1)) ) } @@ -397,14 +403,14 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { test("zero count") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( - emptyTableData.agg(count('a), sumDistinct('a)), // non-partial + emptyTableData.agg(count($"a"), sumDistinct($"a")), // non-partial Row(0, null)) } test("stddev") { val testData2ADev = math.sqrt(4.0 / 5.0) checkAnswer( - testData2.agg(stddev('a), stddev_pop('a), stddev_samp('a)), + testData2.agg(stddev($"a"), stddev_pop($"a"), stddev_samp($"a")), Row(testData2ADev, math.sqrt(4 / 6.0), testData2ADev)) checkAnswer( testData2.agg(stddev("a"), stddev_pop("a"), stddev_samp("a")), @@ -414,47 +420,47 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { test("zero stddev") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( - emptyTableData.agg(stddev('a), stddev_pop('a), stddev_samp('a)), + emptyTableData.agg(stddev($"a"), stddev_pop($"a"), stddev_samp($"a")), Row(null, null, null)) } test("zero sum") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( - emptyTableData.agg(sum('a)), + emptyTableData.agg(sum($"a")), Row(null)) } test("zero sum distinct") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") checkAnswer( - emptyTableData.agg(sumDistinct('a)), + emptyTableData.agg(sumDistinct($"a")), Row(null)) } test("moments") { - val sparkVariance = testData2.agg(variance('a)) + val sparkVariance = testData2.agg(variance($"a")) checkAggregatesWithTol(sparkVariance, Row(4.0 / 5.0), absTol) - val sparkVariancePop = testData2.agg(var_pop('a)) + val 
sparkVariancePop = testData2.agg(var_pop($"a")) checkAggregatesWithTol(sparkVariancePop, Row(4.0 / 6.0), absTol) - val sparkVarianceSamp = testData2.agg(var_samp('a)) + val sparkVarianceSamp = testData2.agg(var_samp($"a")) checkAggregatesWithTol(sparkVarianceSamp, Row(4.0 / 5.0), absTol) - val sparkSkewness = testData2.agg(skewness('a)) + val sparkSkewness = testData2.agg(skewness($"a")) checkAggregatesWithTol(sparkSkewness, Row(0.0), absTol) - val sparkKurtosis = testData2.agg(kurtosis('a)) + val sparkKurtosis = testData2.agg(kurtosis($"a")) checkAggregatesWithTol(sparkKurtosis, Row(-1.5), absTol) } test("zero moments") { val input = Seq((1, 2)).toDF("a", "b") checkAnswer( - input.agg(stddev('a), stddev_samp('a), stddev_pop('a), variance('a), - var_samp('a), var_pop('a), skewness('a), kurtosis('a)), + input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), + var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN)) @@ -474,8 +480,8 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { test("null moments") { val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") - checkAnswer( - emptyTableData.agg(variance('a), var_samp('a), var_pop('a), skewness('a), kurtosis('a)), + checkAnswer(emptyTableData.agg( + variance($"a"), var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), Row(null, null, null, null, null)) checkAnswer( @@ -546,6 +552,14 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { ) } + test("collect functions should be able to cast to array type with no null values") { + val df = Seq(1, 2).toDF("a") + checkAnswer(df.select(collect_list("a") cast ArrayType(IntegerType, false)), + Seq(Row(Seq(1, 2)))) + checkAnswer(df.select(collect_set("a") cast ArrayType(FloatType, false)), + Seq(Row(Seq(1.0, 2.0)))) + } + test("SPARK-14664: Decimal sum/avg over window should work.") { checkAnswer( 
spark.sql("select sum(a) over () from values 1.0, 2.0, 3.0 T(a)"), @@ -557,7 +571,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { test("SQL decimal test (used for catching certain decimal handling bugs in aggregates)") { checkAnswer( - decimalData.groupBy('a cast DecimalType(10, 2)).agg(avg('b cast DecimalType(10, 2))), + decimalData.groupBy($"a" cast DecimalType(10, 2)).agg(avg($"b" cast DecimalType(10, 2))), Seq(Row(new java.math.BigDecimal(1), new java.math.BigDecimal("1.5")), Row(new java.math.BigDecimal(2), new java.math.BigDecimal("1.5")), Row(new java.math.BigDecimal(3), new java.math.BigDecimal("1.5")))) @@ -607,26 +621,27 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { // test case for HashAggregate val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() val hashAggPlan = hashAggDF.queryExecution.executedPlan if (wholeStage) { - assert(hashAggPlan.find { + assert(find(hashAggPlan) { case WholeStageCodegenExec(_: HashAggregateExec) => true case _ => false }.isDefined) } else { - assert(hashAggPlan.isInstanceOf[HashAggregateExec]) + assert(stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec]) } - hashAggDF.collect() // test case for ObjectHashAggregate and SortAggregate val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) - val objHashAggOrSortAggPlan = objHashAggOrSortAggDF.queryExecution.executedPlan + objHashAggOrSortAggDF.collect() + val objHashAggOrSortAggPlan = + stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan) if (useObjectHashAgg) { assert(objHashAggOrSortAggPlan.isInstanceOf[ObjectHashAggregateExec]) } else { assert(objHashAggOrSortAggPlan.isInstanceOf[SortAggregateExec]) } - objHashAggOrSortAggDF.collect() } } } @@ -644,7 +659,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { testData2.groupBy(lit(3), lit(4)).agg(lit(6), lit(7), sum("b")), Seq(Row(3, 4, 6, 7, 9))) checkAnswer( - testData2.groupBy(lit(3), 
lit(4)).agg(lit(6), 'b, sum("b")), + testData2.groupBy(lit(3), lit(4)).agg(lit(6), $"b", sum("b")), Seq(Row(3, 4, 6, 1, 3), Row(3, 4, 6, 2, 6))) checkAnswer( @@ -667,17 +682,17 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { .groupBy("a").agg(collect_list("f").as("g")) val aggPlan = objHashAggDF.queryExecution.executedPlan - val sortAggPlans = aggPlan.collect { + val sortAggPlans = collect(aggPlan) { case sortAgg: SortAggregateExec => sortAgg } assert(sortAggPlans.isEmpty) - val objHashAggPlans = aggPlan.collect { + val objHashAggPlans = collect(aggPlan) { case objHashAgg: ObjectHashAggregateExec => objHashAgg } assert(objHashAggPlans.nonEmpty) - val exchangePlans = aggPlan.collect { + val exchangePlans = collect(aggPlan) { case shuffle: ShuffleExchangeExec => shuffle } assert(exchangePlans.length == 1) @@ -707,14 +722,14 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { assert(thrownException.message.contains("not allowed to use a window function")) } - checkWindowError(testData2.select(min(avg('b).over(Window.partitionBy('a))))) - checkWindowError(testData2.agg(sum('b), max(rank().over(Window.orderBy('a))))) - checkWindowError(testData2.groupBy('a).agg(sum('b), max(rank().over(Window.orderBy('b))))) - checkWindowError(testData2.groupBy('a).agg(max(sum(sum('b)).over(Window.orderBy('a))))) - checkWindowError( - testData2.groupBy('a).agg(sum('b).as("s"), max(count("*").over())).where('s === 3)) - checkAnswer( - testData2.groupBy('a).agg(max('b), sum('b).as("s"), count("*").over()).where('s === 3), + checkWindowError(testData2.select(min(avg($"b").over(Window.partitionBy($"a"))))) + checkWindowError(testData2.agg(sum($"b"), max(rank().over(Window.orderBy($"a"))))) + checkWindowError(testData2.groupBy($"a").agg(sum($"b"), max(rank().over(Window.orderBy($"b"))))) + checkWindowError(testData2.groupBy($"a").agg(max(sum(sum($"b")).over(Window.orderBy($"a"))))) + checkWindowError(testData2.groupBy($"a").agg( + 
sum($"b").as("s"), max(count("*").over())).where($"s" === 3)) + checkAnswer(testData2.groupBy($"a").agg( + max($"b"), sum($"b").as("s"), count("*").over()).where($"s" === 3), Row(1, 2, 3, 3) :: Row(2, 2, 3, 3) :: Row(3, 2, 3, 3) :: Nil) checkWindowError(sql("SELECT MIN(AVG(b) OVER(PARTITION BY a)) FROM testData2")) @@ -730,7 +745,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { test("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") { // Checks if these raise no exception - assert(testData.groupBy('key).toString.contains( + assert(testData.groupBy($"key").toString.contains( "[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]")) assert(testData.groupBy(col("key")).toString.contains( "[grouping expressions: [key], value: [key: int, value: string], type: GroupBy]")) @@ -942,4 +957,17 @@ class DataFrameAggregateSuite extends QueryTest with SharedSparkSession { assert(error.message.contains("function count_if requires boolean type")) } } + + test("calendar interval agg support hash aggregate") { + val df1 = Seq((1, "1 day"), (2, "2 day"), (3, "3 day"), (3, null)).toDF("a", "b") + val df2 = df1.select(avg($"b" cast CalendarIntervalType)) + checkAnswer(df2, Row(new CalendarInterval(0, 2, 0)) :: Nil) + assert(find(df2.queryExecution.executedPlan)(_.isInstanceOf[HashAggregateExec]).isDefined) + val df3 = df1.groupBy($"a").agg(avg($"b" cast CalendarIntervalType)) + checkAnswer(df3, + Row(1, new CalendarInterval(0, 1, 0)) :: + Row(2, new CalendarInterval(0, 2, 0)) :: + Row(3, new CalendarInterval(0, 3, 0)) :: Nil) + assert(find(df3.queryExecution.executedPlan)(_.isInstanceOf[HashAggregateExec]).isDefined) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala index e9179a39d3b6d..4f25642906628 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala @@ -18,8 +18,12 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.DefinedByConstructorParams +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.objects.MapObjects import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.ArrayType /** * A test suite to test DataFrame/SQL functionalities with complex types (i.e. array, struct, map). @@ -64,6 +68,24 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSparkSession { val ds100_5 = Seq(S100_5()).toDS() ds100_5.rdd.count } + + test("SPARK-29503 nest unsafe struct inside safe array") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { + val df = spark.sparkContext.parallelize(Seq(Seq(1, 2, 3))).toDF("items") + + // items: Seq[Int] => items.map { item => Seq(Struct(item)) } + val result = df.select( + new Column(MapObjects( + (item: Expression) => array(struct(new Column(item))).expr, + $"items".expr, + df.schema("items").dataType.asInstanceOf[ArrayType].elementType + )) as "items" + ).collect() + + assert(result.size === 1) + assert(result === Row(Seq(Seq(Row(1)), Seq(Row(2)), Seq(Row(3)))) :: Nil) + } + } } class S100( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 7d044638db571..f7531ea446015 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -278,15 +278,15 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { test("pmod") { val intData = Seq((7, 3), (-7, 3)).toDF("a", "b") 
checkAnswer( - intData.select(pmod('a, 'b)), + intData.select(pmod($"a", $"b")), Seq(Row(1), Row(2)) ) checkAnswer( - intData.select(pmod('a, lit(3))), + intData.select(pmod($"a", lit(3))), Seq(Row(1), Row(2)) ) checkAnswer( - intData.select(pmod(lit(-7), 'b)), + intData.select(pmod(lit(-7), $"b")), Seq(Row(2), Row(2)) ) checkAnswer( @@ -303,7 +303,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ) val doubleData = Seq((7.2, 4.1)).toDF("a", "b") checkAnswer( - doubleData.select(pmod('a, 'b)), + doubleData.select(pmod($"a", $"b")), Seq(Row(3.1000000000000005)) // same as hive ) checkAnswer( @@ -312,6 +312,86 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ) } + test("array_sort with lambda functions") { + + spark.udf.register("fAsc", (x: Int, y: Int) => { + if (x < y) -1 + else if (x == y) 0 + else 1 + }) + + spark.udf.register("fDesc", (x: Int, y: Int) => { + if (x < y) 1 + else if (x == y) 0 + else -1 + }) + + spark.udf.register("fString", (x: String, y: String) => { + if (x == null && y == null) 0 + else if (x == null) 1 + else if (y == null) -1 + else if (x < y) 1 + else if (x == y) 0 + else -1 + }) + + spark.udf.register("fStringLength", (x: String, y: String) => { + if (x == null && y == null) 0 + else if (x == null) 1 + else if (y == null) -1 + else if (x.length < y.length) -1 + else if (x.length == y.length) 0 + else 1 + }) + + val df1 = Seq(Array[Int](3, 2, 5, 1, 2)).toDF("a") + checkAnswer( + df1.selectExpr("array_sort(a, (x, y) -> fAsc(x, y))"), + Seq( + Row(Seq(1, 2, 2, 3, 5))) + ) + + checkAnswer( + df1.selectExpr("array_sort(a, (x, y) -> fDesc(x, y))"), + Seq( + Row(Seq(5, 3, 2, 2, 1))) + ) + + val df2 = Seq(Array[String]("bc", "ab", "dc")).toDF("a") + checkAnswer( + df2.selectExpr("array_sort(a, (x, y) -> fString(x, y))"), + Seq( + Row(Seq("dc", "bc", "ab"))) + ) + + val df3 = Seq(Array[String]("a", "abcd", "abc")).toDF("a") + checkAnswer( + df3.selectExpr("array_sort(a, (x, y) -> 
fStringLength(x, y))"), + Seq( + Row(Seq("a", "abc", "abcd"))) + ) + + val df4 = Seq((Array[Array[Int]](Array(2, 3, 1), Array(4, 2, 1, 4), + Array(1, 2)), "x")).toDF("a", "b") + checkAnswer( + df4.selectExpr("array_sort(a, (x, y) -> fAsc(cardinality(x), cardinality(y)))"), + Seq( + Row(Seq[Seq[Int]](Seq(1, 2), Seq(2, 3, 1), Seq(4, 2, 1, 4)))) + ) + + val df5 = Seq(Array[String]("bc", null, "ab", "dc")).toDF("a") + checkAnswer( + df5.selectExpr("array_sort(a, (x, y) -> fString(x, y))"), + Seq( + Row(Seq("dc", "bc", "ab", null))) + ) + + spark.sql("drop temporary function fAsc") + spark.sql("drop temporary function fDesc") + spark.sql("drop temporary function fString") + spark.sql("drop temporary function fStringLength") + } + test("sort_array/array_sort functions") { val df = Seq( (Array[Int](2, 1, 3), Array("b", "c", "a")), @@ -383,7 +463,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { assert(intercept[AnalysisException] { df3.selectExpr("array_sort(a)").collect() - }.getMessage().contains("only supports array input")) + }.getMessage().contains("argument 1 requires array type, however, '`a`' is of string type")) } def testSizeOfArray(sizeOfNull: Any): Unit = { @@ -520,7 +600,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ) def testPrimitiveType(): Unit = { - checkAnswer(idf.select(map_entries('m)), iExpected) + checkAnswer(idf.select(map_entries($"m")), iExpected) checkAnswer(idf.selectExpr("map_entries(m)"), iExpected) checkAnswer(idf.selectExpr("map_entries(map(1, null, 2, null))"), Seq.fill(iExpected.length)(Row(Seq(Row(1, null), Row(2, null))))) @@ -547,7 +627,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ) def testNonPrimitiveType(): Unit = { - checkAnswer(sdf.select(map_entries('m)), sExpected) + checkAnswer(sdf.select(map_entries($"m")), sExpected) checkAnswer(sdf.selectExpr("map_entries(m)"), sExpected) } @@ -572,7 +652,7 @@ class DataFrameFunctionsSuite extends 
QueryTest with SharedSparkSession { ) checkAnswer(df1.selectExpr("map_concat(map1, map2)"), expected1a) - checkAnswer(df1.select(map_concat('map1, 'map2)), expected1a) + checkAnswer(df1.select(map_concat($"map1", $"map2")), expected1a) val expected1b = Seq( Row(Map(1 -> 100, 2 -> 200)), @@ -581,7 +661,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ) checkAnswer(df1.selectExpr("map_concat(map1)"), expected1b) - checkAnswer(df1.select(map_concat('map1)), expected1b) + checkAnswer(df1.select(map_concat($"map1")), expected1b) val df2 = Seq( ( @@ -613,7 +693,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ) checkAnswer(df3.selectExpr("map_concat(map1, map2)"), expected3) - checkAnswer(df3.select(map_concat('map1, 'map2)), expected3) + checkAnswer(df3.select(map_concat($"map1", $"map2")), expected3) val expectedMessage1 = "input to function map_concat should all be the same type" @@ -622,7 +702,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { }.getMessage().contains(expectedMessage1)) assert(intercept[AnalysisException] { - df2.select(map_concat('map1, 'map2)).collect() + df2.select(map_concat($"map1", $"map2")).collect() }.getMessage().contains(expectedMessage1)) val expectedMessage2 = "input to function map_concat should all be of type map" @@ -632,7 +712,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { }.getMessage().contains(expectedMessage2)) assert(intercept[AnalysisException] { - df2.select(map_concat('map1, lit(12))).collect() + df2.select(map_concat($"map1", lit(12))).collect() }.getMessage().contains(expectedMessage2)) } @@ -651,7 +731,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(null)) def testPrimitiveType(): Unit = { - checkAnswer(idf.select(map_from_entries('a)), iExpected) + checkAnswer(idf.select(map_from_entries($"a")), iExpected) checkAnswer(idf.selectExpr("map_from_entries(a)"), iExpected) 
checkAnswer(idf.selectExpr("map_from_entries(array(struct(1, null), struct(2, null)))"), Seq.fill(iExpected.length)(Row(Map(1 -> null, 2 -> null)))) @@ -679,7 +759,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(null)) def testNonPrimitiveType(): Unit = { - checkAnswer(sdf.select(map_from_entries('a)), sExpected) + checkAnswer(sdf.select(map_from_entries($"a")), sExpected) checkAnswer(sdf.selectExpr("map_from_entries(a)"), sExpected) } @@ -770,7 +850,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { val errorMsg1 = s""" |Input to function array_contains should have been array followed by a - |value with same element type, but it's [array, decimal(29,29)]. + |value with same element type, but it's [array, decimal(38,29)]. """.stripMargin.replace("\n", " ").trim() assert(e1.message.contains(errorMsg1)) @@ -785,6 +865,23 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { assert(e2.message.contains(errorMsg2)) } + test("SPARK-29600: ArrayContains function may return incorrect result for DecimalType") { + checkAnswer( + sql("select array_contains(array(1.10), 1.1)"), + Seq(Row(true)) + ) + + checkAnswer( + sql("SELECT array_contains(array(1.1), 1.10)"), + Seq(Row(true)) + ) + + checkAnswer( + sql("SELECT array_contains(array(1.11), 1.1)"), + Seq(Row(false)) + ) + } + test("arrays_overlap function") { val df = Seq( (Seq[Option[Int]](Some(1), Some(2)), Seq[Option[Int]](Some(-1), Some(10))), @@ -899,8 +996,10 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } test("sequence") { - checkAnswer(Seq((-2, 2)).toDF().select(sequence('_1, '_2)), Seq(Row(Array(-2, -1, 0, 1, 2)))) - checkAnswer(Seq((7, 2, -2)).toDF().select(sequence('_1, '_2, '_3)), Seq(Row(Array(7, 5, 3)))) + checkAnswer(Seq((-2, 2)).toDF().select(sequence($"_1", $"_2")), + Seq(Row(Array(-2, -1, 0, 1, 2)))) + checkAnswer(Seq((7, 2, -2)).toDF().select(sequence($"_1", $"_2", $"_3")), + 
Seq(Row(Array(7, 5, 3)))) checkAnswer( spark.sql("select sequence(" + @@ -926,7 +1025,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { // test type coercion checkAnswer( - Seq((1.toByte, 3L, 1)).toDF().select(sequence('_1, '_2, '_3)), + Seq((1.toByte, 3L, 1)).toDF().select(sequence($"_1", $"_2", $"_3")), Seq(Row(Array(1L, 2L, 3L)))) checkAnswer( @@ -954,9 +1053,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { test("reverse function - string") { val oneRowDF = Seq(("Spark", 3215)).toDF("s", "i") def testString(): Unit = { - checkAnswer(oneRowDF.select(reverse('s)), Seq(Row("krapS"))) + checkAnswer(oneRowDF.select(reverse($"s")), Seq(Row("krapS"))) checkAnswer(oneRowDF.selectExpr("reverse(s)"), Seq(Row("krapS"))) - checkAnswer(oneRowDF.select(reverse('i)), Seq(Row("5123"))) + checkAnswer(oneRowDF.select(reverse($"i")), Seq(Row("5123"))) checkAnswer(oneRowDF.selectExpr("reverse(i)"), Seq(Row("5123"))) checkAnswer(oneRowDF.selectExpr("reverse(null)"), Seq(Row(null))) } @@ -978,7 +1077,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { def testArrayOfPrimitiveTypeNotContainsNull(): Unit = { checkAnswer( - idfNotContainsNull.select(reverse('i)), + idfNotContainsNull.select(reverse($"i")), Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), Row(null)) ) checkAnswer( @@ -1004,7 +1103,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { def testArrayOfPrimitiveTypeContainsNull(): Unit = { checkAnswer( - idfContainsNull.select(reverse('i)), + idfContainsNull.select(reverse($"i")), Seq(Row(Seq(7, null, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5, null)), Row(Seq.empty), Row(null)) ) checkAnswer( @@ -1030,7 +1129,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { def testArrayOfNonPrimitiveType(): Unit = { checkAnswer( - sdf.select(reverse('s)), + sdf.select(reverse($"s")), Seq(Row(Seq("b", "a", "c")), Row(Seq(null, "c", null, "b")), 
Row(Seq.empty), Row(null)) ) checkAnswer( @@ -1735,7 +1834,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).toDF("i") def testArrayOfPrimitiveTypeNotContainsNull(): Unit = { - checkShuffleResult(idfNotContainsNull.select(shuffle('i))) + checkShuffleResult(idfNotContainsNull.select(shuffle($"i"))) checkShuffleResult(idfNotContainsNull.selectExpr("shuffle(i)")) } @@ -1755,7 +1854,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).toDF("i") def testArrayOfPrimitiveTypeContainsNull(): Unit = { - checkShuffleResult(idfContainsNull.select(shuffle('i))) + checkShuffleResult(idfContainsNull.select(shuffle($"i"))) checkShuffleResult(idfContainsNull.selectExpr("shuffle(i)")) } @@ -1775,7 +1874,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).toDF("s") def testNonPrimitiveType(): Unit = { - checkShuffleResult(sdf.select(shuffle('s))) + checkShuffleResult(sdf.select(shuffle($"s"))) checkShuffleResult(sdf.selectExpr("shuffle(s)")) } @@ -1930,6 +2029,18 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq(5, 9, 11, 10, 6)), Row(Seq.empty), Row(null))) + checkAnswer(df.select(transform(col("i"), x => x + 1)), + Seq( + Row(Seq(2, 10, 9, 8)), + Row(Seq(6, 9, 10, 8, 3)), + Row(Seq.empty), + Row(null))) + checkAnswer(df.select(transform(col("i"), (x, i) => x + i)), + Seq( + Row(Seq(1, 10, 10, 10)), + Row(Seq(5, 9, 11, 10, 6)), + Row(Seq.empty), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -1960,6 +2071,18 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq(5, null, 10, 12, 11, 7)), Row(Seq.empty), Row(null))) + checkAnswer(df.select(transform(col("i"), x => x + 1)), + Seq( + Row(Seq(2, 10, 9, null, 8)), + Row(Seq(6, null, 9, 10, 8, 3)), + Row(Seq.empty), + Row(null))) + checkAnswer(df.select(transform(col("i"), (x, i) => x + i)), + Seq( + Row(Seq(1, 10, 10, null, 11)), + 
Row(Seq(5, null, 10, 12, 11, 7)), + Row(Seq.empty), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -1990,6 +2113,18 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq("b0", null, "c2", null)), Row(Seq.empty), Row(null))) + checkAnswer(df.select(transform(col("s"), x => concat(x, x))), + Seq( + Row(Seq("cc", "aa", "bb")), + Row(Seq("bb", null, "cc", null)), + Row(Seq.empty), + Row(null))) + checkAnswer(df.select(transform(col("s"), (x, i) => concat(x, i))), + Seq( + Row(Seq("c0", "a1", "b2")), + Row(Seq("b0", null, "c2", null)), + Row(Seq.empty), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2034,6 +2169,32 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Seq("b", null, "c", null, null))), Row(Seq.empty), Row(null))) + checkAnswer(df.select(transform(col("arg"), arg => arg)), + Seq( + Row(Seq("c", "a", "b")), + Row(Seq("b", null, "c", null)), + Row(Seq.empty), + Row(null))) + checkAnswer(df.select(transform(col("arg"), _ => col("arg"))), + Seq( + Row(Seq(Seq("c", "a", "b"), Seq("c", "a", "b"), Seq("c", "a", "b"))), + Row(Seq( + Seq("b", null, "c", null), + Seq("b", null, "c", null), + Seq("b", null, "c", null), + Seq("b", null, "c", null))), + Row(Seq.empty), + Row(null))) + checkAnswer(df.select(transform(col("arg"), x => concat(col("arg"), array(x)))), + Seq( + Row(Seq(Seq("c", "a", "b", "c"), Seq("c", "a", "b", "a"), Seq("c", "a", "b", "b"))), + Row(Seq( + Seq("b", null, "c", null, "b"), + Seq("b", null, "c", null, null), + Seq("b", null, "c", null, "c"), + Seq("b", null, "c", null, null))), + Row(Seq.empty), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2080,6 +2241,14 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Map(), Map(1 -> -1, 2 -> -2, 3 -> -3)), Row(Map(1 -> 10), Map(3 -> -3)))) + checkAnswer(dfInts.select( + 
map_filter(col("m"), (k, v) => k * 10 === v), + map_filter(col("m"), (k, v) => k === (v * -1))), + Seq( + Row(Map(1 -> 10, 2 -> 20, 3 -> 30), Map()), + Row(Map(), Map(1 -> -1, 2 -> -2, 3 -> -3)), + Row(Map(1 -> 10), Map(3 -> -3)))) + val dfComplex = Seq( Map(1 -> Seq(Some(1)), 2 -> Seq(Some(1), Some(2)), 3 -> Seq(Some(1), Some(2), Some(3))), Map(1 -> null, 2 -> Seq(Some(-2), Some(-2)), 3 -> Seq[Option[Int]](None))).toDF("m") @@ -2090,6 +2259,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Map(1 -> Seq(1)), Map(1 -> Seq(1), 2 -> Seq(1, 2), 3 -> Seq(1, 2, 3))), Row(Map(), Map(2 -> Seq(-2, -2))))) + checkAnswer(dfComplex.select( + map_filter(col("m"), (k, v) => k === element_at(v, 1)), + map_filter(col("m"), (k, v) => k === size(v))), + Seq( + Row(Map(1 -> Seq(1)), Map(1 -> Seq(1), 2 -> Seq(1, 2), 3 -> Seq(1, 2, 3))), + Row(Map(), Map(2 -> Seq(-2, -2))))) + // Invalid use cases val df = Seq( (Map(1 -> "a"), 1), @@ -2112,6 +2288,11 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } assert(ex3.getMessage.contains("data type mismatch: argument 1 requires map type")) + val ex3a = intercept[AnalysisException] { + df.select(map_filter(col("i"), (k, v) => k > v)) + } + assert(ex3a.getMessage.contains("data type mismatch: argument 1 requires map type")) + val ex4 = intercept[AnalysisException] { df.selectExpr("map_filter(a, (k, v) -> k > v)") } @@ -2133,6 +2314,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq(8, 2)), Row(Seq.empty), Row(null))) + checkAnswer(df.select(filter(col("i"), _ % 2 === 0)), + Seq( + Row(Seq(8)), + Row(Seq(8, 2)), + Row(Seq.empty), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2157,6 +2344,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq(8, 2)), Row(Seq.empty), Row(null))) + checkAnswer(df.select(filter(col("i"), _ % 2 === 0)), + Seq( + Row(Seq(8)), + Row(Seq(8, 
2)), + Row(Seq.empty), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2181,6 +2374,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq("b", "c")), Row(Seq.empty), Row(null))) + checkAnswer(df.select(filter(col("s"), x => x.isNotNull)), + Seq( + Row(Seq("c", "a", "b")), + Row(Seq("b", "c")), + Row(Seq.empty), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2190,6 +2389,36 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { testNonPrimitiveType() } + test("filter function - index argument") { + val df = Seq( + Seq("c", "a", "b"), + Seq("b", null, "c", null), + Seq.empty, + null + ).toDF("s") + + def testIndexArgument(): Unit = { + checkAnswer(df.selectExpr("filter(s, (x, i) -> i % 2 == 0)"), + Seq( + Row(Seq("c", "b")), + Row(Seq("b", "c")), + Row(Seq.empty), + Row(null))) + checkAnswer(df.select(filter(col("s"), (x, i) => i % 2 === 0)), + Seq( + Row(Seq("c", "b")), + Row(Seq("b", "c")), + Row(Seq.empty), + Row(null))) + } + + // Test with local relation, the Project will be evaluated without codegen + testIndexArgument() + // Test with cached relation, the Project will be evaluated with codegen + df.cache() + testIndexArgument() + } + test("filter function - invalid") { val df = Seq( (Seq("c", "a", "b"), 1), @@ -2199,20 +2428,30 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).toDF("s", "i") val ex1 = intercept[AnalysisException] { - df.selectExpr("filter(s, (x, y) -> x + y)") + df.selectExpr("filter(s, (x, y, z) -> x + y)") } - assert(ex1.getMessage.contains("The number of lambda function arguments '2' does not match")) + assert(ex1.getMessage.contains("The number of lambda function arguments '3' does not match")) val ex2 = intercept[AnalysisException] { df.selectExpr("filter(i, x -> x)") } assert(ex2.getMessage.contains("data type mismatch: argument 1 requires array type")) + val 
ex2a = intercept[AnalysisException] { + df.select(filter(col("i"), x => x)) + } + assert(ex2a.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex3 = intercept[AnalysisException] { df.selectExpr("filter(s, x -> x)") } assert(ex3.getMessage.contains("data type mismatch: argument 2 requires boolean type")) + val ex3a = intercept[AnalysisException] { + df.select(filter(col("s"), x => x)) + } + assert(ex3a.getMessage.contains("data type mismatch: argument 2 requires boolean type")) + val ex4 = intercept[AnalysisException] { df.selectExpr("filter(a, x -> x)") } @@ -2234,6 +2473,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(false), Row(false), Row(null))) + checkAnswer(df.select(exists(col("i"), _ % 2 === 0)), + Seq( + Row(true), + Row(false), + Row(false), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2260,6 +2505,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(null), Row(false), Row(null))) + checkAnswer(df.select(exists(col("i"), _ % 2 === 0)), + Seq( + Row(true), + Row(false), + Row(null), + Row(false), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2284,6 +2536,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(true), Row(false), Row(null))) + checkAnswer(df.select(exists(col("s"), x => x.isNull)), + Seq( + Row(false), + Row(true), + Row(false), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2311,11 +2569,21 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } assert(ex2.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex2a = intercept[AnalysisException] { + df.select(exists(col("i"), x => x)) + } + assert(ex2.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex3 = intercept[AnalysisException] { 
df.selectExpr("exists(s, x -> x)") } assert(ex3.getMessage.contains("data type mismatch: argument 2 requires boolean type")) + val ex3a = intercept[AnalysisException] { + df.select(exists(df("s"), x => x)) + } + assert(ex3a.getMessage.contains("data type mismatch: argument 2 requires boolean type")) + val ex4 = intercept[AnalysisException] { df.selectExpr("exists(a, x -> x)") } @@ -2337,6 +2605,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(true), Row(true), Row(null))) + checkAnswer(df.select(forall(col("i"), x => x % 2 === 0)), + Seq( + Row(false), + Row(true), + Row(true), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2363,6 +2637,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(true), Row(true), Row(null))) + checkAnswer(df.select(forall(col("i"), x => (x % 2 === 0) || x.isNull)), + Seq( + Row(false), + Row(true), + Row(true), + Row(true), + Row(null))) checkAnswer(df.selectExpr("forall(i, x -> x % 2 == 0)"), Seq( Row(false), @@ -2370,6 +2651,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(true), Row(true), Row(null))) + checkAnswer(df.select(forall(col("i"), x => x % 2 === 0)), + Seq( + Row(false), + Row(null), + Row(true), + Row(true), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2394,6 +2682,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(true), Row(true), Row(null))) + checkAnswer(df.select(forall(col("s"), _.isNull)), + Seq( + Row(false), + Row(true), + Row(true), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2421,15 +2715,30 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } assert(ex2.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex2a = intercept[AnalysisException] { + df.select(forall(col("i"), x => x)) + 
} + assert(ex2a.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex3 = intercept[AnalysisException] { df.selectExpr("forall(s, x -> x)") } assert(ex3.getMessage.contains("data type mismatch: argument 2 requires boolean type")) + val ex3a = intercept[AnalysisException] { + df.select(forall(col("s"), x => x)) + } + assert(ex3a.getMessage.contains("data type mismatch: argument 2 requires boolean type")) + val ex4 = intercept[AnalysisException] { df.selectExpr("forall(a, x -> x)") } assert(ex4.getMessage.contains("cannot resolve '`a`'")) + + val ex4a = intercept[AnalysisException] { + df.select(forall(col("a"), x => x)) + } + assert(ex4a.getMessage.contains("cannot resolve '`a`'")) } test("aggregate function - array for primitive type not containing null") { @@ -2453,6 +2762,18 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(310), Row(0), Row(null))) + checkAnswer(df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x)), + Seq( + Row(25), + Row(31), + Row(0), + Row(null))) + checkAnswer(df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x, _ * 10)), + Seq( + Row(250), + Row(310), + Row(0), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2484,6 +2805,20 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(0), Row(0), Row(null))) + checkAnswer(df.select(aggregate(col("i"), lit(0), (acc, x) => acc + x)), + Seq( + Row(25), + Row(null), + Row(0), + Row(null))) + checkAnswer( + df.select( + aggregate(col("i"), lit(0), (acc, x) => acc + x, acc => coalesce(acc, lit(0)) * 10)), + Seq( + Row(250), + Row(0), + Row(0), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2515,6 +2850,21 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(""), Row("c"), Row(null))) + checkAnswer(df.select(aggregate(col("ss"), col("s"), (acc, x) => concat(acc, x))), + Seq( + 
Row("acab"), + Row(null), + Row("c"), + Row(null))) + checkAnswer( + df.select( + aggregate(col("ss"), col("s"), (acc, x) => concat(acc, x), + acc => coalesce(acc, lit("")))), + Seq( + Row("acab"), + Row(""), + Row("c"), + Row(null))) } // Test with local relation, the Project will be evaluated without codegen @@ -2547,11 +2897,21 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } assert(ex3.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex3a = intercept[AnalysisException] { + df.select(aggregate(col("i"), lit(0), (acc, x) => x)) + } + assert(ex3a.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex4 = intercept[AnalysisException] { df.selectExpr("aggregate(s, 0, (acc, x) -> x)") } assert(ex4.getMessage.contains("data type mismatch: argument 3 requires int type")) + val ex4a = intercept[AnalysisException] { + df.select(aggregate(col("s"), lit(0), (acc, x) => x)) + } + assert(ex4a.getMessage.contains("data type mismatch: argument 3 requires int type")) + val ex5 = intercept[AnalysisException] { df.selectExpr("aggregate(a, 0, (acc, x) -> x)") } @@ -2572,6 +2932,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Map(10 -> null, 8 -> false, 4 -> null)), Row(Map(5 -> null)), Row(null))) + + checkAnswer(df.select(map_zip_with(df("m1"), df("m2"), (k, v1, v2) => k === v1 + v2)), + Seq( + Row(Map(8 -> true, 3 -> false, 6 -> true)), + Row(Map(10 -> null, 8 -> false, 4 -> null)), + Row(Map(5 -> null)), + Row(null))) } test("map_zip_with function - map of non-primitive types") { @@ -2588,6 +2955,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Map("b" -> Row("a", null), "c" -> Row("d", "a"), "d" -> Row(null, "k"))), Row(Map("a" -> Row("d", null))), Row(null))) + + checkAnswer(df.select(map_zip_with(col("m1"), col("m2"), (k, v1, v2) => struct(v1, v2))), + Seq( + Row(Map("z" -> Row("a", "c"), "y" -> Row("b", null), "x" 
-> Row("c", "a"))), + Row(Map("b" -> Row("a", null), "c" -> Row("d", "a"), "d" -> Row(null, "k"))), + Row(Map("a" -> Row("d", null))), + Row(null))) } test("map_zip_with function - invalid") { @@ -2606,16 +2980,32 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { assert(ex2.getMessage.contains("The input to function map_zip_with should have " + "been two maps with compatible key types")) + val ex2a = intercept[AnalysisException] { + df.select(map_zip_with(df("mis"), col("mmi"), (x, y, z) => concat(x, y, z))) + } + assert(ex2a.getMessage.contains("The input to function map_zip_with should have " + + "been two maps with compatible key types")) + val ex3 = intercept[AnalysisException] { df.selectExpr("map_zip_with(i, mis, (x, y, z) -> concat(x, y, z))") } assert(ex3.getMessage.contains("type mismatch: argument 1 requires map type")) + val ex3a = intercept[AnalysisException] { + df.select(map_zip_with(col("i"), col("mis"), (x, y, z) => concat(x, y, z))) + } + assert(ex3a.getMessage.contains("type mismatch: argument 1 requires map type")) + val ex4 = intercept[AnalysisException] { df.selectExpr("map_zip_with(mis, i, (x, y, z) -> concat(x, y, z))") } assert(ex4.getMessage.contains("type mismatch: argument 2 requires map type")) + val ex4a = intercept[AnalysisException] { + df.select(map_zip_with(col("mis"), col("i"), (x, y, z) => concat(x, y, z))) + } + assert(ex4a.getMessage.contains("type mismatch: argument 2 requires map type")) + val ex5 = intercept[AnalysisException] { df.selectExpr("map_zip_with(mmi, mmi, (x, y, z) -> x)") } @@ -2644,27 +3034,59 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"), Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7)))) + checkAnswer(dfExample1.select(transform_keys(col("i"), (k, v) => k + v)), + Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7)))) + checkAnswer(dfExample2.selectExpr("transform_keys(j, " + "(k, v) -> 
map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"), Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7)))) + checkAnswer(dfExample2.select( + transform_keys( + col("j"), + (k, v) => element_at( + map_from_arrays( + array(lit(1), lit(2), lit(3)), + array(lit("one"), lit("two"), lit("three")) + ), + k + ) + ) + ), + Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7)))) + checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"), Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7)))) + checkAnswer(dfExample2.select(transform_keys(col("j"), + (k, v) => (v * 2).cast("bigint") + k)), + Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7)))) + checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"), Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7)))) + checkAnswer(dfExample2.select(transform_keys(col("j"), (k, v) => k + v)), + Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7)))) + checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> k % 2 = 0 OR v)"), Seq(Row(Map(true -> true, true -> false)))) + checkAnswer(dfExample3.select(transform_keys(col("x"), (k, v) => k % 2 === 0 || v)), + Seq(Row(Map(true -> true, true -> false)))) + checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"), Seq(Row(Map(50 -> true, 78 -> false)))) - checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"), + checkAnswer(dfExample3.select(transform_keys(col("x"), + (k, v) => when(v, k * 2).otherwise(k * 3))), Seq(Row(Map(50 -> true, 78 -> false)))) checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"), Seq(Row(Map(false -> false)))) + + checkAnswer(dfExample4.select(transform_keys(col("y"), + (k, v) => array_contains(k, lit(3)) && v)), + Seq(Row(Map(false -> false)))) } // Test with local relation, the Project will be evaluated without codegen @@ -2702,6 +3124,11 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } 
assert(ex3.getMessage.contains("Cannot use null as map key")) + val ex3a = intercept[Exception] { + dfExample1.select(transform_keys(col("i"), (k, v) => v)).show() + } + assert(ex3a.getMessage.contains("Cannot use null as map key")) + val ex4 = intercept[AnalysisException] { dfExample2.selectExpr("transform_keys(j, (k, v) -> k + 1)") } @@ -2766,6 +3193,46 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer( dfExample5.selectExpr("transform_values(c, (k, v) -> k + cardinality(v))"), Seq(Row(Map(1 -> 3)))) + + checkAnswer(dfExample1.select(transform_values(col("i"), (k, v) => k + v)), + Seq(Row(Map(1 -> 2, 9 -> 18, 8 -> 16, 7 -> 14)))) + + checkAnswer(dfExample2.select( + transform_values(col("x"), (k, v) => when(k, v).otherwise(k.cast("string")))), + Seq(Row(Map(false -> "false", true -> "def")))) + + checkAnswer(dfExample2.select(transform_values(col("x"), + (k, v) => (!k) && v === "abc")), + Seq(Row(Map(false -> true, true -> false)))) + + checkAnswer(dfExample3.select(transform_values(col("y"), (k, v) => v * v)), + Seq(Row(Map("a" -> 1, "b" -> 4, "c" -> 9)))) + + checkAnswer(dfExample3.select( + transform_values(col("y"), (k, v) => concat(k, lit(":"), v.cast("string")))), + Seq(Row(Map("a" -> "a:1", "b" -> "b:2", "c" -> "c:3")))) + + checkAnswer( + dfExample3.select(transform_values(col("y"), (k, v) => concat(k, v.cast("string")))), + Seq(Row(Map("a" -> "a1", "b" -> "b2", "c" -> "c3")))) + + val testMap = map_from_arrays( + array(lit(1), lit(2), lit(3)), + array(lit("one"), lit("two"), lit("three")) + ) + + checkAnswer( + dfExample4.select(transform_values(col("z"), + (k, v) => concat(element_at(testMap, k), lit("_"), v.cast("string")))), + Seq(Row(Map(1 -> "one_1.0", 2 -> "two_1.4", 3 ->"three_1.7")))) + + checkAnswer( + dfExample4.select(transform_values(col("z"), (k, v) => k - v)), + Seq(Row(Map(1 -> 0.0, 2 -> 0.6000000000000001, 3 -> 1.3)))) + + checkAnswer( + dfExample5.select(transform_values(col("c"), (k, v) => k + 
size(v))), + Seq(Row(Map(1 -> 3)))) } // Test with local relation, the Project will be evaluated without codegen @@ -2809,6 +3276,28 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(dfExample2.selectExpr("transform_values(j, (k, v) -> k + cast(v as BIGINT))"), Seq(Row(Map.empty[BigInt, BigInt]))) + + checkAnswer(dfExample1.select(transform_values(col("i"), + (k, v) => lit(null).cast("int"))), + Seq(Row(Map.empty[Integer, Integer]))) + + checkAnswer(dfExample1.select(transform_values(col("i"), (k, v) => k)), + Seq(Row(Map.empty[Integer, Integer]))) + + checkAnswer(dfExample1.select(transform_values(col("i"), (k, v) => v)), + Seq(Row(Map.empty[Integer, Integer]))) + + checkAnswer(dfExample1.select(transform_values(col("i"), (k, v) => lit(0))), + Seq(Row(Map.empty[Integer, Integer]))) + + checkAnswer(dfExample1.select(transform_values(col("i"), (k, v) => lit("value"))), + Seq(Row(Map.empty[Integer, String]))) + + checkAnswer(dfExample1.select(transform_values(col("i"), (k, v) => lit(true))), + Seq(Row(Map.empty[Integer, Boolean]))) + + checkAnswer(dfExample1.select(transform_values(col("i"), (k, v) => v.cast("bigint"))), + Seq(Row(Map.empty[BigInt, BigInt]))) } testEmpty() @@ -2833,6 +3322,15 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(dfExample2.selectExpr( "transform_values(b, (k, v) -> IF(v IS NULL, k + 1, k + 2))"), Seq(Row(Map(1 -> 3, 2 -> 4, 3 -> 4)))) + + checkAnswer(dfExample1.select(transform_values(col("a"), + (k, v) => lit(null).cast("int"))), + Seq(Row(Map[Int, Integer](1 -> null, 2 -> null, 3 -> null, 4 -> null)))) + + checkAnswer(dfExample2.select( + transform_values(col("b"), (k, v) => when(v.isNull, k + 1).otherwise(k + 2)) + ), + Seq(Row(Map(1 -> 3, 2 -> 4, 3 -> 4)))) } testNullValue() @@ -2871,6 +3369,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } assert(ex3.getMessage.contains( "data type mismatch: argument 1 requires map type")) + 
+ val ex3a = intercept[AnalysisException] { + dfExample3.select(transform_values(col("x"), (k, v) => k + 1)) + } + assert(ex3a.getMessage.contains( + "data type mismatch: argument 1 requires map type")) } testInvalidLambdaFunctions() @@ -2897,10 +3401,15 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq.empty), Row(null)) checkAnswer(df1.selectExpr("zip_with(val1, val2, (x, y) -> x + y)"), expectedValue1) + checkAnswer(df1.select(zip_with(df1("val1"), df1("val2"), (x, y) => x + y)), expectedValue1) val expectedValue2 = Seq( Row(Seq(Row(1L, 1), Row(2L, null), Row(null, 3))), Row(Seq(Row(4L, 1), Row(11L, 2), Row(null, 3)))) checkAnswer(df2.selectExpr("zip_with(val1, val2, (x, y) -> (y, x))"), expectedValue2) + checkAnswer( + df2.select(zip_with(df2("val1"), df2("val2"), (x, y) => struct(y, x))), + expectedValue2 + ) } test("arrays zip_with function - for non-primitive types") { @@ -2915,7 +3424,14 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq(Row("x", "a"), Row("y", null))), Row(Seq.empty), Row(null)) - checkAnswer(df.selectExpr("zip_with(val1, val2, (x, y) -> (y, x))"), expectedValue1) + checkAnswer( + df.selectExpr("zip_with(val1, val2, (x, y) -> (y, x))"), + expectedValue1 + ) + checkAnswer( + df.select(zip_with(col("val1"), col("val2"), (x, y) => struct(y, x))), + expectedValue1 + ) } test("arrays zip_with function - invalid") { @@ -2937,6 +3453,10 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { df.selectExpr("zip_with(i, a2, (acc, x) -> x)") } assert(ex3.getMessage.contains("data type mismatch: argument 1 requires array type")) + val ex3a = intercept[AnalysisException] { + df.select(zip_with(df("i"), df("a2"), (acc, x) => x)) + } + assert(ex3a.getMessage.contains("data type mismatch: argument 1 requires array type")) val ex4 = intercept[AnalysisException] { df.selectExpr("zip_with(a1, a, (acc, x) -> x)") } @@ -2979,16 +3499,6 @@ class DataFrameFunctionsSuite 
extends QueryTest with SharedSparkSession { ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_)) } - test("SPARK-21281 use string types by default if array and map have no argument") { - val ds = spark.range(1) - var expectedSchema = new StructType() - .add("x", ArrayType(StringType, containsNull = false), nullable = false) - assert(ds.select(array().as("x")).schema == expectedSchema) - expectedSchema = new StructType() - .add("x", MapType(StringType, StringType, valueContainsNull = false), nullable = false) - assert(ds.select(map().as("x")).schema == expectedSchema) - } - test("SPARK-21281 fails if functions have no argument") { val df = Seq(1).toDF("a") @@ -3042,6 +3552,34 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.select("x").filter("exists(i, x -> x % d == 0)"), Seq(Row(1))) } + + test("SPARK-29462: Empty array of NullType for array function with no arguments") { + Seq((true, StringType), (false, NullType)).foreach { + case (arrayDefaultToString, expectedType) => + withSQLConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE.key -> + arrayDefaultToString.toString) { + val schema = spark.range(1).select(array()).schema + assert(schema.nonEmpty && schema.head.dataType.isInstanceOf[ArrayType]) + val actualType = schema.head.dataType.asInstanceOf[ArrayType].elementType + assert(actualType === expectedType) + } + } + } + + test("SPARK-30790: Empty map with NullType as key/value type for map function with no argument") { + Seq((true, StringType), (false, NullType)).foreach { + case (mapDefaultToString, expectedType) => + withSQLConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE.key -> + mapDefaultToString.toString) { + val schema = spark.range(1).select(map()).schema + assert(schema.nonEmpty && schema.head.dataType.isInstanceOf[MapType]) + val actualKeyType = schema.head.dataType.asInstanceOf[MapType].keyType + val actualValueType = schema.head.dataType.asInstanceOf[MapType].valueType + 
assert(actualKeyType === expectedType) + assert(actualValueType === expectedType) + } + } + } } object DataFrameFunctionsSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala index b33c26a0b75a2..37dc8f1bcc7f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameHintSuite.scala @@ -68,5 +68,17 @@ class DataFrameHintSuite extends AnalysisTest with SharedSparkSession { check( df.hint("REPARTITION", 100), UnresolvedHint("REPARTITION", Seq(100), df.logicalPlan)) + + check( + df.hint("REPARTITION", 10, $"id".expr), + UnresolvedHint("REPARTITION", Seq(10, $"id".expr), df.logicalPlan)) + + check( + df.hint("REPARTITION_BY_RANGE", $"id".expr), + UnresolvedHint("REPARTITION_BY_RANGE", Seq($"id".expr), df.logicalPlan)) + + check( + df.hint("REPARTITION_BY_RANGE", 10, $"id".expr), + UnresolvedHint("REPARTITION_BY_RANGE", Seq(10, $"id".expr), df.logicalPlan)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index 3a217e6e28060..c7545bcad8962 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -17,14 +17,18 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.plans.{Inner, LeftOuter, RightOuter} -import org.apache.spark.sql.catalyst.plans.logical.Join +import org.apache.spark.sql.catalyst.plans.{Inner, InnerLike, LeftOuter, RightOuter} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan, Project} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import 
org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -class DataFrameJoinSuite extends QueryTest with SharedSparkSession { +class DataFrameJoinSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ test("join - join using") { @@ -46,13 +50,13 @@ class DataFrameJoinSuite extends QueryTest with SharedSparkSession { } test("join - sorted columns not in join's outputSet") { - val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str_sort").as('df1) - val df2 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as('df2) - val df3 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as('df3) + val df = Seq((1, 2, "1"), (3, 4, "3")).toDF("int", "int2", "str_sort").as("df1") + val df2 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("df2") + val df3 = Seq((1, 3, "1"), (5, 6, "5")).toDF("int", "int2", "str").as("df3") checkAnswer( df.join(df2, $"df1.int" === $"df2.int", "outer").select($"df1.int", $"df2.int2") - .orderBy('str_sort.asc, 'str.asc), + .orderBy(Symbol("str_sort").asc, Symbol("str").asc), Row(null, 6) :: Row(1, 3) :: Row(3, null) :: Nil) checkAnswer( @@ -149,7 +153,7 @@ class DataFrameJoinSuite extends QueryTest with SharedSparkSession { spark.range(10e10.toLong) .join(spark.range(10e10.toLong).hint("broadcast"), "id") .queryExecution.executedPlan - assert(plan2.collect { case p: BroadcastHashJoinExec => p }.size == 1) + assert(collect(plan2) { case p: BroadcastHashJoinExec => p }.size == 1) } test("join - outer join conversion") { @@ -256,4 +260,66 @@ class DataFrameJoinSuite extends QueryTest with SharedSparkSession { df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan } } + + def extractLeftDeepInnerJoins(plan: LogicalPlan): Seq[LogicalPlan] = plan match { + case j @ Join(left, right, _: InnerLike, _, _) => right +: extractLeftDeepInnerJoins(left) + case Filter(_, child) => 
extractLeftDeepInnerJoins(child) + case Project(_, child) => extractLeftDeepInnerJoins(child) + case _ => Seq(plan) + } + + test("SPARK-24690 enables star schema detection even if CBO disabled") { + withTable("r0", "r1", "r2", "r3") { + withTempDir { dir => + + withSQLConf( + SQLConf.STARSCHEMA_DETECTION.key -> "true", + SQLConf.CBO_ENABLED.key -> "false", + SQLConf.PLAN_STATS_ENABLED.key -> "true") { + + val path = dir.getAbsolutePath + + // Collects column statistics first + spark.range(300).selectExpr("id AS a", "id AS b", "id AS c") + .write.mode("overwrite").parquet(s"$path/r0") + spark.read.parquet(s"$path/r0").write.saveAsTable("r0") + spark.sql("ANALYZE TABLE r0 COMPUTE STATISTICS FOR COLUMNS a, b, c") + + spark.range(10).selectExpr("id AS a", "id AS d") + .write.mode("overwrite").parquet(s"$path/r1") + spark.read.parquet(s"$path/r1").write.saveAsTable("r1") + spark.sql("ANALYZE TABLE r1 COMPUTE STATISTICS FOR COLUMNS a") + + spark.range(50).selectExpr("id AS b", "id AS e") + .write.mode("overwrite").parquet(s"$path/r2") + spark.read.parquet(s"$path/r2").write.saveAsTable("r2") + spark.sql("ANALYZE TABLE r2 COMPUTE STATISTICS FOR COLUMNS b") + + spark.range(1).selectExpr("id AS c", "id AS f") + .write.mode("overwrite").parquet(s"$path/r3") + spark.read.parquet(s"$path/r3").write.saveAsTable("r3") + spark.sql("ANALYZE TABLE r3 COMPUTE STATISTICS FOR COLUMNS c") + + val resultDf = sql( + s"""SELECT * FROM r0, r1, r2, r3 + | WHERE + | r0.a = r1.a AND + | r1.d >= 3 AND + | r0.b = r2.b AND + | r2.e >= 5 AND + | r0.c = r3.c AND + | r3.f <= 100 + """.stripMargin) + + val optimized = resultDf.queryExecution.optimizedPlan + val optJoins = extractLeftDeepInnerJoins(optimized) + val joinOrder = optJoins + .flatMap(_.collect { case p: LogicalRelation => p.catalogTable }.head) + .map(_.identifier.identifier) + + assert(joinOrder === Seq("r2", "r1", "r3", "r0")) + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index aeee4577d3483..fb1ca69b6f73f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -21,6 +21,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{StringType, StructType} class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -36,6 +37,14 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { ).toDF("name", "age", "height") } + def createNaNDF(): DataFrame = { + Seq[(java.lang.Integer, java.lang.Long, java.lang.Short, + java.lang.Byte, java.lang.Float, java.lang.Double)]( + (1, 1L, 1.toShort, 1.toByte, 1.0f, 1.0), + (0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) + ).toDF("int", "long", "short", "byte", "float", "double") + } + test("drop") { val input = createDF() val rows = input.collect() @@ -231,6 +240,70 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { } } + def createDFsWithSameFieldsName(): (DataFrame, DataFrame) = { + val df1 = Seq( + ("f1-1", "f2", null), + ("f1-2", null, null), + ("f1-3", "f2", "f3-1"), + ("f1-4", "f2", "f3-1") + ).toDF("f1", "f2", "f3") + val df2 = Seq( + ("f1-1", null, null), + ("f1-2", "f2", null), + ("f1-3", "f2", "f4-1") + ).toDF("f1", "f2", "f4") + (df1, df2) + } + + test("fill unambiguous field for join operation") { + val (df1, df2) = createDFsWithSameFieldsName() + val joined_df = df1.join(df2, Seq("f1"), joinType = "left_outer") + checkAnswer(joined_df.na.fill("", cols = Seq("f4")), + Row("f1-1", "f2", null, null, "") :: + Row("f1-2", null, null, "f2", "") :: + Row("f1-3", "f2", "f3-1", "f2", "f4-1") :: + Row("f1-4", "f2", "f3-1", null, "") :: Nil) + } + + test("fill ambiguous field for join 
operation") { + val (df1, df2) = createDFsWithSameFieldsName() + val joined_df = df1.join(df2, Seq("f1"), joinType = "left_outer") + + val message = intercept[AnalysisException] { + joined_df.na.fill("", cols = Seq("f2")) + }.getMessage + assert(message.contains("Reference 'f2' is ambiguous")) + } + + test("fill/drop with col(*)") { + val df = createDF() + // If columns are specified with "*", they are ignored. + checkAnswer(df.na.fill("new name", Seq("*")), df.collect()) + checkAnswer(df.na.drop("any", Seq("*")), df.collect()) + } + + test("fill/drop with nested columns") { + val schema = new StructType() + .add("c1", new StructType() + .add("c1-1", StringType) + .add("c1-2", StringType)) + + val data = Seq( + Row(Row(null, "a2")), + Row(Row("b1", "b2")), + Row(null)) + + val df = spark.createDataFrame( + spark.sparkContext.parallelize(data), schema) + + checkAnswer(df.select("c1.c1-1"), + Row(null) :: Row("b1") :: Row(null) :: Nil) + + // Nested columns are ignored for fill() and drop(). + checkAnswer(df.na.fill("a1", Seq("c1.c1-1")), data) + checkAnswer(df.na.drop("any", Seq("c1.c1-1")), data) + } + test("replace") { val input = createDF() @@ -305,4 +378,74 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { )).na.drop("name" :: Nil).select("name"), Row("Alice") :: Row("David") :: Nil) } + + test("SPARK-29890: duplicate names are allowed for fill() if column names are not specified.") { + val left = Seq(("1", null), ("3", "4")).toDF("col1", "col2") + val right = Seq(("1", "2"), ("3", null)).toDF("col1", "col2") + val df = left.join(right, Seq("col1")) + + // If column names are specified, the following fails due to ambiguity. + val exception = intercept[AnalysisException] { + df.na.fill("hello", Seq("col2")) + } + assert(exception.getMessage.contains("Reference 'col2' is ambiguous")) + + // If column names are not specified, fill() is applied to all the eligible columns. 
+ checkAnswer( + df.na.fill("hello"), + Row("1", "hello", "2") :: Row("3", "4", "hello") :: Nil) + } + + test("SPARK-30065: duplicate names are allowed for drop() if column names are not specified.") { + val left = Seq(("1", null), ("3", "4"), ("5", "6")).toDF("col1", "col2") + val right = Seq(("1", "2"), ("3", null), ("5", "6")).toDF("col1", "col2") + val df = left.join(right, Seq("col1")) + + // If column names are specified, the following fails due to ambiguity. + val exception = intercept[AnalysisException] { + df.na.drop("any", Seq("col2")) + } + assert(exception.getMessage.contains("Reference 'col2' is ambiguous")) + + // If column names are not specified, drop() is applied to all the eligible rows. + checkAnswer( + df.na.drop("any"), + Row("5", "6", "6") :: Nil) + } + + test("replace nan with float") { + checkAnswer( + createNaNDF().na.replace("*", Map( + Float.NaN -> 10.0f + )), + Row(1, 1L, 1.toShort, 1.toByte, 1.0f, 1.0) :: + Row(0, 0L, 0.toShort, 0.toByte, 10.0f, 10.0) :: Nil) + } + + test("replace nan with double") { + checkAnswer( + createNaNDF().na.replace("*", Map( + Double.NaN -> 10.0 + )), + Row(1, 1L, 1.toShort, 1.toByte, 1.0f, 1.0) :: + Row(0, 0L, 0.toShort, 0.toByte, 10.0f, 10.0) :: Nil) + } + + test("replace float with nan") { + checkAnswer( + createNaNDF().na.replace("*", Map( + 1.0f -> Float.NaN + )), + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: Nil) + } + + test("replace double with nan") { + checkAnswer( + createNaNDF().na.replace("*", Map( + 1.0 -> Double.NaN + )), + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: + Row(0, 0L, 0.toShort, 0.toByte, Float.NaN, Double.NaN) :: Nil) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index bcd0c3f0d64a7..51c6a835d58d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala 
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -46,7 +46,7 @@ class DataFramePivotSuite extends QueryTest with SharedSparkSession { courseSales.groupBy("course").pivot("year", Seq(2012, 2013)).agg(sum($"earnings")), expected) checkAnswer( - courseSales.groupBy('course).pivot('year, Seq(2012, 2013)).agg(sum('earnings)), + courseSales.groupBy($"course").pivot($"year", Seq(2012, 2013)).agg(sum($"earnings")), expected) } @@ -206,7 +206,7 @@ class DataFramePivotSuite extends QueryTest with SharedSparkSession { complexData.groupBy().pivot("b", Seq(true, false)).agg(max("a")), expected) checkAnswer( - complexData.groupBy().pivot('b, Seq(true, false)).agg(max('a)), + complexData.groupBy().pivot($"b", Seq(true, false)).agg(max("a")), expected) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index 92f1e4306c5b1..250ec7dc0ba5a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -34,8 +34,8 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { } test("join - self join") { - val df1 = testData.select(testData("key")).as('df1) - val df2 = testData.select(testData("key")).as('df2) + val df1 = testData.select(testData("key")).as("df1") + val df2 = testData.select(testData("key")).as("df2") checkAnswer( df1.join(df2, $"df1.key" === $"df2.key"), @@ -57,11 +57,11 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { test("join - using aliases after self join") { val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") checkAnswer( - df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("x.str").count(), + df.as("x").join(df.as("y"), $"x.str" === $"y.str").groupBy("x.str").count(), Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) checkAnswer( - 
df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("y.str").count(), + df.as("x").join(df.as("y"), $"x.str" === $"y.str").groupBy("y.str").count(), Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) } @@ -96,7 +96,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { val df2 = df1.filter($"id" > 0) withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { // `df1("id") > df2("id")` is always false. checkAnswer(df1.join(df2, df1("id") > df2("id")), Nil) @@ -110,7 +110,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { } withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { assertAmbiguousSelfJoin(df1.join(df2, df1("id") > df2("id"))) } @@ -121,7 +121,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { val df2 = df1.filter($"id" > 0) withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { assertAmbiguousSelfJoin(df1.join(df2, df1.colRegex("id") > df2.colRegex("id"))) } @@ -132,7 +132,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { val df2 = df1.filter($"a.b" > 0) withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { assertAmbiguousSelfJoin(df1.join(df2, df1("a.b") > df2("a.c"))) } @@ -143,7 +143,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { val df2 = df1.filter($"id" > 0) withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { // `df2("id")` actually points to the column of `df1`. 
checkAnswer(df1.join(df2).select(df2("id")), Seq(0, 0, 1, 1, 2, 2).map(Row(_))) @@ -157,7 +157,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { } withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { assertAmbiguousSelfJoin(df1.join(df2).select(df2("id"))) } @@ -170,7 +170,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { val df4 = spark.range(1) withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "false", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "false", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { // `df2("id") < df3("id")` is always false checkAnswer(df1.join(df2).join(df3, df2("id") < df3("id")), Nil) @@ -196,7 +196,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { } withSQLConf( - SQLConf.FAIL_AMBIGUOUS_SELF_JOIN.key -> "true", + SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED.key -> "true", SQLConf.CROSS_JOINS_ENABLED.key -> "true") { assertAmbiguousSelfJoin(df1.join(df2).join(df3, df2("id") < df3("id"))) assertAmbiguousSelfJoin(df1.join(df4).join(df2).select(df2("id"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index fbb7e903c3450..bd3f48078374d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -307,7 +307,7 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { val union = df1.union(df2) checkAnswer( - union.filter('i < rand(7) * 10), + union.filter($"i" < rand(7) * 10), expected(union) ) checkAnswer( @@ -321,13 +321,13 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { val intersect = df1.intersect(df2) checkAnswer( - intersect.filter('i < rand(7) 
* 10), + intersect.filter($"i" < rand(7) * 10), expected(intersect) ) val except = df1.except(df2) checkAnswer( - except.filter('i < rand(7) * 10), + except.filter($"i" < rand(7) * 10), expected(except) ) } @@ -375,7 +375,7 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { case j: Union if j.children.size == 5 => j }.size === 1) checkAnswer( - unionDF.agg(avg('key), max('key), min('key), sum('key)), + unionDF.agg(avg("key"), max("key"), min("key"), sum("key")), Row(50.5, 100, 1, 25250) :: Nil ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala index 3b57173bd246b..394bad751b5ce 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala @@ -443,9 +443,9 @@ class DataFrameStatSuite extends QueryTest with SharedSparkSession { assert(sketch4.confidence() === 0.99 +- 5e-3) intercept[IllegalArgumentException] { - df.select('id cast DoubleType as 'id) + df.select($"id" cast DoubleType as "id") .stat - .countMinSketch('id, depth = 10, width = 20, seed = 42) + .countMinSketch($"id", depth = 10, width = 20, seed = 42) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index b4ddfecaee469..694e576fcded4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -21,6 +21,7 @@ import java.io.{ByteArrayOutputStream, File} import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} import java.util.UUID +import java.util.concurrent.atomic.AtomicLong import scala.util.Random @@ -29,10 +30,12 @@ import org.scalatest.Matchers._ import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd} import 
org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.Uuid import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation -import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Union} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, OneRowRelation, Union} import org.apache.spark.sql.execution.{FilterExec, QueryExecution, WholeStageCodegenExec} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.HashAggregateExec import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.functions._ @@ -43,13 +46,15 @@ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils import org.apache.spark.util.random.XORShiftRandom -class DataFrameSuite extends QueryTest with SharedSparkSession { +class DataFrameSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ test("analysis error should be eagerly reported") { - intercept[Exception] { testData.select('nonExistentName) } + intercept[Exception] { testData.select("nonExistentName") } intercept[Exception] { - testData.groupBy('key).agg(Map("nonExistentName" -> "sum")) + testData.groupBy("key").agg(Map("nonExistentName" -> "sum")) } intercept[Exception] { testData.groupBy("nonExistentName").agg(Map("key" -> "sum")) @@ -90,9 +95,10 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { assert(spark.emptyDataFrame.count() === 0) } - test("head and take") { + test("head, take and tail") { assert(testData.take(2) === testData.collect().take(2)) assert(testData.head(2) === testData.collect().take(2)) + assert(testData.tail(2) === testData.collect().takeRight(2)) assert(testData.head(2).head.schema === testData.schema) } @@ -106,8 +112,10 @@ class DataFrameSuite extends QueryTest with 
SharedSparkSession { test("Star Expansion - CreateStruct and CreateArray") { val structDf = testData2.select("a", "b").as("record") // CreateStruct and CreateArray in aggregateExpressions - assert(structDf.groupBy($"a").agg(min(struct($"record.*"))).first() == Row(3, Row(3, 1))) - assert(structDf.groupBy($"a").agg(min(array($"record.*"))).first() == Row(3, Seq(3, 1))) + assert(structDf.groupBy($"a").agg(min(struct($"record.*"))). + sort("a").first() == Row(1, Row(1, 1))) + assert(structDf.groupBy($"a").agg(min(array($"record.*"))). + sort("a").first() == Row(1, Seq(1, 1))) // CreateStruct and CreateArray in project list (unresolved alias) assert(structDf.select(struct($"record.*")).first() == Row(Row(1, 1))) @@ -161,10 +169,10 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { DecimalData(BigDecimal("1"* 20 + ".123"), BigDecimal("1"* 20 + ".123")) :: DecimalData(BigDecimal("9"* 20 + ".123"), BigDecimal("9"* 20 + ".123")) :: Nil).toDF() - Seq(true, false).foreach { nullOnOverflow => - withSQLConf((SQLConf.DECIMAL_OPERATIONS_NULL_ON_OVERFLOW.key, nullOnOverflow.toString)) { + Seq(true, false).foreach { ansiEnabled => + withSQLConf((SQLConf.ANSI_ENABLED.key, ansiEnabled.toString)) { val structDf = largeDecimals.select("a").agg(sum("a")) - if (nullOnOverflow) { + if (!ansiEnabled) { checkAnswer(structDf, Row(null)) } else { val e = intercept[SparkException] { @@ -246,12 +254,12 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("repartition") { intercept[IllegalArgumentException] { - testData.select('key).repartition(0) + testData.select("key").repartition(0) } checkAnswer( - testData.select('key).repartition(10).select('key), - testData.select('key).collect().toSeq) + testData.select("key").repartition(10).select("key"), + testData.select("key").collect().toSeq) } test("repartition with SortOrder") { @@ -313,16 +321,16 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("coalesce") { 
intercept[IllegalArgumentException] { - testData.select('key).coalesce(0) + testData.select("key").coalesce(0) } - assert(testData.select('key).coalesce(1).rdd.partitions.size === 1) + assert(testData.select("key").coalesce(1).rdd.partitions.size === 1) checkAnswer( - testData.select('key).coalesce(1).select('key), - testData.select('key).collect().toSeq) + testData.select("key").coalesce(1).select("key"), + testData.select("key").collect().toSeq) - assert(spark.emptyDataFrame.coalesce(1).rdd.partitions.size === 1) + assert(spark.emptyDataFrame.coalesce(1).rdd.partitions.size === 0) } test("convert $\"attribute name\" into unresolved attribute") { @@ -333,7 +341,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("convert Scala Symbol 'attrname into unresolved attribute") { checkAnswer( - testData.where('key === lit(1)).select('value), + testData.where($"key" === lit(1)).select("value"), Row("1")) } @@ -345,17 +353,17 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("simple select") { checkAnswer( - testData.where('key === lit(1)).select('value), + testData.where($"key" === lit(1)).select("value"), Row("1")) } test("select with functions") { checkAnswer( - testData.select(sum('value), avg('value), count(lit(1))), + testData.select(sum("value"), avg("value"), count(lit(1))), Row(5050.0, 50.5, 100)) checkAnswer( - testData2.select('a + 'b, 'a < 'b), + testData2.select($"a" + $"b", $"a" < $"b"), Seq( Row(2, false), Row(3, true), @@ -365,31 +373,31 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { Row(5, false))) checkAnswer( - testData2.select(sumDistinct('a)), + testData2.select(sumDistinct($"a")), Row(6)) } test("sorting with null ordering") { val data = Seq[java.lang.Integer](2, 1, null).toDF("key") - checkAnswer(data.orderBy('key.asc), Row(null) :: Row(1) :: Row(2) :: Nil) + checkAnswer(data.orderBy($"key".asc), Row(null) :: Row(1) :: Row(2) :: Nil) checkAnswer(data.orderBy(asc("key")), Row(null) :: 
Row(1) :: Row(2) :: Nil) - checkAnswer(data.orderBy('key.asc_nulls_first), Row(null) :: Row(1) :: Row(2) :: Nil) + checkAnswer(data.orderBy($"key".asc_nulls_first), Row(null) :: Row(1) :: Row(2) :: Nil) checkAnswer(data.orderBy(asc_nulls_first("key")), Row(null) :: Row(1) :: Row(2) :: Nil) - checkAnswer(data.orderBy('key.asc_nulls_last), Row(1) :: Row(2) :: Row(null) :: Nil) + checkAnswer(data.orderBy($"key".asc_nulls_last), Row(1) :: Row(2) :: Row(null) :: Nil) checkAnswer(data.orderBy(asc_nulls_last("key")), Row(1) :: Row(2) :: Row(null) :: Nil) - checkAnswer(data.orderBy('key.desc), Row(2) :: Row(1) :: Row(null) :: Nil) + checkAnswer(data.orderBy($"key".desc), Row(2) :: Row(1) :: Row(null) :: Nil) checkAnswer(data.orderBy(desc("key")), Row(2) :: Row(1) :: Row(null) :: Nil) - checkAnswer(data.orderBy('key.desc_nulls_first), Row(null) :: Row(2) :: Row(1) :: Nil) + checkAnswer(data.orderBy($"key".desc_nulls_first), Row(null) :: Row(2) :: Row(1) :: Nil) checkAnswer(data.orderBy(desc_nulls_first("key")), Row(null) :: Row(2) :: Row(1) :: Nil) - checkAnswer(data.orderBy('key.desc_nulls_last), Row(2) :: Row(1) :: Row(null) :: Nil) + checkAnswer(data.orderBy($"key".desc_nulls_last), Row(2) :: Row(1) :: Row(null) :: Nil) checkAnswer(data.orderBy(desc_nulls_last("key")), Row(2) :: Row(1) :: Row(null) :: Nil) } test("global sorting") { checkAnswer( - testData2.orderBy('a.asc, 'b.asc), + testData2.orderBy($"a".asc, $"b".asc), Seq(Row(1, 1), Row(1, 2), Row(2, 1), Row(2, 2), Row(3, 1), Row(3, 2))) checkAnswer( @@ -397,31 +405,31 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { Seq(Row(1, 2), Row(1, 1), Row(2, 2), Row(2, 1), Row(3, 2), Row(3, 1))) checkAnswer( - testData2.orderBy('a.asc, 'b.desc), + testData2.orderBy($"a".asc, $"b".desc), Seq(Row(1, 2), Row(1, 1), Row(2, 2), Row(2, 1), Row(3, 2), Row(3, 1))) checkAnswer( - testData2.orderBy('a.desc, 'b.desc), + testData2.orderBy($"a".desc, $"b".desc), Seq(Row(3, 2), Row(3, 1), Row(2, 2), Row(2, 1), Row(1, 2), 
Row(1, 1))) checkAnswer( - testData2.orderBy('a.desc, 'b.asc), + testData2.orderBy($"a".desc, $"b".asc), Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2, 2), Row(1, 1), Row(1, 2))) checkAnswer( - arrayData.toDF().orderBy('data.getItem(0).asc), + arrayData.toDF().orderBy($"data".getItem(0).asc), arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(0)).toSeq) checkAnswer( - arrayData.toDF().orderBy('data.getItem(0).desc), + arrayData.toDF().orderBy($"data".getItem(0).desc), arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(0)).reverse.toSeq) checkAnswer( - arrayData.toDF().orderBy('data.getItem(1).asc), + arrayData.toDF().orderBy($"data".getItem(1).asc), arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(1)).toSeq) checkAnswer( - arrayData.toDF().orderBy('data.getItem(1).desc), + arrayData.toDF().orderBy($"data".getItem(1).desc), arrayData.toDF().collect().sortBy(_.getAs[Seq[Int]](0)(1)).reverse.toSeq) } @@ -450,7 +458,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { checkAnswer( // SELECT *, foo(key, value) FROM testData - testData.select($"*", foo('key, 'value)).limit(3), + testData.select($"*", foo($"key", $"value")).limit(3), Row(1, "1", "11") :: Row(2, "2", "22") :: Row(3, "3", "33") :: Nil ) } @@ -553,7 +561,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { } test("replace column using withColumns") { - val df2 = sparkContext.parallelize(Array((1, 2), (2, 3), (3, 4))).toDF("x", "y") + val df2 = sparkContext.parallelize(Seq((1, 2), (2, 3), (3, 4))).toDF("x", "y") val df3 = df2.withColumns(Seq("x", "newCol1", "newCol2"), Seq(df2("x") + 1, df2("y"), df2("y") + 1)) checkAnswer( @@ -794,7 +802,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("apply on query results (SPARK-5462)") { val df = testData.sparkSession.sql("select key from testData") - checkAnswer(df.select(df("key")), testData.select('key).collect().toSeq) + checkAnswer(df.select(df("key")), testData.select("key").collect().toSeq) } 
test("inputFiles") { @@ -1197,7 +1205,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { } test("SPARK-6899: type should match when using codegen") { - checkAnswer(decimalData.agg(avg('a)), Row(new java.math.BigDecimal(2))) + checkAnswer(decimalData.agg(avg("a")), Row(new java.math.BigDecimal(2))) } test("SPARK-7133: Implement struct, array, and map field accessor") { @@ -1399,7 +1407,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("Sorting columns are not in Filter and Project") { checkAnswer( - upperCaseData.filter('N > 1).select('N).filter('N < 6).orderBy('L.asc), + upperCaseData.filter($"N" > 1).select("N").filter($"N" < 6).orderBy($"L".asc), Row(2) :: Row(3) :: Row(4) :: Row(5) :: Nil) } @@ -1442,17 +1450,17 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("Alias uses internally generated names 'aggOrder' and 'havingCondition'") { val df = Seq(1 -> 2).toDF("i", "j") - val query1 = df.groupBy('i) - .agg(max('j).as("aggOrder")) - .orderBy(sum('j)) + val query1 = df.groupBy("i") + .agg(max("j").as("aggOrder")) + .orderBy(sum("j")) checkAnswer(query1, Row(1, 2)) // In the plan, there are two attributes having the same name 'havingCondition' // One is a user-provided alias name; another is an internally generated one. 
- val query2 = df.groupBy('i) - .agg(max('j).as("havingCondition")) - .where(sum('j) > 0) - .orderBy('havingCondition.asc) + val query2 = df.groupBy("i") + .agg(max("j").as("havingCondition")) + .where(sum("j") > 0) + .orderBy($"havingCondition".asc) checkAnswer(query2, Row(1, 2)) } @@ -1461,7 +1469,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { (1 to 10).toDF("id").write.mode(SaveMode.Overwrite).json(dir.getCanonicalPath) val input = spark.read.json(dir.getCanonicalPath) - val df = input.select($"id", rand(0).as('r)) + val df = input.select($"id", rand(0).as("r")) df.as("a").join(df.filter($"r" < 0.5).as("b"), $"a.id" === $"b.id").collect().foreach { row => assert(row.getDouble(1) - row.getDouble(3) === 0.0 +- 0.001) } @@ -1691,19 +1699,21 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { val plan = join.queryExecution.executedPlan checkAnswer(join, df) assert( - join.queryExecution.executedPlan.collect { case e: ShuffleExchangeExec => true }.size === 1) + collect(join.queryExecution.executedPlan) { + case e: ShuffleExchangeExec => true }.size === 1) assert( - join.queryExecution.executedPlan.collect { case e: ReusedExchangeExec => true }.size === 1) + collect(join.queryExecution.executedPlan) { case e: ReusedExchangeExec => true }.size === 1) val broadcasted = broadcast(join) val join2 = join.join(broadcasted, "id").join(broadcasted, "id") checkAnswer(join2, df) assert( - join2.queryExecution.executedPlan.collect { case e: ShuffleExchangeExec => true }.size == 1) + collect(join2.queryExecution.executedPlan) { + case e: ShuffleExchangeExec => true }.size == 1) assert( - join2.queryExecution.executedPlan - .collect { case e: BroadcastExchangeExec => true }.size === 1) + collect(join2.queryExecution.executedPlan) { + case e: BroadcastExchangeExec => true }.size === 1) assert( - join2.queryExecution.executedPlan.collect { case e: ReusedExchangeExec => true }.size == 4) + collect(join2.queryExecution.executedPlan) { case e: 
ReusedExchangeExec => true }.size == 4) } } @@ -1749,7 +1759,7 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("assertAnalyzed shouldn't replace original stack trace") { val e = intercept[AnalysisException] { - spark.range(1).select('id as 'a, 'id as 'b).groupBy('a).agg('b) + spark.range(1).select($"id" as "a", $"id" as "b").groupBy("a").agg($"b") } assert(e.getStackTrace.head.getClassName != classOf[QueryExecution].getName) @@ -1982,14 +1992,14 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { test("order-by ordinal.") { checkAnswer( - testData2.select(lit(7), 'a, 'b).orderBy(lit(1), lit(2), lit(3)), + testData2.select(lit(7), $"a", $"b").orderBy(lit(1), lit(2), lit(3)), Seq(Row(7, 1, 1), Row(7, 1, 2), Row(7, 2, 1), Row(7, 2, 2), Row(7, 3, 1), Row(7, 3, 2))) } test("SPARK-22271: mean overflows and returns null for some decimal variables") { val d = 0.034567890 val df = Seq(d, d, d, d, d, d, d, d, d, d).toDF("DecimalCol") - val result = df.select('DecimalCol cast DecimalType(38, 33)) + val result = df.select($"DecimalCol" cast DecimalType(38, 33)) .select(col("DecimalCol")).describe() val mean = result.select("DecimalCol").where($"summary" === "mean") assert(mean.collect().toSet === Set(Row("0.0345678900000000000000000000000000000"))) @@ -2025,24 +2035,25 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { val sourceDF = spark.createDataFrame(rows, schema) def structWhenDF: DataFrame = sourceDF - .select(when('cond, struct(lit("a").as("val1"), lit(10).as("val2"))).otherwise('s) as "res") - .select('res.getField("val1")) + .select(when($"cond", + struct(lit("a").as("val1"), lit(10).as("val2"))).otherwise($"s") as "res") + .select($"res".getField("val1")) def arrayWhenDF: DataFrame = sourceDF - .select(when('cond, array(lit("a"), lit("b"))).otherwise('a) as "res") - .select('res.getItem(0)) + .select(when($"cond", array(lit("a"), lit("b"))).otherwise($"a") as "res") + .select($"res".getItem(0)) def mapWhenDF: 
DataFrame = sourceDF - .select(when('cond, map(lit(0), lit("a"))).otherwise('m) as "res") - .select('res.getItem(0)) + .select(when($"cond", map(lit(0), lit("a"))).otherwise($"m") as "res") + .select($"res".getItem(0)) def structIfDF: DataFrame = sourceDF .select(expr("if(cond, struct('a' as val1, 10 as val2), s)") as "res") - .select('res.getField("val1")) + .select($"res".getField("val1")) def arrayIfDF: DataFrame = sourceDF .select(expr("if(cond, array('a', 'b'), a)") as "res") - .select('res.getItem(0)) + .select($"res".getItem(0)) def mapIfDF: DataFrame = sourceDF .select(expr("if(cond, map(0, 'a'), m)") as "res") - .select('res.getItem(0)) + .select($"res".getItem(0)) def checkResult(): Unit = { checkAnswer(structWhenDF, Seq(Row("a"), Row(null))) @@ -2105,17 +2116,17 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { // partitions. .write.partitionBy("p").option("compression", "gzip").json(path.getCanonicalPath) - var numJobs = 0 + val numJobs = new AtomicLong(0) sparkContext.addSparkListener(new SparkListener { override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = { - numJobs += 1 + numJobs.incrementAndGet() } }) val df = spark.read.json(path.getCanonicalPath) assert(df.columns === Array("i", "p")) - spark.sparkContext.listenerBus.waitUntilEmpty(10000) - assert(numJobs == 1) + spark.sparkContext.listenerBus.waitUntilEmpty() + assert(numJobs.get() == 1L) } } @@ -2202,4 +2213,91 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { |*(1) Range (0, 10, step=1, splits=2)""".stripMargin)) } } + + test("SPARK-29442 Set `default` mode should override the existing mode") { + val df = Seq(Tuple1(1)).toDF() + val writer = df.write.mode("overwrite").mode("default") + val modeField = classOf[DataFrameWriter[Tuple1[Int]]].getDeclaredField("mode") + modeField.setAccessible(true) + assert(SaveMode.ErrorIfExists === modeField.get(writer).asInstanceOf[SaveMode]) + } + + test("sample should not duplicated the input data") { + val df1 = 
spark.range(10).select($"id" as "id1", $"id" % 5 as "key1") + val df2 = spark.range(10).select($"id" as "id2", $"id" % 5 as "key2") + val sampled = df1.join(df2, $"key1" === $"key2") + .sample(0.5, 42) + .select("id1", "id2") + val idTuples = sampled.collect().map(row => row.getLong(0) -> row.getLong(1)) + assert(idTuples.length == idTuples.toSet.size) + } + + test("groupBy.as") { + val df1 = Seq((1, 2, 3), (2, 3, 4)).toDF("a", "b", "c") + .repartition($"a", $"b").sortWithinPartitions("a", "b") + val df2 = Seq((1, 2, 4), (2, 3, 5)).toDF("a", "b", "c") + .repartition($"a", $"b").sortWithinPartitions("a", "b") + + implicit val valueEncoder = RowEncoder(df1.schema) + + val df3 = df1.groupBy("a", "b").as[GroupByKey, Row] + .cogroup(df2.groupBy("a", "b").as[GroupByKey, Row]) { case (_, data1, data2) => + data1.zip(data2).map { p => + p._1.getInt(2) + p._2.getInt(2) + } + }.toDF + + checkAnswer(df3.sort("value"), Row(7) :: Row(9) :: Nil) + + // Assert that no extra shuffle introduced by cogroup. 
+ val exchanges = collect(df3.queryExecution.executedPlan) { + case h: ShuffleExchangeExec => h + } + assert(exchanges.size == 2) + } + + test("groupBy.as: custom grouping expressions") { + val df1 = Seq((1, 2, 3), (2, 3, 4)).toDF("a1", "b", "c") + .repartition($"a1", $"b").sortWithinPartitions("a1", "b") + val df2 = Seq((1, 2, 4), (2, 3, 5)).toDF("a1", "b", "c") + .repartition($"a1", $"b").sortWithinPartitions("a1", "b") + + implicit val valueEncoder = RowEncoder(df1.schema) + + val groupedDataset1 = df1.groupBy(($"a1" + 1).as("a"), $"b").as[GroupByKey, Row] + val groupedDataset2 = df2.groupBy(($"a1" + 1).as("a"), $"b").as[GroupByKey, Row] + + val df3 = groupedDataset1 + .cogroup(groupedDataset2) { case (_, data1, data2) => + data1.zip(data2).map { p => + p._1.getInt(2) + p._2.getInt(2) + } + }.toDF + + checkAnswer(df3.sort("value"), Row(7) :: Row(9) :: Nil) + } + + test("groupBy.as: throw AnalysisException for unresolved grouping expr") { + val df = Seq((1, 2, 3), (2, 3, 4)).toDF("a", "b", "c") + + implicit val valueEncoder = RowEncoder(df.schema) + + val err = intercept[AnalysisException] { + df.groupBy($"d", $"b").as[GroupByKey, Row] + } + assert(err.getMessage.contains("cannot resolve '`d`'")) + } + + test("emptyDataFrame should be foldable") { + val emptyDf = spark.emptyDataFrame.withColumn("id", lit(1L)) + val joined = spark.range(10).join(emptyDf, "id") + joined.queryExecution.optimizedPlan match { + case LocalRelation(Seq(id), Nil, _) => + assert(id.name == "id") + case _ => + fail("emptyDataFrame should be foldable") + } + } } + +case class GroupByKey(a: Int, b: Int) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala index fbd399917e390..8c998290b5044 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala @@ -111,7 
+111,7 @@ class DataFrameWindowFramesSuite extends QueryTest with SharedSparkSession { checkAnswer( df.select( - 'key, + $"key", first("value").over( window.rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), first("value").over( @@ -226,7 +226,7 @@ class DataFrameWindowFramesSuite extends QueryTest with SharedSparkSession { checkAnswer( df.select( - 'key, + $"key", sum("value").over(window. rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), sum("value").over(window. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 835630bff7099..d398657ec0b6e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -21,8 +21,9 @@ import org.scalatest.Matchers.the import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled} import org.apache.spark.sql.catalyst.optimizer.TransposeWindow +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.exchange.Exchange -import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window} +import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -31,7 +32,9 @@ import org.apache.spark.sql.types._ /** * Window function testing for DataFrame API. 
*/ -class DataFrameWindowFunctionsSuite extends QueryTest with SharedSparkSession { +class DataFrameWindowFunctionsSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper{ import testImplicits._ @@ -412,6 +415,42 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSparkSession { Row("b", 2, 4, 8))) } + test("window function with aggregator") { + val agg = udaf(new Aggregator[(Long, Long), Long, Long] { + def zero: Long = 0L + def reduce(b: Long, a: (Long, Long)): Long = b + (a._1 * a._2) + def merge(b1: Long, b2: Long): Long = b1 + b2 + def finish(r: Long): Long = r + def bufferEncoder: Encoder[Long] = Encoders.scalaLong + def outputEncoder: Encoder[Long] = Encoders.scalaLong + }) + + val df = Seq( + ("a", 1, 1), + ("a", 1, 5), + ("a", 2, 10), + ("a", 2, -1), + ("b", 4, 7), + ("b", 3, 8), + ("b", 2, 4)) + .toDF("key", "a", "b") + val window = Window.partitionBy($"key").orderBy($"a").rangeBetween(Long.MinValue, 0L) + checkAnswer( + df.select( + $"key", + $"a", + $"b", + agg($"a", $"b").over(window)), + Seq( + Row("a", 1, 1, 6), + Row("a", 1, 5, 6), + Row("a", 2, 10, 24), + Row("a", 2, -1, 24), + Row("b", 4, 7, 60), + Row("b", 3, 8, 32), + Row("b", 2, 4, 8))) + } + test("null inputs") { val df = Seq(("a", 1), ("a", 1), ("a", 2), ("a", 2), ("b", 4), ("b", 3), ("b", 2)) .toDF("key", "value") @@ -633,20 +672,20 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSparkSession { assert(thrownException.message.contains("window functions inside WHERE and HAVING clauses")) } - checkAnalysisError(testData2.select('a).where(rank().over(Window.orderBy('b)) === 1)) - checkAnalysisError(testData2.where('b === 2 && rank().over(Window.orderBy('b)) === 1)) + checkAnalysisError(testData2.select("a").where(rank().over(Window.orderBy($"b")) === 1)) + checkAnalysisError(testData2.where($"b" === 2 && rank().over(Window.orderBy($"b")) === 1)) checkAnalysisError( - testData2.groupBy('a) - .agg(avg('b).as("avgb")) - .where('a > 'avgb 
&& rank().over(Window.orderBy('a)) === 1)) + testData2.groupBy($"a") + .agg(avg($"b").as("avgb")) + .where($"a" > $"avgb" && rank().over(Window.orderBy($"a")) === 1)) checkAnalysisError( - testData2.groupBy('a) - .agg(max('b).as("maxb"), sum('b).as("sumb")) - .where(rank().over(Window.orderBy('a)) === 1)) + testData2.groupBy($"a") + .agg(max($"b").as("maxb"), sum($"b").as("sumb")) + .where(rank().over(Window.orderBy($"a")) === 1)) checkAnalysisError( - testData2.groupBy('a) - .agg(max('b).as("maxb"), sum('b).as("sumb")) - .where('sumb === 5 && rank().over(Window.orderBy('a)) === 1)) + testData2.groupBy($"a") + .agg(max($"b").as("maxb"), sum($"b").as("sumb")) + .where($"sumb" === 5 && rank().over(Window.orderBy($"a")) === 1)) checkAnalysisError(sql("SELECT a FROM testData2 WHERE RANK() OVER(ORDER BY b) = 1")) checkAnalysisError(sql("SELECT * FROM testData2 WHERE b = 2 AND RANK() OVER(ORDER BY b) = 1")) @@ -680,7 +719,7 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSparkSession { .select($"sno", $"pno", $"qty", col("sum_qty_2"), sum("qty").over(w1).alias("sum_qty_1")) val expectedNumExchanges = if (transposeWindowEnabled) 1 else 2 - val actualNumExchanges = select.queryExecution.executedPlan.collect { + val actualNumExchanges = stripAQEPlan(select.queryExecution.executedPlan).collect { case e: Exchange => e }.length assert(actualNumExchanges == expectedNumExchanges) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala new file mode 100644 index 0000000000000..cd157086a8b8e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -0,0 +1,637 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.sql.Timestamp + +import scala.collection.JavaConverters._ + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} +import org.apache.spark.sql.types.TimestampType +import org.apache.spark.sql.util.QueryExecutionListener +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.Utils + +class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with BeforeAndAfter { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.functions._ + 
import testImplicits._ + + private def catalog(name: String): TableCatalog = { + spark.sessionState.catalogManager.catalog(name).asTableCatalog + } + + private val defaultOwnership = Map(TableCatalog.PROP_OWNER -> Utils.getCurrentUserName()) + + before { + spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) + + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") + df2.createOrReplaceTempView("source2") + } + + after { + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.clear() + } + + test("DataFrameWriteV2 encode identifiers correctly") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + + var plan: LogicalPlan = null + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + plan = qe.analyzed + + } + override def onFailure(funcName: String, qe: QueryExecution, error: Throwable): Unit = {} + } + spark.listenerManager.register(listener) + + spark.table("source").writeTo("testcat.table_name").append() + sparkContext.listenerBus.waitUntilEmpty() + assert(plan.isInstanceOf[AppendData]) + checkV2Identifiers(plan.asInstanceOf[AppendData].table) + + spark.table("source").writeTo("testcat.table_name").overwrite(lit(true)) + sparkContext.listenerBus.waitUntilEmpty() + assert(plan.isInstanceOf[OverwriteByExpression]) + checkV2Identifiers(plan.asInstanceOf[OverwriteByExpression].table) + + spark.table("source").writeTo("testcat.table_name").overwritePartitions() + sparkContext.listenerBus.waitUntilEmpty() + assert(plan.isInstanceOf[OverwritePartitionsDynamic]) + checkV2Identifiers(plan.asInstanceOf[OverwritePartitionsDynamic].table) + } + + private def checkV2Identifiers( + plan: LogicalPlan, + identifier: String = "table_name", + catalogPlugin: 
TableCatalog = catalog("testcat")): Unit = { + assert(plan.isInstanceOf[DataSourceV2Relation]) + val v2 = plan.asInstanceOf[DataSourceV2Relation] + assert(v2.identifier.exists(_.name() == identifier)) + assert(v2.catalog.exists(_ == catalogPlugin)) + } + + test("Append: basic append") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + spark.table("source").writeTo("testcat.table_name").append() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + spark.table("source2").writeTo("testcat.table_name").append() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"), Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + + test("Append: by name not position") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + val exc = intercept[AnalysisException] { + spark.table("source").withColumnRenamed("data", "d").writeTo("testcat.table_name").append() + } + + assert(exc.getMessage.contains("Cannot find data for output column")) + assert(exc.getMessage.contains("'data'")) + + checkAnswer( + spark.table("testcat.table_name"), + Seq()) + } + + test("Append: fail if table does not exist") { + val exc = intercept[NoSuchTableException] { + spark.table("source").writeTo("testcat.table_name").append() + } + + assert(exc.getMessage.contains("table_name")) + } + + test("Overwrite: overwrite by expression: true") { + spark.sql( + "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + spark.table("source").writeTo("testcat.table_name").append() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + 
spark.table("source2").writeTo("testcat.table_name").overwrite(lit(true)) + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + + test("Overwrite: overwrite by expression: id = 3") { + spark.sql( + "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + spark.table("source").writeTo("testcat.table_name").append() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + spark.table("source2").writeTo("testcat.table_name").overwrite($"id" === 3) + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + + test("Overwrite: by name not position") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + val exc = intercept[AnalysisException] { + spark.table("source").withColumnRenamed("data", "d") + .writeTo("testcat.table_name").overwrite(lit(true)) + } + + assert(exc.getMessage.contains("Cannot find data for output column")) + assert(exc.getMessage.contains("'data'")) + + checkAnswer( + spark.table("testcat.table_name"), + Seq()) + } + + test("Overwrite: fail if table does not exist") { + val exc = intercept[NoSuchTableException] { + spark.table("source").writeTo("testcat.table_name").overwrite(lit(true)) + } + + assert(exc.getMessage.contains("table_name")) + } + + test("OverwritePartitions: overwrite conflicting partitions") { + spark.sql( + "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + spark.table("source").writeTo("testcat.table_name").append() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + 
spark.table("source2").withColumn("id", $"id" - 2) + .writeTo("testcat.table_name").overwritePartitions() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "d"), Row(3L, "e"), Row(4L, "f"))) + } + + test("OverwritePartitions: overwrite all rows if not partitioned") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + spark.table("source").writeTo("testcat.table_name").append() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + spark.table("source2").writeTo("testcat.table_name").overwritePartitions() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + + test("OverwritePartitions: by name not position") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + + checkAnswer(spark.table("testcat.table_name"), Seq.empty) + + val exc = intercept[AnalysisException] { + spark.table("source").withColumnRenamed("data", "d") + .writeTo("testcat.table_name").overwritePartitions() + } + + assert(exc.getMessage.contains("Cannot find data for output column")) + assert(exc.getMessage.contains("'data'")) + + checkAnswer( + spark.table("testcat.table_name"), + Seq()) + } + + test("OverwritePartitions: fail if table does not exist") { + val exc = intercept[NoSuchTableException] { + spark.table("source").writeTo("testcat.table_name").overwritePartitions() + } + + assert(exc.getMessage.contains("table_name")) + } + + test("Create: basic behavior") { + spark.table("source").writeTo("testcat.table_name").create() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", 
StringType)) + assert(table.partitioning.isEmpty) + assert(table.properties == defaultOwnership.asJava) + } + + test("Create: with using") { + spark.table("source").writeTo("testcat.table_name").using("foo").create() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(table.properties === (Map("provider" -> "foo") ++ defaultOwnership).asJava) + } + + test("Create: with property") { + spark.table("source").writeTo("testcat.table_name").tableProperty("prop", "value").create() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(table.properties === (Map("prop" -> "value") ++ defaultOwnership).asJava) + } + + test("Create: identity partitioned table") { + spark.table("source").writeTo("testcat.table_name").partitionedBy($"id").create() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(table.properties == defaultOwnership.asJava) + } + + test("Create: partitioned by years(ts)") { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + 
.writeTo("testcat.table_name") + .tableProperty("allow-unsupported-transforms", "true") + .partitionedBy(years($"ts")) + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.partitioning === Seq(YearsTransform(FieldReference("ts")))) + } + + test("Create: partitioned by months(ts)") { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + .writeTo("testcat.table_name") + .tableProperty("allow-unsupported-transforms", "true") + .partitionedBy(months($"ts")) + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.partitioning === Seq(MonthsTransform(FieldReference("ts")))) + } + + test("Create: partitioned by days(ts)") { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + .writeTo("testcat.table_name") + .tableProperty("allow-unsupported-transforms", "true") + .partitionedBy(days($"ts")) + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.partitioning === Seq(DaysTransform(FieldReference("ts")))) + } + + test("Create: partitioned by hours(ts)") { + spark.table("source") + .withColumn("ts", lit("2019-06-01 10:00:00.000000").cast("timestamp")) + .writeTo("testcat.table_name") + .tableProperty("allow-unsupported-transforms", "true") + .partitionedBy(hours($"ts")) + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.partitioning === Seq(HoursTransform(FieldReference("ts")))) + } + + test("Create: partitioned by bucket(4, id)") { + spark.table("source") + .writeTo("testcat.table_name") + .tableProperty("allow-unsupported-transforms", "true") + .partitionedBy(bucket(4, 
$"id")) + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.partitioning === + Seq(BucketTransform(LiteralValue(4, IntegerType), Seq(FieldReference("id"))))) + } + + test("Create: fail if table already exists") { + spark.sql( + "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") + + val exc = intercept[TableAlreadyExistsException] { + spark.table("source").writeTo("testcat.table_name").create() + } + + assert(exc.getMessage.contains("table_name")) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // table should not have been changed + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(table.properties === (Map("provider" -> "foo") ++ defaultOwnership).asJava) + } + + test("Replace: basic behavior") { + spark.sql( + "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql("INSERT INTO TABLE testcat.table_name SELECT * FROM source") + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // validate the initial table + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(table.properties === (Map("provider" -> "foo") ++ defaultOwnership).asJava) + + spark.table("source2") + .withColumn("even_or_odd", when(($"id" % 2) === 0, "even").otherwise("odd")) + .writeTo("testcat.table_name").replace() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d", "even"), Row(5L, "e", 
"odd"), Row(6L, "f", "even"))) + + val replaced = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // validate the replacement table + assert(replaced.name === "testcat.table_name") + assert(replaced.schema === new StructType() + .add("id", LongType) + .add("data", StringType) + .add("even_or_odd", StringType)) + assert(replaced.partitioning.isEmpty) + assert(replaced.properties === defaultOwnership.asJava) + } + + test("Replace: partitioned table") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + spark.sql("INSERT INTO TABLE testcat.table_name SELECT * FROM source") + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // validate the initial table + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning.isEmpty) + assert(table.properties === (Map("provider" -> "foo") ++ defaultOwnership).asJava) + + spark.table("source2") + .withColumn("even_or_odd", when(($"id" % 2) === 0, "even").otherwise("odd")) + .writeTo("testcat.table_name").partitionedBy($"id").replace() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d", "even"), Row(5L, "e", "odd"), Row(6L, "f", "even"))) + + val replaced = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // validate the replacement table + assert(replaced.name === "testcat.table_name") + assert(replaced.schema === new StructType() + .add("id", LongType) + .add("data", StringType) + .add("even_or_odd", StringType)) + assert(replaced.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(replaced.properties === defaultOwnership.asJava) + } + + test("Replace: fail if table does not exist") { + val exc = intercept[CannotReplaceMissingTableException] { + 
spark.table("source").writeTo("testcat.table_name").replace() + } + + assert(exc.getMessage.contains("table_name")) + } + + test("CreateOrReplace: table does not exist") { + spark.table("source2").writeTo("testcat.table_name").createOrReplace() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + + val replaced = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // validate the replacement table + assert(replaced.name === "testcat.table_name") + assert(replaced.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(replaced.partitioning.isEmpty) + assert(replaced.properties === defaultOwnership.asJava) + } + + test("CreateOrReplace: table exists") { + spark.sql( + "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql("INSERT INTO TABLE testcat.table_name SELECT * FROM source") + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // validate the initial table + assert(table.name === "testcat.table_name") + assert(table.schema === new StructType().add("id", LongType).add("data", StringType)) + assert(table.partitioning === Seq(IdentityTransform(FieldReference("id")))) + assert(table.properties === (Map("provider" -> "foo") ++ defaultOwnership).asJava) + + spark.table("source2") + .withColumn("even_or_odd", when(($"id" % 2) === 0, "even").otherwise("odd")) + .writeTo("testcat.table_name").createOrReplace() + + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d", "even"), Row(5L, "e", "odd"), Row(6L, "f", "even"))) + + val replaced = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + // validate the replacement table + assert(replaced.name === "testcat.table_name") + assert(replaced.schema === new StructType() + .add("id", LongType) + .add("data", 
StringType) + .add("even_or_odd", StringType)) + assert(replaced.partitioning.isEmpty) + assert(replaced.properties === defaultOwnership.asJava) + } + + test("SPARK-30289 Create: partitioned by nested column") { + val schema = new StructType().add("ts", new StructType() + .add("created", TimestampType) + .add("modified", TimestampType) + .add("timezone", StringType)) + + val data = Seq( + Row(Row(Timestamp.valueOf("2019-06-01 10:00:00"), Timestamp.valueOf("2019-09-02 07:00:00"), + "America/Los_Angeles")), + Row(Row(Timestamp.valueOf("2019-08-26 18:00:00"), Timestamp.valueOf("2019-09-26 18:00:00"), + "America/Los_Angeles")), + Row(Row(Timestamp.valueOf("2018-11-23 18:00:00"), Timestamp.valueOf("2018-12-22 18:00:00"), + "America/New_York"))) + val df = spark.createDataFrame(spark.sparkContext.parallelize(data, 1), schema) + + df.writeTo("testcat.table_name") + .partitionedBy($"ts.timezone") + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + .asInstanceOf[InMemoryTable] + + assert(table.name === "testcat.table_name") + assert(table.partitioning === Seq(IdentityTransform(FieldReference(Array("ts", "timezone"))))) + checkAnswer(spark.table(table.name), data) + assert(table.dataMap.toArray.length == 2) + assert(table.dataMap(Seq(UTF8String.fromString("America/Los_Angeles"))).rows.size == 2) + assert(table.dataMap(Seq(UTF8String.fromString("America/New_York"))).rows.size == 1) + + // TODO: `DataSourceV2Strategy` can not translate nested fields into source filter yet + // so the following sql will fail. 
+ // sql("DELETE FROM testcat.table_name WHERE ts.timezone = \"America/Los_Angeles\"") + } + + test("SPARK-30289 Create: partitioned by multiple transforms on nested columns") { + spark.table("source") + .withColumn("ts", struct( + lit("2019-06-01 10:00:00.000000").cast("timestamp") as "created", + lit("2019-09-02 07:00:00.000000").cast("timestamp") as "modified", + lit("America/Los_Angeles") as "timezone")) + .writeTo("testcat.table_name") + .tableProperty("allow-unsupported-transforms", "true") + .partitionedBy( + years($"ts.created"), months($"ts.created"), days($"ts.created"), hours($"ts.created"), + years($"ts.modified"), months($"ts.modified"), days($"ts.modified"), hours($"ts.modified") + ) + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.partitioning === Seq( + YearsTransform(FieldReference(Array("ts", "created"))), + MonthsTransform(FieldReference(Array("ts", "created"))), + DaysTransform(FieldReference(Array("ts", "created"))), + HoursTransform(FieldReference(Array("ts", "created"))), + YearsTransform(FieldReference(Array("ts", "modified"))), + MonthsTransform(FieldReference(Array("ts", "modified"))), + DaysTransform(FieldReference(Array("ts", "modified"))), + HoursTransform(FieldReference(Array("ts", "modified"))))) + } + + test("SPARK-30289 Create: partitioned by bucket(4, ts.timezone)") { + spark.table("source") + .withColumn("ts", struct( + lit("2019-06-01 10:00:00.000000").cast("timestamp") as "created", + lit("2019-09-02 07:00:00.000000").cast("timestamp") as "modified", + lit("America/Los_Angeles") as "timezone")) + .writeTo("testcat.table_name") + .tableProperty("allow-unsupported-transforms", "true") + .partitionedBy(bucket(4, $"ts.timezone")) + .create() + + val table = catalog("testcat").loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name === "testcat.table_name") + assert(table.partitioning === 
Seq(BucketTransform(LiteralValue(4, IntegerType), + Seq(FieldReference(Seq("ts", "timezone")))))) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala index 817387b2845f5..6ffe133ee652b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetAggregatorSuite.scala @@ -19,12 +19,10 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.expressions.Aggregator -import org.apache.spark.sql.expressions.scalalang.typed import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType, StructType} - object ComplexResultAgg extends Aggregator[(String, Int), (Long, Long), (Long, Long)] { override def zero: (Long, Long) = (0, 0) override def reduce(countAndSum: (Long, Long), input: (String, Int)): (Long, Long) = { @@ -226,25 +224,6 @@ class DatasetAggregatorSuite extends QueryTest with SharedSparkSession { private implicit val ordering = Ordering.by((c: AggData) => c.a -> c.b) - test("typed aggregation: TypedAggregator") { - val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() - - checkDataset( - ds.groupByKey(_._1).agg(typed.sum(_._2)), - ("a", 30.0), ("b", 3.0), ("c", 1.0)) - } - - test("typed aggregation: TypedAggregator, expr, expr") { - val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() - - checkDataset( - ds.groupByKey(_._1).agg( - typed.sum(_._2), - expr("sum(_2)").as[Long], - count("*")), - ("a", 30.0, 30L, 2L), ("b", 3.0, 3L, 2L), ("c", 1.0, 1L, 1L)) - } - test("typed aggregation: complex result type") { val ds = Seq("a" -> 1, "a" -> 3, "b" -> 3).toDS() @@ -255,17 +234,6 @@ class DatasetAggregatorSuite extends QueryTest with SharedSparkSession { ("a", 2.0, (2L, 
4L)), ("b", 3.0, (1L, 3L))) } - test("typed aggregation: in project list") { - val ds = Seq(1, 3, 2, 5).toDS() - - checkDataset( - ds.select(typed.sum((i: Int) => i)), - 11.0) - checkDataset( - ds.select(typed.sum((i: Int) => i), typed.sum((i: Int) => i * 2)), - 11.0 -> 22.0) - } - test("typed aggregation: class input") { val ds = Seq(AggData(1, "one"), AggData(2, "two")).toDS() @@ -315,14 +283,6 @@ class DatasetAggregatorSuite extends QueryTest with SharedSparkSession { ("one", 1), ("two", 1)) } - test("typed aggregate: avg, count, sum") { - val ds = Seq("a" -> 1, "a" -> 3, "b" -> 3).toDS() - checkDataset( - ds.groupByKey(_._1).agg( - typed.avg(_._2), typed.count(_._2), typed.sum(_._2), typed.sumLong(_._2)), - ("a", 2.0, 2L, 4.0, 4L), ("b", 3.0, 1L, 3.0, 3L)) - } - test("generic typed sum") { val ds = Seq("a" -> 1, "a" -> 3, "b" -> 3).toDS() checkDataset( @@ -366,18 +326,6 @@ class DatasetAggregatorSuite extends QueryTest with SharedSparkSession { checkAnswer(df2.agg(RowAgg.toColumn as "b").select("b"), Row(6) :: Nil) } - test("spark-15114 shorter system generated alias names") { - val ds = Seq(1, 3, 2, 5).toDS() - assert(ds.select(typed.sum((i: Int) => i)).columns.head === "TypedSumDouble(int)") - val ds2 = ds.select(typed.sum((i: Int) => i), typed.avg((i: Int) => i)) - assert(ds2.columns.head === "TypedSumDouble(int)") - assert(ds2.columns.last === "TypedAverage(int)") - val df = Seq(1 -> "a", 2 -> "b", 3 -> "b").toDF("i", "j") - assert(df.groupBy($"j").agg(RowAgg.toColumn).columns.last == - "RowAgg(org.apache.spark.sql.Row)") - assert(df.groupBy($"j").agg(RowAgg.toColumn as "agg1").columns.last == "agg1") - } - test("SPARK-15814 Aggregator can return null result") { val ds = Seq(AggData(1, "one"), AggData(2, "two")).toDS() checkDatasetUnorderly( @@ -390,15 +338,6 @@ class DatasetAggregatorSuite extends QueryTest with SharedSparkSession { checkDataset(ds.select(MapTypeBufferAgg.toColumn), 1) } - test("SPARK-15204 improve nullability inference for Aggregator") { 
- val ds1 = Seq(1, 3, 2, 5).toDS() - assert(ds1.select(typed.sum((i: Int) => i)).schema.head.nullable === false) - val ds2 = Seq(AggData(1, "a"), AggData(2, "a")).toDS() - assert(ds2.select(SeqAgg.toColumn).schema.head.nullable) - val ds3 = sql("SELECT 'Some String' AS b, 1279869254 AS a").as[AggData] - assert(ds3.select(NameAgg.toColumn).schema.head.nullable) - } - test("SPARK-18147: very complex aggregator result type") { val df = Seq(1 -> "a", 2 -> "b", 2 -> "c").toDF("i", "j") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala index dba906f63aed4..e47a6a68a0a9c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetBenchmark.scala @@ -55,7 +55,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.map(func) i += 1 } - res.foreach(_ => Unit) + res.foreach(_ => ()) } benchmark.addCase("DataFrame") { iter => @@ -65,7 +65,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.select($"l" + 1 as "l") i += 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark.addCase("Dataset") { iter => @@ -75,7 +75,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.map(func) i += 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark @@ -96,7 +96,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.map(func) i += 1 } - res.foreach(_ => Unit) + res.foreach(_ => ()) } benchmark.addCase("DataFrame") { iter => @@ -106,7 +106,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.select($"l" + 1 as "l", $"s") i += 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark.addCase("Dataset") { iter => @@ -116,7 +116,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.map(func) i 
+= 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark @@ -139,7 +139,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.filter(func) i += 1 } - res.foreach(_ => Unit) + res.foreach(_ => ()) } benchmark.addCase("DataFrame") { iter => @@ -149,7 +149,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.filter($"l" % 2L === 0L) i += 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark.addCase("Dataset") { iter => @@ -159,7 +159,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.filter(func) i += 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark @@ -183,7 +183,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.filter(funcs(i)) i += 1 } - res.foreach(_ => Unit) + res.foreach(_ => ()) } benchmark.addCase("DataFrame") { iter => @@ -193,7 +193,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.filter($"l" % (100L + i) === 0L) i += 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark.addCase("Dataset") { iter => @@ -203,7 +203,7 @@ object DatasetBenchmark extends SqlBasedBenchmark { res = res.filter(funcs(i)) i += 1 } - res.queryExecution.toRdd.foreach(_ => Unit) + res.queryExecution.toRdd.foreach(_ => ()) } benchmark @@ -235,15 +235,15 @@ object DatasetBenchmark extends SqlBasedBenchmark { } benchmark.addCase("DataFrame sum") { iter => - df.select(sum($"l")).queryExecution.toRdd.foreach(_ => Unit) + df.select(sum($"l")).queryExecution.toRdd.foreach(_ => ()) } benchmark.addCase("Dataset sum using Aggregator") { iter => - df.as[Data].select(typed.sumLong((d: Data) => d.l)).queryExecution.toRdd.foreach(_ => Unit) + df.as[Data].select(typed.sumLong((d: Data) => d.l)).queryExecution.toRdd.foreach(_ => ()) } benchmark.addCase("Dataset complex Aggregator") { iter => - 
df.as[Data].select(ComplexAggregator.toColumn).queryExecution.toRdd.foreach(_ => Unit) + df.as[Data].select(ComplexAggregator.toColumn).queryExecution.toRdd.foreach(_ => ()) } benchmark diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala index 33d9def0b44e5..5c144dad23c30 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala @@ -20,13 +20,17 @@ package org.apache.spark.sql import org.scalatest.concurrent.TimeLimits import org.scalatest.time.SpanSugar._ +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.storage.StorageLevel -class DatasetCacheSuite extends QueryTest with SharedSparkSession with TimeLimits { +class DatasetCacheSuite extends QueryTest + with SharedSparkSession + with TimeLimits + with AdaptiveSparkPlanHelper { import testImplicits._ /** @@ -36,7 +40,8 @@ class DatasetCacheSuite extends QueryTest with SharedSparkSession with TimeLimit val plan = df.queryExecution.withCachedData assert(plan.isInstanceOf[InMemoryRelation]) val internalPlan = plan.asInstanceOf[InMemoryRelation].cacheBuilder.cachedPlan - assert(internalPlan.find(_.isInstanceOf[InMemoryTableScanExec]).size == numOfCachesDependedUpon) + assert(find(internalPlan)(_.isInstanceOf[InMemoryTableScanExec]).size + == numOfCachesDependedUpon) } test("get storage level") { @@ -97,7 +102,7 @@ class DatasetCacheSuite extends QueryTest with SharedSparkSession with TimeLimit test("persist and then groupBy columns asKey, map") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(_._1) - val agged = grouped.mapGroups { case (g, iter) => 
(g, iter.map(_._2).sum) } + val agged = grouped.mapGroups { (g, iter) => (g, iter.map(_._2).sum) } agged.persist() checkDataset( @@ -158,8 +163,8 @@ class DatasetCacheSuite extends QueryTest with SharedSparkSession with TimeLimit test("SPARK-24596 Non-cascading Cache Invalidation") { val df = Seq(("a", 1), ("b", 2)).toDF("s", "i") - val df2 = df.filter('i > 1) - val df3 = df.filter('i < 2) + val df2 = df.filter($"i" > 1) + val df3 = df.filter($"i" < 2) df2.cache() df.cache() @@ -178,8 +183,8 @@ class DatasetCacheSuite extends QueryTest with SharedSparkSession with TimeLimit val expensiveUDF = udf({ x: Int => Thread.sleep(5000); x }) val df = spark.range(0, 5).toDF("a") val df1 = df.withColumn("b", expensiveUDF($"a")) - val df2 = df1.groupBy('a).agg(sum('b)) - val df3 = df.agg(sum('a)) + val df2 = df1.groupBy($"a").agg(sum($"b")) + val df3 = df.agg(sum($"a")) df1.cache() df2.cache() @@ -192,16 +197,16 @@ class DatasetCacheSuite extends QueryTest with SharedSparkSession with TimeLimit // df1 un-cached; df2's cache plan stays the same assert(df1.storageLevel == StorageLevel.NONE) - assertCacheDependency(df1.groupBy('a).agg(sum('b))) + assertCacheDependency(df1.groupBy($"a").agg(sum($"b"))) - val df4 = df1.groupBy('a).agg(sum('b)).agg(sum("sum(b)")) + val df4 = df1.groupBy($"a").agg(sum($"b")).agg(sum("sum(b)")) assertCached(df4) // reuse loaded cache failAfter(3.seconds) { checkDataset(df4, Row(10)) } - val df5 = df.agg(sum('a)).filter($"sum(a)" > 1) + val df5 = df.agg(sum($"a")).filter($"sum(a)" > 1) assertCached(df5) // first time use, load cache checkDataset(df5, Row(10)) @@ -209,8 +214,8 @@ class DatasetCacheSuite extends QueryTest with SharedSparkSession with TimeLimit test("SPARK-26708 Cache data and cached plan should stay consistent") { val df = spark.range(0, 5).toDF("a") - val df1 = df.withColumn("b", 'a + 1) - val df2 = df.filter('a > 1) + val df1 = df.withColumn("b", $"a" + 1) + val df2 = df.filter($"a" > 1) df.cache() // Add df1 to the CacheManager; the 
buffer is currently empty. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala index 892122b94b977..0ac99905f35f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala @@ -181,15 +181,6 @@ class DatasetOptimizationSuite extends QueryTest with SharedSparkSession { // codegen cache should work for Datasets of same type. val count3 = getCodegenCount() assert(count3 == count2) - - withSQLConf(SQLConf.OPTIMIZER_REASSIGN_LAMBDA_VARIABLE_ID.key -> "false") { - // trigger codegen for another Dataset of same type - createDataset().collect() - // with the rule disabled, codegen happens again for encoder serializer and encoder - // deserializer - val count4 = getCodegenCount() - assert(count4 == (count3 + 2)) - } } withClue("array type") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index 91a8f0a26b360..124b58483d24f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -171,7 +171,7 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSparkSession { test("groupBy function, map") { val ds = Seq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11).toDS() val grouped = ds.groupByKey(_ % 2) - val agged = grouped.mapGroups { case (g, iter) => + val agged = grouped.mapGroups { (g, iter) => val name = if (g == 0) "even" else "odd" (name, iter.size) } @@ -184,7 +184,7 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSparkSession { test("groupBy function, flatMap") { val ds = Seq("a", "b", "c", "xyz", "hello").toDS() val grouped = ds.groupByKey(_.length) - val agged = grouped.flatMapGroups { case (g, iter) => Iterator(g.toString, 
iter.mkString) } + val agged = grouped.flatMapGroups { (g, iter) => Iterator(g.toString, iter.mkString) } checkDatasetUnorderly( agged, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 42e5ee58954e8..b0bd612e88d98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -20,7 +20,9 @@ package org.apache.spark.sql import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.sql.{Date, Timestamp} +import org.scalatest.Assertions._ import org.scalatest.exceptions.TestFailedException +import org.scalatest.prop.TableDrivenPropertyChecks._ import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.sql.catalyst.ScroogeLikeExample @@ -28,6 +30,7 @@ import org.apache.spark.sql.catalyst.encoders.{OuterScopes, RowEncoder} import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.expressions.UserDefinedFunction @@ -49,7 +52,9 @@ object TestForTypeAlias { def seqOfTupleTypeAlias: SeqOfTwoInt = Seq((1, 1), (2, 2)) } -class DatasetSuite extends QueryTest with SharedSparkSession { +class DatasetSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ private implicit val ordering = Ordering.by((c: ClassData) => c.a -> c.b) @@ -194,6 +199,11 @@ class DatasetSuite extends QueryTest with SharedSparkSession { assert(ds.take(2) === Array(ClassData("a", 1), ClassData("b", 2))) } + test("as case class - tail") { + val ds = Seq((1, "a"), 
(2, "b"), (3, "c")).toDF("b", "a").as[ClassData] + assert(ds.tail(2) === Array(ClassData("b", 2), ClassData("c", 3))) + } + test("as seq of case class - reorder fields by name") { val df = spark.range(3).select(array(struct($"id".cast("int").as("b"), lit("a").as("a")))) val ds = df.as[Seq[ClassData]] @@ -518,7 +528,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { test("groupBy function, map") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(v => (v._1, "word")) - val agged = grouped.mapGroups { case (g, iter) => (g._1, iter.map(_._2).sum) } + val agged = grouped.mapGroups { (g, iter) => (g._1, iter.map(_._2).sum) } checkDatasetUnorderly( agged, @@ -528,7 +538,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { test("groupBy function, flatMap") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(v => (v._1, "word")) - val agged = grouped.flatMapGroups { case (g, iter) => + val agged = grouped.flatMapGroups { (g, iter) => Iterator(g._1, iter.map(_._2).sum.toString) } @@ -540,11 +550,11 @@ class DatasetSuite extends QueryTest with SharedSparkSession { test("groupBy function, mapValues, flatMap") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val keyValue = ds.groupByKey(_._1).mapValues(_._2) - val agged = keyValue.mapGroups { case (g, iter) => (g, iter.sum) } + val agged = keyValue.mapGroups { (g, iter) => (g, iter.sum) } checkDataset(agged, ("a", 30), ("b", 3), ("c", 1)) val keyValue1 = ds.groupByKey(t => (t._1, "key")).mapValues(t => (t._2, "value")) - val agged1 = keyValue1.mapGroups { case (g, iter) => (g._1, iter.map(_._1).sum) } + val agged1 = keyValue1.mapGroups { (g, iter) => (g._1, iter.map(_._1).sum) } checkDataset(agged1, ("a", 30), ("b", 3), ("c", 1)) } @@ -905,7 +915,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { test("grouping key and grouped value has field with same 
name") { val ds = Seq(ClassData("a", 1), ClassData("a", 2)).toDS() val agged = ds.groupByKey(d => ClassNullableData(d.a, null)).mapGroups { - case (key, values) => key.a + values.map(_.b).sum + (key, values) => key.a + values.map(_.b).sum } checkDataset(agged, "a3") @@ -978,7 +988,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { } test("SPARK-14554: Dataset.map may generate wrong java code for wide table") { - val wideDF = spark.range(10).select(Seq.tabulate(1000) {i => ('id + i).as(s"c$i")} : _*) + val wideDF = spark.range(10).select(Seq.tabulate(1000) {i => ($"id" + i).as(s"c$i")} : _*) // Make sure the generated code for this plan can compile and execute. checkDataset(wideDF.map(_.getLong(0)), 0L until 10 : _*) } @@ -1000,7 +1010,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { .select("user", "item") .as[(Int, Int)] .groupByKey(_._1) - .mapGroups { case (src, ids) => (src, ids.map(_._2).toArray) } + .mapGroups { (src, ids) => (src, ids.map(_._2).toArray) } .toDF("id", "actual") dataset.join(actual, dataset("user") === actual("id")).collect() @@ -1267,10 +1277,10 @@ class DatasetSuite extends QueryTest with SharedSparkSession { checkDataset( df.withColumn("b", lit(0)).as[ClassData] - .groupByKey(_.a).flatMapGroups { case (x, iter) => List[Int]() }) + .groupByKey(_.a).flatMapGroups { (_, _) => List[Int]() }) checkDataset( df.withColumn("b", expr("0")).as[ClassData] - .groupByKey(_.a).flatMapGroups { case (x, iter) => List[Int]() }) + .groupByKey(_.a).flatMapGroups { (_, _) => List[Int]() }) } test("SPARK-18125: Spark generated code causes CompileException") { @@ -1388,7 +1398,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { } testCheckpointing("basic") { - val ds = spark.range(10).repartition('id % 2).filter('id > 5).orderBy('id.desc) + val ds = spark.range(10).repartition($"id" % 2).filter($"id" > 5).orderBy($"id".desc) val cp = if (reliable) ds.checkpoint(eager) else ds.localCheckpoint(eager) val 
logicalRDD = cp.logicalPlan match { @@ -1423,10 +1433,10 @@ class DatasetSuite extends QueryTest with SharedSparkSession { } testCheckpointing("should preserve partitioning information") { - val ds = spark.range(10).repartition('id % 2) + val ds = spark.range(10).repartition($"id" % 2) val cp = if (reliable) ds.checkpoint(eager) else ds.localCheckpoint(eager) - val agg = cp.groupBy('id % 2).agg(count('id)) + val agg = cp.groupBy($"id" % 2).agg(count($"id")) agg.queryExecution.executedPlan.collectFirst { case ShuffleExchangeExec(_, _: RDDScanExec, _) => @@ -1438,7 +1448,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { ) } - checkAnswer(agg, ds.groupBy('id % 2).agg(count('id))) + checkAnswer(agg, ds.groupBy($"id" % 2).agg(count($"id"))) } } } @@ -1536,11 +1546,9 @@ class DatasetSuite extends QueryTest with SharedSparkSession { checkAnswer(df.sort("id"), expected) checkAnswer(df.sort(col("id")), expected) checkAnswer(df.sort($"id"), expected) - checkAnswer(df.sort('id), expected) checkAnswer(df.orderBy("id"), expected) checkAnswer(df.orderBy(col("id")), expected) checkAnswer(df.orderBy($"id"), expected) - checkAnswer(df.orderBy('id), expected) } test("SPARK-21567: Dataset should work with type alias") { @@ -1695,7 +1703,7 @@ class DatasetSuite extends QueryTest with SharedSparkSession { test("SPARK-24571: filtering of string values by char literal") { val df = Seq("Amsterdam", "San Francisco", "X").toDF("city") - checkAnswer(df.where('city === 'X'), Seq(Row("X"))) + checkAnswer(df.where($"city" === 'X'), Seq(Row("X"))) checkAnswer( df.where($"city".contains(java.lang.Character.valueOf('A'))), Seq(Row("Amsterdam"))) @@ -1841,6 +1849,66 @@ class DatasetSuite extends QueryTest with SharedSparkSession { val instant = java.time.Instant.parse("2019-03-30T09:54:00Z") assert(spark.range(1).map { _ => instant }.head === instant) } + + val dotColumnTestModes = Table( + ("caseSensitive", "colName"), + ("true", "field.1"), + ("false", "Field.1") + ) + + 
test("SPARK-25153: Improve error messages for columns with dots/periods") { + forAll(dotColumnTestModes) { (caseSensitive, colName) => + val ds = Seq(SpecialCharClass("1", "2")).toDS + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { + val errorMsg = intercept[AnalysisException] { + ds(colName) + } + assert(errorMsg.getMessage.contains(s"did you mean to quote the `$colName` column?")) + } + } + } + + test("groupBy.as") { + val df1 = Seq(DoubleData(1, "one"), DoubleData(2, "two"), DoubleData(3, "three")).toDS() + .repartition($"id").sortWithinPartitions("id") + val df2 = Seq(DoubleData(5, "one"), DoubleData(1, "two"), DoubleData(3, "three")).toDS() + .repartition($"id").sortWithinPartitions("id") + + val df3 = df1.groupBy("id").as[Int, DoubleData] + .cogroup(df2.groupBy("id").as[Int, DoubleData]) { case (key, data1, data2) => + if (key == 1) { + Iterator(DoubleData(key, (data1 ++ data2).foldLeft("")((cur, next) => cur + next.val1))) + } else Iterator.empty + } + checkDataset(df3, DoubleData(1, "onetwo")) + + // Assert that no extra shuffle introduced by cogroup. 
+ val exchanges = collect(df3.queryExecution.executedPlan) { + case h: ShuffleExchangeExec => h + } + assert(exchanges.size == 2) + } + + test("tail with different numbers") { + Seq(0, 2, 5, 10, 50, 100, 1000).foreach { n => + assert(spark.range(n).tail(6) === (math.max(n - 6, 0) until n)) + } + } + + test("tail should not accept minus value") { + val e = intercept[AnalysisException](spark.range(1).tail(-1)) + e.getMessage.contains("tail expression must be equal to or greater than 0") + } + + test("SparkSession.active should be the same instance after dataset operations") { + val active = SparkSession.getActiveSession.get + val clone = active.cloneSession() + val ds = new Dataset(clone, spark.range(10).queryExecution.logical, Encoders.INT) + + ds.queryExecution.analyzed + + assert(active eq SparkSession.getActiveSession.get) + } } object AssertExecutionId { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 2fef05f97e57c..ba45b9f9b62df 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -19,14 +19,15 @@ package org.apache.spark.sql import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.Instant -import java.util.Locale +import java.time.{Instant, LocalDateTime} +import java.util.{Locale, TimeZone} import java.util.concurrent.TimeUnit -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.DoubleType import org.apache.spark.unsafe.types.CalendarInterval class DateFunctionsSuite extends QueryTest with SharedSparkSession { @@ -95,15 +96,19 @@ class DateFunctionsSuite extends 
QueryTest with SharedSparkSession { } test("date format") { - val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") - - checkAnswer( - df.select(date_format($"a", "y"), date_format($"b", "y"), date_format($"c", "y")), - Row("2015", "2015", "2013")) - - checkAnswer( - df.selectExpr("date_format(a, 'y')", "date_format(b, 'y')", "date_format(c, 'y')"), - Row("2015", "2015", "2013")) + Seq(false, true).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c") + + checkAnswer( + df.select(date_format($"a", "y"), date_format($"b", "y"), date_format($"c", "y")), + Row("2015", "2015", "2013")) + + checkAnswer( + df.selectExpr("date_format(a, 'y')", "date_format(b, 'y')", "date_format(c, 'y')"), + Row("2015", "2015", "2013")) + } + } } test("year") { @@ -289,15 +294,15 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { val t2 = Timestamp.valueOf("2015-12-31 00:00:00") val d1 = Date.valueOf("2015-07-31") val d2 = Date.valueOf("2015-12-31") - val i = new CalendarInterval(2, 2000000L) + val i = new CalendarInterval(2, 2, 2000000L) val df = Seq((1, t1, d1), (3, t2, d2)).toDF("n", "t", "d") checkAnswer( - df.selectExpr(s"d + $i"), - Seq(Row(Date.valueOf("2015-09-30")), Row(Date.valueOf("2016-02-29")))) + df.selectExpr(s"d + INTERVAL'${i.toString}'"), + Seq(Row(Date.valueOf("2015-10-02")), Row(Date.valueOf("2016-03-02")))) checkAnswer( - df.selectExpr(s"t + $i"), - Seq(Row(Timestamp.valueOf("2015-10-01 00:00:01")), - Row(Timestamp.valueOf("2016-02-29 00:00:02")))) + df.selectExpr(s"t + INTERVAL'${i.toString}'"), + Seq(Row(Timestamp.valueOf("2015-10-03 00:00:01")), + Row(Timestamp.valueOf("2016-03-02 00:00:02")))) } test("time_sub") { @@ -305,15 +310,15 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { val t2 = Timestamp.valueOf("2016-02-29 00:00:02") val d1 = Date.valueOf("2015-09-30") val d2 = Date.valueOf("2016-02-29") - val i 
= new CalendarInterval(2, 2000000L) + val i = new CalendarInterval(2, 2, 2000000L) val df = Seq((1, t1, d1), (3, t2, d2)).toDF("n", "t", "d") checkAnswer( - df.selectExpr(s"d - $i"), - Seq(Row(Date.valueOf("2015-07-29")), Row(Date.valueOf("2015-12-28")))) + df.selectExpr(s"d - INTERVAL'${i.toString}'"), + Seq(Row(Date.valueOf("2015-07-27")), Row(Date.valueOf("2015-12-26")))) checkAnswer( - df.selectExpr(s"t - $i"), - Seq(Row(Timestamp.valueOf("2015-07-31 23:59:59")), - Row(Timestamp.valueOf("2015-12-29 00:00:00")))) + df.selectExpr(s"t - INTERVAL'${i.toString}'"), + Seq(Row(Timestamp.valueOf("2015-07-29 23:59:59")), + Row(Timestamp.valueOf("2015-12-27 00:00:00")))) } test("function add_months") { @@ -524,170 +529,194 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { } test("from_unixtime") { - val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) - val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" - val sdf2 = new SimpleDateFormat(fmt2, Locale.US) - val fmt3 = "yy-MM-dd HH-mm-ss" - val sdf3 = new SimpleDateFormat(fmt3, Locale.US) - val df = Seq((1000, "yyyy-MM-dd HH:mm:ss.SSS"), (-1000, "yy-MM-dd HH-mm-ss")).toDF("a", "b") - checkAnswer( - df.select(from_unixtime(col("a"))), - Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000))))) - checkAnswer( - df.select(from_unixtime(col("a"), fmt2)), - Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000))))) - checkAnswer( - df.select(from_unixtime(col("a"), fmt3)), - Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000))))) - checkAnswer( - df.selectExpr("from_unixtime(a)"), - Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000))))) - checkAnswer( - df.selectExpr(s"from_unixtime(a, '$fmt2')"), - Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000))))) - checkAnswer( - df.selectExpr(s"from_unixtime(a, '$fmt3')"), - Seq(Row(sdf3.format(new 
Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000))))) + Seq(false, true).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) + val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" + val sdf2 = new SimpleDateFormat(fmt2, Locale.US) + val fmt3 = "yy-MM-dd HH-mm-ss" + val sdf3 = new SimpleDateFormat(fmt3, Locale.US) + val df = Seq((1000, "yyyy-MM-dd HH:mm:ss.SSS"), (-1000, "yy-MM-dd HH-mm-ss")).toDF("a", "b") + checkAnswer( + df.select(from_unixtime(col("a"))), + Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000))))) + checkAnswer( + df.select(from_unixtime(col("a"), fmt2)), + Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000))))) + checkAnswer( + df.select(from_unixtime(col("a"), fmt3)), + Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000))))) + checkAnswer( + df.selectExpr("from_unixtime(a)"), + Seq(Row(sdf1.format(new Timestamp(1000000))), Row(sdf1.format(new Timestamp(-1000000))))) + checkAnswer( + df.selectExpr(s"from_unixtime(a, '$fmt2')"), + Seq(Row(sdf2.format(new Timestamp(1000000))), Row(sdf2.format(new Timestamp(-1000000))))) + checkAnswer( + df.selectExpr(s"from_unixtime(a, '$fmt3')"), + Seq(Row(sdf3.format(new Timestamp(1000000))), Row(sdf3.format(new Timestamp(-1000000))))) + } + } } private def secs(millis: Long): Long = TimeUnit.MILLISECONDS.toSeconds(millis) test("unix_timestamp") { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - 
checkAnswer(df.select(unix_timestamp(col("ts"))), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.select(unix_timestamp(col("ss"))), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.select(unix_timestamp(col("d"), fmt)), Seq( - Row(secs(date1.getTime)), Row(secs(date2.getTime)))) - checkAnswer(df.select(unix_timestamp(col("s"), fmt)), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr("unix_timestamp(ts)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr("unix_timestamp(ss)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr(s"unix_timestamp(d, '$fmt')"), Seq( - Row(secs(date1.getTime)), Row(secs(date2.getTime)))) - checkAnswer(df.selectExpr(s"unix_timestamp(s, '$fmt')"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - - val x1 = "2015-07-24 10:00:00" - val x2 = "2015-25-07 02:02:02" - val x3 = "2015-07-24 25:02:02" - val x4 = "2015-24-07 26:02:02" - val ts3 = Timestamp.valueOf("2015-07-24 02:25:02") - val ts4 = Timestamp.valueOf("2015-07-24 00:10:00") - - val df1 = Seq(x1, x2, x3, x4).toDF("x") - checkAnswer(df1.select(unix_timestamp(col("x"))), Seq( - Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) - checkAnswer(df1.selectExpr("unix_timestamp(x)"), Seq( - Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) - checkAnswer(df1.select(unix_timestamp(col("x"), "yyyy-dd-MM HH:mm:ss")), Seq( - Row(null), Row(secs(ts2.getTime)), Row(null), Row(null))) - checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( - Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - - // invalid format - checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')"), Seq( - Row(null), Row(null), Row(null), Row(null))) - - // february - val y1 = "2016-02-29" - val y2 = "2017-02-29" - val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") - val df2 = Seq(y1, y2).toDF("y") 
- checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq( - Row(secs(ts5.getTime)), Row(null))) - - val now = sql("select unix_timestamp()").collect().head.getLong(0) - checkAnswer( - sql(s"select cast ($now as timestamp)"), - Row(new java.util.Date(TimeUnit.SECONDS.toMillis(now)))) + Seq(false, true).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + val date1 = Date.valueOf("2015-07-24") + val date2 = Date.valueOf("2015-07-25") + val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3") + val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2") + val s1 = "2015/07/24 10:00:00.5" + val s2 = "2015/07/25 02:02:02.6" + val ss1 = "2015-07-24 10:00:00" + val ss2 = "2015-07-25 02:02:02" + val fmt = "yyyy/MM/dd HH:mm:ss.S" + val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") + checkAnswer(df.select(unix_timestamp(col("ts"))), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + checkAnswer(df.select(unix_timestamp(col("ss"))), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + checkAnswer(df.select(unix_timestamp(col("d"), fmt)), Seq( + Row(secs(date1.getTime)), Row(secs(date2.getTime)))) + checkAnswer(df.select(unix_timestamp(col("s"), fmt)), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + checkAnswer(df.selectExpr("unix_timestamp(ts)"), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + checkAnswer(df.selectExpr("unix_timestamp(ss)"), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + checkAnswer(df.selectExpr(s"unix_timestamp(d, '$fmt')"), Seq( + Row(secs(date1.getTime)), Row(secs(date2.getTime)))) + checkAnswer(df.selectExpr(s"unix_timestamp(s, '$fmt')"), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + + val x1 = "2015-07-24 10:00:00" + val x2 = "2015-25-07 02:02:02" + val x3 = "2015-07-24 25:02:02" + val x4 = "2015-24-07 26:02:02" + val ts3 = Timestamp.valueOf("2015-07-24 02:25:02") + val ts4 = Timestamp.valueOf("2015-07-24 
00:10:00") + + val df1 = Seq(x1, x2, x3, x4).toDF("x") + checkAnswer(df1.select(unix_timestamp(col("x"))), Seq( + Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) + checkAnswer(df1.selectExpr("unix_timestamp(x)"), Seq( + Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) + checkAnswer(df1.select(unix_timestamp(col("x"), "yyyy-dd-MM HH:mm:ss")), Seq( + Row(null), Row(secs(ts2.getTime)), Row(null), Row(null))) + checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( + Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) + + // invalid format + checkAnswer(df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd aa:HH:ss')"), Seq( + Row(null), Row(null), Row(null), Row(null))) + + // february + val y1 = "2016-02-29" + val y2 = "2017-02-29" + val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") + val df2 = Seq(y1, y2).toDF("y") + checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq( + Row(secs(ts5.getTime)), Row(null))) + + val now = sql("select unix_timestamp()").collect().head.getLong(0) + checkAnswer( + sql(s"select cast ($now as timestamp)"), + Row(new java.util.Date(TimeUnit.SECONDS.toMillis(now)))) + } + } } test("to_unix_timestamp") { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - checkAnswer(df.selectExpr("to_unix_timestamp(ts)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr("to_unix_timestamp(ss)"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - checkAnswer(df.selectExpr(s"to_unix_timestamp(d, '$fmt')"), Seq( - Row(secs(date1.getTime)), 
Row(secs(date2.getTime)))) - checkAnswer(df.selectExpr(s"to_unix_timestamp(s, '$fmt')"), Seq( - Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) - - val x1 = "2015-07-24 10:00:00" - val x2 = "2015-25-07 02:02:02" - val x3 = "2015-07-24 25:02:02" - val x4 = "2015-24-07 26:02:02" - val ts3 = Timestamp.valueOf("2015-07-24 02:25:02") - val ts4 = Timestamp.valueOf("2015-07-24 00:10:00") - - val df1 = Seq(x1, x2, x3, x4).toDF("x") - checkAnswer(df1.selectExpr("to_unix_timestamp(x)"), Seq( - Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) - checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( - Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - - // february - val y1 = "2016-02-29" - val y2 = "2017-02-29" - val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") - val df2 = Seq(y1, y2).toDF("y") - checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq( - Row(secs(ts5.getTime)), Row(null))) - - // invalid format - checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')"), Seq( - Row(null), Row(null), Row(null), Row(null))) + Seq(false, true).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + val date1 = Date.valueOf("2015-07-24") + val date2 = Date.valueOf("2015-07-25") + val ts1 = Timestamp.valueOf("2015-07-24 10:00:00.3") + val ts2 = Timestamp.valueOf("2015-07-25 02:02:02.2") + val s1 = "2015/07/24 10:00:00.5" + val s2 = "2015/07/25 02:02:02.6" + val ss1 = "2015-07-24 10:00:00" + val ss2 = "2015-07-25 02:02:02" + val fmt = "yyyy/MM/dd HH:mm:ss.S" + val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") + checkAnswer(df.selectExpr("to_unix_timestamp(ts)"), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + checkAnswer(df.selectExpr("to_unix_timestamp(ss)"), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + checkAnswer(df.selectExpr(s"to_unix_timestamp(d, '$fmt')"), Seq( + 
Row(secs(date1.getTime)), Row(secs(date2.getTime)))) + checkAnswer(df.selectExpr(s"to_unix_timestamp(s, '$fmt')"), Seq( + Row(secs(ts1.getTime)), Row(secs(ts2.getTime)))) + + val x1 = "2015-07-24 10:00:00" + val x2 = "2015-25-07 02:02:02" + val x3 = "2015-07-24 25:02:02" + val x4 = "2015-24-07 26:02:02" + val ts3 = Timestamp.valueOf("2015-07-24 02:25:02") + val ts4 = Timestamp.valueOf("2015-07-24 00:10:00") + + val df1 = Seq(x1, x2, x3, x4).toDF("x") + checkAnswer(df1.selectExpr("to_unix_timestamp(x)"), Seq( + Row(secs(ts1.getTime)), Row(null), Row(null), Row(null))) + checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( + Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) + + // february + val y1 = "2016-02-29" + val y2 = "2017-02-29" + val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") + val df2 = Seq(y1, y2).toDF("y") + checkAnswer(df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")), Seq( + Row(secs(ts5.getTime)), Row(null))) + + // invalid format + checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd bb:HH:ss')"), Seq( + Row(null), Row(null), Row(null), Row(null))) + } + } } test("to_timestamp") { - val date1 = Date.valueOf("2015-07-24") - val date2 = Date.valueOf("2015-07-25") - val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00") - val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00") - val ts1 = Timestamp.valueOf("2015-07-24 10:00:00") - val ts2 = Timestamp.valueOf("2015-07-25 02:02:02") - val s1 = "2015/07/24 10:00:00.5" - val s2 = "2015/07/25 02:02:02.6" - val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5") - val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6") - val ss1 = "2015-07-24 10:00:00" - val ss2 = "2015-07-25 02:02:02" - val fmt = "yyyy/MM/dd HH:mm:ss.S" - val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") - - checkAnswer(df.select(to_timestamp(col("ss"))), - df.select(unix_timestamp(col("ss")).cast("timestamp"))) - 
checkAnswer(df.select(to_timestamp(col("ss"))), Seq( - Row(ts1), Row(ts2))) - checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq( - Row(ts1m), Row(ts2m))) - checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq( - Row(ts1), Row(ts2))) - checkAnswer(df.select(to_timestamp(col("d"), "yyyy-MM-dd")), Seq( - Row(ts_date1), Row(ts_date2))) + Seq(false, true).foreach { legacyParser => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) { + val date1 = Date.valueOf("2015-07-24") + val date2 = Date.valueOf("2015-07-25") + val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00") + val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00") + val ts1 = Timestamp.valueOf("2015-07-24 10:00:00") + val ts2 = Timestamp.valueOf("2015-07-25 02:02:02") + val s1 = "2015/07/24 10:00:00.5" + val s2 = "2015/07/25 02:02:02.6" + val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5") + val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6") + val ss1 = "2015-07-24 10:00:00" + val ss2 = "2015-07-25 02:02:02" + val fmt = "yyyy/MM/dd HH:mm:ss.S" + val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d", "ts", "s", "ss") + + checkAnswer(df.select(to_timestamp(col("ss"))), + df.select(unix_timestamp(col("ss")).cast("timestamp"))) + checkAnswer(df.select(to_timestamp(col("ss"))), Seq( + Row(ts1), Row(ts2))) + if (legacyParser) { + // In Spark 2.4 and earlier, to_timestamp() parses in seconds precision and cuts off + // the fractional part of seconds. The behavior was changed by SPARK-27438. 
+ val legacyFmt = "yyyy/MM/dd HH:mm:ss" + checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq( + Row(ts1), Row(ts2))) + } else { + checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq( + Row(ts1m), Row(ts2m))) + } + checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq( + Row(ts1), Row(ts2))) + checkAnswer(df.select(to_timestamp(col("d"), "yyyy-MM-dd")), Seq( + Row(ts_date1), Row(ts_date2))) + } + } } test("datediff") { @@ -703,45 +732,55 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.selectExpr("datediff(a, d)"), Seq(Row(1), Row(1))) } + test("to_timestamp with microseconds precision") { + withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") { + val timestamp = "1970-01-01T00:00:00.123456Z" + val df = Seq(timestamp).toDF("t") + checkAnswer(df.select(to_timestamp($"t", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSX")), + Seq(Row(Instant.parse(timestamp)))) + } + } + test("from_utc_timestamp with literal zone") { val df = Seq( (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"), (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00") ).toDF("a", "b") - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - checkAnswer( - df.select(from_utc_timestamp(col("a"), "PST")), - Seq( - Row(Timestamp.valueOf("2015-07-23 17:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) - checkAnswer( - df.select(from_utc_timestamp(col("b"), "PST")), - Seq( - Row(Timestamp.valueOf("2015-07-23 17:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) - } - val msg = intercept[AnalysisException] { - df.select(from_utc_timestamp(col("a"), "PST")).collect() - }.getMessage - assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) + checkAnswer( + df.select(from_utc_timestamp(col("a"), "PST")), + Seq( + Row(Timestamp.valueOf("2015-07-23 17:00:00")), + Row(Timestamp.valueOf("2015-07-24 17:00:00")))) + checkAnswer( + df.select(from_utc_timestamp(col("b"), "PST")), + Seq( + 
Row(Timestamp.valueOf("2015-07-23 17:00:00")), + Row(Timestamp.valueOf("2015-07-24 17:00:00")))) } test("from_utc_timestamp with column zone") { - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - val df = Seq( - (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "CET"), - (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "PST") - ).toDF("a", "b", "c") - checkAnswer( - df.select(from_utc_timestamp(col("a"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 02:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) - checkAnswer( - df.select(from_utc_timestamp(col("b"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 02:00:00")), - Row(Timestamp.valueOf("2015-07-24 17:00:00")))) + val df = Seq( + (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "CET"), + (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "PST") + ).toDF("a", "b", "c") + checkAnswer( + df.select(from_utc_timestamp(col("a"), col("c"))), + Seq( + Row(Timestamp.valueOf("2015-07-24 02:00:00")), + Row(Timestamp.valueOf("2015-07-24 17:00:00")))) + checkAnswer( + df.select(from_utc_timestamp(col("b"), col("c"))), + Seq( + Row(Timestamp.valueOf("2015-07-24 02:00:00")), + Row(Timestamp.valueOf("2015-07-24 17:00:00")))) + } + + test("handling null field by date_part") { + val input = Seq(Date.valueOf("2019-09-20")).toDF("d") + Seq("date_part(null, d)", "date_part(null, date'2019-09-20')").foreach { expr => + val df = input.selectExpr(expr) + assert(df.schema.headOption.get.dataType == DoubleType) + checkAnswer(df, Row(null)) } } @@ -750,50 +789,84 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00"), (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00") ).toDF("a", "b") - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - checkAnswer( - df.select(to_utc_timestamp(col("a"), "PST")), - Seq( - 
Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-25 07:00:00")))) - checkAnswer( - df.select(to_utc_timestamp(col("b"), "PST")), - Seq( - Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-25 07:00:00")))) - } - val msg = intercept[AnalysisException] { - df.select(to_utc_timestamp(col("a"), "PST")).collect() - }.getMessage - assert(msg.contains(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key)) + checkAnswer( + df.select(to_utc_timestamp(col("a"), "PST")), + Seq( + Row(Timestamp.valueOf("2015-07-24 07:00:00")), + Row(Timestamp.valueOf("2015-07-25 07:00:00")))) + checkAnswer( + df.select(to_utc_timestamp(col("b"), "PST")), + Seq( + Row(Timestamp.valueOf("2015-07-24 07:00:00")), + Row(Timestamp.valueOf("2015-07-25 07:00:00")))) } test("to_utc_timestamp with column zone") { - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - val df = Seq( - (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "PST"), - (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "CET") - ).toDF("a", "b", "c") - checkAnswer( - df.select(to_utc_timestamp(col("a"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-24 22:00:00")))) - checkAnswer( - df.select(to_utc_timestamp(col("b"), col("c"))), - Seq( - Row(Timestamp.valueOf("2015-07-24 07:00:00")), - Row(Timestamp.valueOf("2015-07-24 22:00:00")))) + val df = Seq( + (Timestamp.valueOf("2015-07-24 00:00:00"), "2015-07-24 00:00:00", "PST"), + (Timestamp.valueOf("2015-07-25 00:00:00"), "2015-07-25 00:00:00", "CET") + ).toDF("a", "b", "c") + checkAnswer( + df.select(to_utc_timestamp(col("a"), col("c"))), + Seq( + Row(Timestamp.valueOf("2015-07-24 07:00:00")), + Row(Timestamp.valueOf("2015-07-24 22:00:00")))) + checkAnswer( + df.select(to_utc_timestamp(col("b"), col("c"))), + Seq( + Row(Timestamp.valueOf("2015-07-24 07:00:00")), + Row(Timestamp.valueOf("2015-07-24 22:00:00")))) + } + + test("SPARK-30668: use 
legacy timestamp parser in to_timestamp") { + def checkTimeZoneParsing(expected: Any): Unit = { + val df = Seq("2020-01-27T20:06:11.847-0800").toDF("ts") + checkAnswer(df.select(to_timestamp(col("ts"), "yyyy-MM-dd'T'HH:mm:ss.SSSz")), + Row(expected)) + } + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> "true") { + checkTimeZoneParsing(Timestamp.valueOf("2020-01-27 20:06:11.847")) + } + withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> "false") { + checkTimeZoneParsing(null) } } + test("SPARK-30752: convert time zones on a daylight saving day") { + val systemTz = "PST" + val sessionTz = "UTC" + val fromTz = "Asia/Hong_Kong" + val fromTs = "2019-11-03T12:00:00" // daylight saving date in PST + val utsTs = "2019-11-03T04:00:00" + val defaultTz = TimeZone.getDefault + try { + TimeZone.setDefault(DateTimeUtils.getTimeZone(systemTz)) + withSQLConf( + SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true", + SQLConf.SESSION_LOCAL_TIMEZONE.key -> sessionTz) { + val expected = LocalDateTime.parse(utsTs) + .atZone(DateTimeUtils.getZoneId(sessionTz)) + .toInstant + val df = Seq(fromTs).toDF("localTs") + checkAnswer( + df.select(to_utc_timestamp(col("localTs"), fromTz)), + Row(expected)) + } + } finally { + TimeZone.setDefault(defaultTz) + } + } - test("to_timestamp with microseconds precision") { - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") { - val timestamp = "1970-01-01T00:00:00.123456Z" - val df = Seq(timestamp).toDF("t") - checkAnswer(df.select(to_timestamp($"t", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSX")), - Seq(Row(Instant.parse(timestamp)))) + test("SPARK-30766: date_trunc of old timestamps to hours and days") { + def checkTrunc(level: String, expected: String): Unit = { + val df = Seq("0010-01-01 01:02:03.123456") + .toDF() + .select($"value".cast("timestamp").as("ts")) + .select(date_trunc(level, $"ts").cast("string")) + checkAnswer(df, Row(expected)) } + + checkTrunc("HOUR", "0010-01-01 01:00:00") + checkTrunc("DAY", "0010-01-01 00:00:00") } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala new file mode 100644 index 0000000000000..b1d5e80f8563f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedDatasetAggregatorSuite.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.sql.expressions.scalalang.typed +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSparkSession + +@deprecated("This test suite will be removed.", "3.0.0") +class DeprecatedDatasetAggregatorSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + test("typed aggregation: TypedAggregator") { + val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() + + checkDataset( + ds.groupByKey(_._1).agg(typed.sum(_._2)), + ("a", 30.0), ("b", 3.0), ("c", 1.0)) + } + + test("typed aggregation: TypedAggregator, expr, expr") { + val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() + + checkDataset( + ds.groupByKey(_._1).agg( + typed.sum(_._2), + expr("sum(_2)").as[Long], + count("*")), + ("a", 30.0, 30L, 2L), ("b", 3.0, 3L, 2L), ("c", 1.0, 1L, 1L)) + } + + test("typed aggregation: in project list") { + val ds = Seq(1, 3, 2, 5).toDS() + + checkDataset( + ds.select(typed.sum((i: Int) => i)), + 11.0) + checkDataset( + ds.select(typed.sum((i: Int) => i), typed.sum((i: Int) => i * 2)), + 11.0 -> 22.0) + } + + test("typed aggregate: avg, count, sum") { + val ds = Seq("a" -> 1, "a" -> 3, "b" -> 3).toDS() + checkDataset( + ds.groupByKey(_._1).agg( + typed.avg(_._2), typed.count(_._2), typed.sum(_._2), typed.sumLong(_._2)), + ("a", 2.0, 2L, 4.0, 4L), ("b", 3.0, 1L, 3.0, 3L)) + } + + test("spark-15114 shorter system generated alias names") { + val ds = Seq(1, 3, 2, 5).toDS() + assert(ds.select(typed.sum((i: Int) => i)).columns.head === "TypedSumDouble(int)") + val ds2 = ds.select(typed.sum((i: Int) => i), typed.avg((i: Int) => i)) + assert(ds2.columns.head === "TypedSumDouble(int)") + assert(ds2.columns.last === "TypedAverage(int)") + val df = Seq(1 -> "a", 2 -> "b", 3 -> "b").toDF("i", "j") + assert(df.groupBy($"j").agg(RowAgg.toColumn).columns.last == + "RowAgg(org.apache.spark.sql.Row)") + assert(df.groupBy($"j").agg(RowAgg.toColumn as 
"agg1").columns.last == "agg1") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index 09221efe28e15..baa9f5ecafc68 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -22,6 +22,8 @@ import org.scalatest.GivenWhenThen import org.apache.spark.sql.catalyst.expressions.{DynamicPruningExpression, Expression} import org.apache.spark.sql.catalyst.plans.ExistenceJoin import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper} +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec} import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingQueryWrapper} import org.apache.spark.sql.functions._ @@ -31,18 +33,24 @@ import org.apache.spark.sql.test.SharedSparkSession /** * Test suite for the filtering ratio policy used to trigger dynamic partition pruning (DPP). 
*/ -class DynamicPartitionPruningSuite +abstract class DynamicPartitionPruningSuiteBase extends QueryTest with SharedSparkSession - with GivenWhenThen { + with GivenWhenThen + with AdaptiveSparkPlanHelper { val tableFormat: String = "parquet" import testImplicits._ + val adaptiveExecutionOn: Boolean + override def beforeAll(): Unit = { super.beforeAll() + spark.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, adaptiveExecutionOn) + spark.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY, true) + val factData = Seq[(Int, Int, Int, Int)]( (1000, 1, 1, 10), (1010, 2, 1, 10), @@ -96,7 +104,8 @@ class DynamicPartitionPruningSuite (6, 60) ) - spark.range(1000).select('id as 'product_id, ('id % 10) as 'store_id, ('id + 1) as 'code) + spark.range(1000) + .select($"id" as "product_id", ($"id" % 10) as "store_id", ($"id" + 1) as "code") .write .format(tableFormat) .mode("overwrite") @@ -149,6 +158,8 @@ class DynamicPartitionPruningSuite sql("DROP TABLE IF EXISTS fact_stats") sql("DROP TABLE IF EXISTS dim_stats") } finally { + spark.sessionState.conf.unsetConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED) + spark.sessionState.conf.unsetConf(SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY) super.afterAll() } } @@ -161,22 +172,41 @@ class DynamicPartitionPruningSuite df: DataFrame, withSubquery: Boolean, withBroadcast: Boolean): Unit = { - val dpExprs = collectDynamicPruningExpressions(df.queryExecution.executedPlan) + val plan = df.queryExecution.executedPlan + val dpExprs = collectDynamicPruningExpressions(plan) val hasSubquery = dpExprs.exists { case InSubqueryExec(_, _: SubqueryExec, _, _) => true case _ => false } - val hasSubqueryBroadcast = dpExprs.exists { - case InSubqueryExec(_, _: SubqueryBroadcastExec, _, _) => true - case _ => false + val subqueryBroadcast = dpExprs.collect { + case InSubqueryExec(_, b: SubqueryBroadcastExec, _, _) => b } val hasFilter = if (withSubquery) "Should" else "Shouldn't" assert(hasSubquery == withSubquery, s"$hasFilter trigger 
DPP with a subquery duplicate:\n${df.queryExecution}") val hasBroadcast = if (withBroadcast) "Should" else "Shouldn't" - assert(hasSubqueryBroadcast == withBroadcast, + assert(subqueryBroadcast.nonEmpty == withBroadcast, s"$hasBroadcast trigger DPP with a reused broadcast exchange:\n${df.queryExecution}") + + subqueryBroadcast.foreach { s => + s.child match { + case _: ReusedExchangeExec => // reuse check ok. + case b: BroadcastExchangeExec => + val hasReuse = plan.find { + case ReusedExchangeExec(_, e) => e eq b + case _ => false + }.isDefined + assert(hasReuse, s"$s\nshould have been reused in\n$plan") + case _ => + fail(s"Invalid child node found in\n$s") + } + } + + val isMainQueryAdaptive = plan.isInstanceOf[AdaptiveSparkPlanExec] + subqueriesAll(plan).filterNot(subqueryBroadcast.contains).foreach { s => + assert(s.find(_.isInstanceOf[AdaptiveSparkPlanExec]).isDefined == isMainQueryAdaptive) + } } /** @@ -221,7 +251,8 @@ class DynamicPartitionPruningSuite */ test("simple inner join triggers DPP with mock-up tables") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { withTable("df1", "df2") { spark.range(1000) .select(col("id"), col("id").as("k")) @@ -253,7 +284,8 @@ class DynamicPartitionPruningSuite */ test("self-join on a partitioned table should not trigger DPP") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { withTable("fact") { sql( s""" @@ -284,7 +316,8 @@ class DynamicPartitionPruningSuite */ test("static scan metrics") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", - 
SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { withTable("fact", "dim") { spark.range(10) .map { x => Tuple3(x, x + 1, 0) } @@ -304,7 +337,7 @@ class DynamicPartitionPruningSuite def getFactScan(plan: SparkPlan): SparkPlan = { val scanOption = - plan.find { + find(plan) { case s: FileSourceScanExec => s.output.exists(_.find(_.argString(maxFields = 100).contains("fid")).isDefined) case _ => false @@ -352,7 +385,8 @@ class DynamicPartitionPruningSuite test("DPP should not be rewritten as an existential join") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "1.5", - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { val df = sql( s""" |SELECT * FROM product p WHERE p.store_id NOT IN @@ -377,7 +411,7 @@ class DynamicPartitionPruningSuite */ test("DPP triggers only for certain types of query") { withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false") { Given("dynamic partition pruning disabled") withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "false") { val df = sql( @@ -408,14 +442,15 @@ class DynamicPartitionPruningSuite """ |SELECT * FROM fact_sk f |JOIN dim_store s - |ON f.date_id = s.store_id + |ON f.store_id = s.store_id """.stripMargin) checkPartitionPruningPredicate(df, false, false) } Given("left-semi join with partition column on the left side") - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { val df = sql( """ |SELECT * FROM fact_sk f @@ 
-439,7 +474,8 @@ class DynamicPartitionPruningSuite } Given("right outer join with partition column on the left side") - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { val df = sql( """ |SELECT * FROM fact_sk f RIGHT OUTER JOIN dim_store s @@ -456,7 +492,8 @@ class DynamicPartitionPruningSuite */ test("filtering ratio policy fallback") { withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { Given("no stats and selective predicate") withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "true") { @@ -525,7 +562,8 @@ class DynamicPartitionPruningSuite */ test("filtering ratio policy with stats when the broadcast pruning is disabled") { withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { Given("disabling the use of stats in the DPP heuristic") withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false") { @@ -595,10 +633,7 @@ class DynamicPartitionPruningSuite test("partition pruning in broadcast hash joins with non-deterministic probe part") { Given("alias with simple join condition, and non-deterministic query") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.pid, f.sid FROM @@ -612,10 +647,7 @@ class 
DynamicPartitionPruningSuite } Given("alias over multiple sub-queries with simple join condition") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.pid, f.sid FROM @@ -633,10 +665,7 @@ class DynamicPartitionPruningSuite test("partition pruning in broadcast hash joins with aliases") { Given("alias with simple join condition, using attribute names only") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.pid, f.sid FROM @@ -656,10 +685,7 @@ class DynamicPartitionPruningSuite } Given("alias with expr as join condition") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.pid, f.sid FROM @@ -679,10 +705,7 @@ class DynamicPartitionPruningSuite } Given("alias over multiple sub-queries with simple join condition") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.pid, f.sid FROM @@ -704,10 +727,7 @@ class 
DynamicPartitionPruningSuite } Given("alias over multiple sub-queries with simple join condition") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.pid_d as pid, f.sid_d as sid FROM @@ -736,10 +756,8 @@ class DynamicPartitionPruningSuite test("partition pruning in broadcast hash joins") { Given("disable broadcast pruning and disable subquery duplication") withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false", - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { val df = sql( """ |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f @@ -759,9 +777,10 @@ class DynamicPartitionPruningSuite Given("disable reuse broadcast results and enable subquery duplication") withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "false", + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0.5") { + SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0.5", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { val df = sql( """ |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f @@ -780,52 +799,47 @@ class DynamicPartitionPruningSuite } Given("enable reuse broadcast results and disable query duplication") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - 
SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { - val df = sql( - """ - |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f - |JOIN dim_stats s - |ON f.store_id = s.store_id WHERE s.country = 'DE' - """.stripMargin) + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { + val df = sql( + """ + |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f + |JOIN dim_stats s + |ON f.store_id = s.store_id WHERE s.country = 'DE' + """.stripMargin) - checkPartitionPruningPredicate(df, false, true) + checkPartitionPruningPredicate(df, false, true) - checkAnswer(df, - Row(1030, 2, 10, 3) :: - Row(1040, 2, 50, 3) :: - Row(1050, 2, 50, 3) :: - Row(1060, 2, 50, 3) :: Nil - ) + checkAnswer(df, + Row(1030, 2, 10, 3) :: + Row(1040, 2, 50, 3) :: + Row(1050, 2, 50, 3) :: + Row(1060, 2, 50, 3) :: Nil + ) } Given("disable broadcast hash join and disable query duplication") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { - val df = sql( - """ - |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f - |JOIN dim_stats s - |ON f.store_id = s.store_id WHERE s.country = 'DE' - """.stripMargin) + withSQLConf( + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val df = sql( + """ + |SELECT f.date_id, f.product_id, f.units_sold, f.store_id FROM fact_stats f + |JOIN dim_stats s + |ON f.store_id = s.store_id WHERE s.country = 'DE' + """.stripMargin) - checkPartitionPruningPredicate(df, false, false) + checkPartitionPruningPredicate(df, false, false) - checkAnswer(df, - Row(1030, 2, 10, 3) :: - Row(1040, 2, 50, 3) :: - Row(1050, 
2, 50, 3) :: - Row(1060, 2, 50, 3) :: Nil - ) + checkAnswer(df, + Row(1030, 2, 10, 3) :: + Row(1040, 2, 50, 3) :: + Row(1050, 2, 50, 3) :: + Row(1060, 2, 50, 3) :: Nil + ) } Given("disable broadcast hash join and enable query duplication") - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "true") { val df = sql( @@ -847,9 +861,7 @@ class DynamicPartitionPruningSuite } test("broadcast a single key in a HashedRelation") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { withTable("fact", "dim") { spark.range(100).select( $"id", @@ -907,9 +919,7 @@ class DynamicPartitionPruningSuite } test("broadcast multiple keys in a LongHashedRelation") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { withTable("fact", "dim") { spark.range(100).select( $"id", @@ -944,9 +954,7 @@ class DynamicPartitionPruningSuite } test("broadcast multiple keys in an UnsafeHashedRelation") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { withTable("fact", "dim") { spark.range(100).select( $"id", @@ -981,9 +989,7 @@ class 
DynamicPartitionPruningSuite } test("different broadcast subqueries with identical children") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { withTable("fact", "dim") { spark.range(100).select( $"id", @@ -1022,7 +1028,7 @@ class DynamicPartitionPruningSuite test("no partition pruning when the build side is a stream") { withTable("fact") { val input = MemoryStream[Int] - val stream = input.toDF.select('value as "one", ('value * 3) as "code") + val stream = input.toDF.select($"value" as "one", ($"value" * 3) as "code") spark.range(100).select( $"id", ($"id" + 1).as("one"), @@ -1055,7 +1061,7 @@ class DynamicPartitionPruningSuite } test("avoid reordering broadcast join keys to match input hash partitioning") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { withTable("large", "dimTwo", "dimThree") { spark.range(100).select( @@ -1105,9 +1111,7 @@ class DynamicPartitionPruningSuite * duplicated partitioning keys, also used to uniquely identify the dynamic pruning filters. 
*/ test("dynamic partition pruning ambiguity issue across nested joins") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { withTable("store", "date", "item") { spark.range(500) .select((($"id" + 30) % 50).as("ss_item_sk"), @@ -1145,9 +1149,7 @@ class DynamicPartitionPruningSuite } test("cleanup any DPP filter that isn't pushed down due to expression id clashes") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { withTable("fact", "dim") { spark.range(1000).select($"id".as("A"), $"id".as("AA")) .write.partitionBy("A").format(tableFormat).mode("overwrite").saveAsTable("fact") @@ -1168,10 +1170,7 @@ class DynamicPartitionPruningSuite } test("cleanup any DPP filter that isn't pushed down due to non-determinism") { - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.pid, f.sid FROM @@ -1186,9 +1185,8 @@ class DynamicPartitionPruningSuite } test("join key with multiple references on the filtering plan") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") 
{ + // when enable AQE, the reusedExchange is inserted when executed. withTable("fact", "dim") { spark.range(100).select( $"id", @@ -1220,9 +1218,7 @@ class DynamicPartitionPruningSuite } test("Make sure dynamic pruning works on uncorrelated queries") { - withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT d.store_id, @@ -1246,10 +1242,7 @@ class DynamicPartitionPruningSuite test("Plan broadcast pruning only when the broadcast can be reused") { Given("dynamic pruning filter on the build side") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT f.date_id, f.store_id, f.product_id, f.units_sold FROM fact_np f @@ -1268,10 +1261,7 @@ class DynamicPartitionPruningSuite } Given("dynamic pruning filter on the probe side") - withSQLConf( - SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST.key -> "true", - SQLConf.DYNAMIC_PARTITION_PRUNING_USE_STATS.key -> "false", - SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "0") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { val df = sql( """ |SELECT /*+ BROADCAST(f)*/ @@ -1291,3 +1281,11 @@ class DynamicPartitionPruningSuite } } } + +class DynamicPartitionPruningSuiteAEOff extends DynamicPartitionPruningSuiteBase { + override val adaptiveExecutionOn: Boolean = false +} + +class DynamicPartitionPruningSuiteAEOn extends DynamicPartitionPruningSuiteBase { + override val adaptiveExecutionOn: Boolean = true +} diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 125cff0e6628a..b591705274110 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType @@ -24,13 +26,41 @@ import org.apache.spark.sql.types.StructType class ExplainSuite extends QueryTest with SharedSparkSession { import testImplicits._ + var originalValue: String = _ + protected override def beforeAll(): Unit = { + super.beforeAll() + originalValue = spark.conf.get(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key) + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + } + + protected override def afterAll(): Unit = { + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, originalValue) + super.afterAll() + } + + private def getNormalizedExplain(df: DataFrame, mode: ExplainMode): String = { + val output = new java.io.ByteArrayOutputStream() + Console.withOut(output) { + df.explain(mode.name) + } + output.toString.replaceAll("#\\d+", "#x") + } + /** * Get the explain from a DataFrame and run the specified action on it. */ - private def withNormalizedExplain(df: DataFrame, extended: Boolean)(f: String => Unit) = { + private def withNormalizedExplain(df: DataFrame, mode: ExplainMode)(f: String => Unit) = { + f(getNormalizedExplain(df, mode)) + } + + /** + * Get the explain by running the sql. The explain mode should be part of the + * sql text itself. 
+ */ + private def withNormalizedExplain(queryText: String)(f: String => Unit) = { val output = new java.io.ByteArrayOutputStream() Console.withOut(output) { - df.explain(extended = extended) + sql(queryText).show(false) } val normalizedOutput = output.toString.replaceAll("#\\d+", "#x") f(normalizedOutput) @@ -39,14 +69,19 @@ class ExplainSuite extends QueryTest with SharedSparkSession { /** * Runs the plan and makes sure the plans contains all of the keywords. */ - private def checkKeywordsExistsInExplain(df: DataFrame, keywords: String*): Unit = { - withNormalizedExplain(df, extended = true) { normalizedOutput => + private def checkKeywordsExistsInExplain( + df: DataFrame, mode: ExplainMode, keywords: String*): Unit = { + withNormalizedExplain(df, mode) { normalizedOutput => for (key <- keywords) { assert(normalizedOutput.contains(key)) } } } + private def checkKeywordsExistsInExplain(df: DataFrame, keywords: String*): Unit = { + checkKeywordsExistsInExplain(df, ExtendedMode, keywords: _*) + } + test("SPARK-23034 show rdd names in RDD scan nodes (Dataset)") { val rddWithName = spark.sparkContext.parallelize(Row(1, "abc") :: Nil).setName("testRdd") val df = spark.createDataFrame(rddWithName, StructType.fromDDL("c0 int, c1 string")) @@ -195,11 +230,117 @@ class ExplainSuite extends QueryTest with SharedSparkSession { test("SPARK-26659: explain of DataWritingCommandExec should not contain duplicate cmd.nodeName") { withTable("temptable") { val df = sql("create table temptable using parquet as select * from range(2)") - withNormalizedExplain(df, extended = false) { normalizedOutput => + withNormalizedExplain(df, SimpleMode) { normalizedOutput => assert("Create\\w*?TableAsSelectCommand".r.findAllMatchIn(normalizedOutput).length == 1) } } } + + test("explain formatted - check presence of subquery in case of DPP") { + withTable("df1", "df2") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", + 
SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { + withTable("df1", "df2") { + spark.range(1000).select(col("id"), col("id").as("k")) + .write + .partitionBy("k") + .format("parquet") + .mode("overwrite") + .saveAsTable("df1") + + spark.range(100) + .select(col("id"), col("id").as("k")) + .write + .partitionBy("k") + .format("parquet") + .mode("overwrite") + .saveAsTable("df2") + + val sqlText = + """ + |EXPLAIN FORMATTED SELECT df1.id, df2.k + |FROM df1 JOIN df2 ON df1.k = df2.k AND df2.id < 2 + |""".stripMargin + + val expected_pattern1 = + "Subquery:1 Hosting operator id = 1 Hosting Expression = k#xL IN subquery#x" + val expected_pattern2 = + "PartitionFilters: \\[isnotnull\\(k#xL\\), dynamicpruningexpression\\(k#xL " + + "IN subquery#x\\)\\]" + val expected_pattern3 = + "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" + + "/df2/.*, ... 99 entries\\]" + val expected_pattern4 = + "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" + + "/df1/.*, ... 
999 entries\\]" + withNormalizedExplain(sqlText) { normalizedOutput => + assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1) + assert(expected_pattern2.r.findAllMatchIn(normalizedOutput).length == 1) + assert(expected_pattern3.r.findAllMatchIn(normalizedOutput).length == 2) + assert(expected_pattern4.r.findAllMatchIn(normalizedOutput).length == 1) + } + } + } + } + } + + test("Support ExplainMode in Dataset.explain") { + val df1 = Seq((1, 2), (2, 3)).toDF("k", "v1") + val df2 = Seq((2, 3), (1, 1)).toDF("k", "v2") + val testDf = df1.join(df2, "k").groupBy("k").agg(count("v1"), sum("v1"), avg("v2")) + + val simpleExplainOutput = getNormalizedExplain(testDf, SimpleMode) + assert(simpleExplainOutput.startsWith("== Physical Plan ==")) + Seq("== Parsed Logical Plan ==", + "== Analyzed Logical Plan ==", + "== Optimized Logical Plan ==").foreach { planType => + assert(!simpleExplainOutput.contains(planType)) + } + checkKeywordsExistsInExplain( + testDf, + ExtendedMode, + "== Parsed Logical Plan ==" :: + "== Analyzed Logical Plan ==" :: + "== Optimized Logical Plan ==" :: + "== Physical Plan ==" :: + Nil: _*) + checkKeywordsExistsInExplain( + testDf, + CostMode, + "Statistics(sizeInBytes=" :: + Nil: _*) + checkKeywordsExistsInExplain( + testDf, + CodegenMode, + "WholeStageCodegen subtrees" :: + "Generated code:" :: + Nil: _*) + checkKeywordsExistsInExplain( + testDf, + FormattedMode, + "* LocalTableScan (1)" :: + "(1) LocalTableScan [codegen id :" :: + Nil: _*) + } + + test("Dataset.toExplainString has mode as string") { + val df = spark.range(10).toDF + def assertExplainOutput(mode: ExplainMode): Unit = { + assert(df.queryExecution.explainString(mode).replaceAll("#\\d+", "#x").trim === + getNormalizedExplain(df, mode).trim) + } + assertExplainOutput(SimpleMode) + assertExplainOutput(ExtendedMode) + assertExplainOutput(CodegenMode) + assertExplainOutput(CostMode) + assertExplainOutput(FormattedMode) + + val errMsg = intercept[IllegalArgumentException] { 
+ ExplainMode.fromString("unknown") + }.getMessage + assert(errMsg.contains("Unknown explain mode: unknown")) + } } case class ExplainSingleData(id: Int) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 23848d90dc53d..c870958128483 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql -import java.io.{File, FilenameFilter, FileNotFoundException} +import java.io.{File, FileNotFoundException} import java.nio.file.{Files, StandardOpenOption} import java.util.Locale @@ -27,9 +27,13 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} -import org.apache.spark.sql.TestingUDT.{IntervalData, IntervalUDT, NullData, NullUDT} +import org.apache.spark.sql.TestingUDT.{IntervalUDT, NullData, NullUDT} +import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.catalyst.plans.logical.Filter +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.datasources.FilePartition +import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, FileScan} import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.functions._ @@ -38,7 +42,9 @@ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ -class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { +class 
FileBasedDataSourceSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ override def beforeAll(): Unit = { @@ -175,18 +181,23 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { withTempDir { dir => val basePath = dir.getCanonicalPath - Seq("0").toDF("a").write.format(format).save(new Path(basePath, "first").toString) - Seq("1").toDF("a").write.format(format).save(new Path(basePath, "second").toString) + Seq("0").toDF("a").write.format(format).save(new Path(basePath, "second").toString) + Seq("1").toDF("a").write.format(format).save(new Path(basePath, "fourth").toString) + val firstPath = new Path(basePath, "first") val thirdPath = new Path(basePath, "third") val fs = thirdPath.getFileSystem(spark.sessionState.newHadoopConf()) - Seq("2").toDF("a").write.format(format).save(thirdPath.toString) - val files = fs.listStatus(thirdPath).filter(_.isFile).map(_.getPath) + Seq("2").toDF("a").write.format(format).save(firstPath.toString) + Seq("3").toDF("a").write.format(format).save(thirdPath.toString) + val files = Seq(firstPath, thirdPath).flatMap { p => + fs.listStatus(p).filter(_.isFile).map(_.getPath) + } val df = spark.read.format(format).load( new Path(basePath, "first").toString, new Path(basePath, "second").toString, - new Path(basePath, "third").toString) + new Path(basePath, "third").toString, + new Path(basePath, "fourth").toString) // Make sure all data files are deleted and can't be opened. 
files.foreach(f => fs.delete(f, false)) @@ -199,15 +210,21 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { } } - withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") { - testIgnoreMissingFiles() - } - - withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") { - val exception = intercept[SparkException] { - testIgnoreMissingFiles() + for { + ignore <- Seq("true", "false") + sources <- Seq("", format) + } { + withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> ignore, + SQLConf.USE_V1_SOURCE_LIST.key -> sources) { + if (ignore.toBoolean) { + testIgnoreMissingFiles() + } else { + val exception = intercept[SparkException] { + testIgnoreMissingFiles() + } + assert(exception.getMessage().contains("does not exist")) + } } - assert(exception.getMessage().contains("does not exist")) } } } @@ -481,14 +498,14 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { spark.range(1000).repartition(1).write.csv(path) val bytesReads = new mutable.ArrayBuffer[Long]() val bytesReadListener = new SparkListener() { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { bytesReads += taskEnd.taskMetrics.inputMetrics.bytesRead } } sparkContext.addSparkListener(bytesReadListener) try { spark.read.csv(path).limit(1).collect() - sparkContext.listenerBus.waitUntilEmpty(1000L) + sparkContext.listenerBus.waitUntilEmpty() assert(bytesReads.sum === 7860) } finally { sparkContext.removeSparkListener(bytesReadListener) @@ -657,6 +674,23 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { } } + test("Return correct results when data columns overlap with partition columns (nested data)") { + Seq("parquet", "orc", "json").foreach { format => + withSQLConf(SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> "true") { + withTempPath { path => + val tablePath = new File(s"${path.getCanonicalPath}/c3=c/c1=a/c5=e") + + val inputDF = sql("SELECT 1 c1, 2 c2, 3 c3, 
named_struct('c4_1', 2, 'c4_2', 3) c4, 5 c5") + inputDF.write.format(format).save(tablePath.getCanonicalPath) + + val resultDF = spark.read.format(format).load(path.getCanonicalPath) + .select("c1", "c4.c4_1", "c5", "c3") + checkAnswer(resultDF, Row("a", 2, "e", "c")) + } + } + } + } + test("sizeInBytes should be the total size of all files") { Seq("orc", "").foreach { useV1SourceReaderList => withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> useV1SourceReaderList) { @@ -664,7 +698,7 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { dir.delete() spark.range(1000).write.orc(dir.toString) val df = spark.read.orc(dir.toString) - assert(df.queryExecution.logical.stats.sizeInBytes === BigInt(getLocalDirSize(dir))) + assert(df.queryExecution.optimizedPlan.stats.sizeInBytes === BigInt(getLocalDirSize(dir))) } } } @@ -685,21 +719,21 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { val df2FromFile = spark.read.orc(workDirPath + "/data2") val joinedDF = df1FromFile.join(df2FromFile, Seq("count")) if (compressionFactor == 0.5) { - val bJoinExec = joinedDF.queryExecution.executedPlan.collect { + val bJoinExec = collect(joinedDF.queryExecution.executedPlan) { case bJoin: BroadcastHashJoinExec => bJoin } assert(bJoinExec.nonEmpty) - val smJoinExec = joinedDF.queryExecution.executedPlan.collect { + val smJoinExec = collect(joinedDF.queryExecution.executedPlan) { case smJoin: SortMergeJoinExec => smJoin } assert(smJoinExec.isEmpty) } else { // compressionFactor is 1.0 - val bJoinExec = joinedDF.queryExecution.executedPlan.collect { + val bJoinExec = collect(joinedDF.queryExecution.executedPlan) { case bJoin: BroadcastHashJoinExec => bJoin } assert(bJoinExec.isEmpty) - val smJoinExec = joinedDF.queryExecution.executedPlan.collect { + val smJoinExec = collect(joinedDF.queryExecution.executedPlan) { case smJoin: SortMergeJoinExec => smJoin } assert(smJoinExec.nonEmpty) @@ -709,6 +743,85 @@ class FileBasedDataSourceSuite extends 
QueryTest with SharedSparkSession { } } + test("File source v2: support partition pruning") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + allFileBasedDataSources.foreach { format => + withTempPath { dir => + Seq(("a", 1, 2), ("b", 1, 2), ("c", 2, 1)) + .toDF("value", "p1", "p2") + .write + .format(format) + .partitionBy("p1", "p2") + .option("header", true) + .save(dir.getCanonicalPath) + val df = spark + .read + .format(format) + .option("header", true) + .load(dir.getCanonicalPath) + .where("p1 = 1 and p2 = 2 and value != \"a\"") + + val filterCondition = df.queryExecution.optimizedPlan.collectFirst { + case f: Filter => f.condition + } + assert(filterCondition.isDefined) + // The partitions filters should be pushed down and no need to be reevaluated. + assert(filterCondition.get.collectFirst { + case a: AttributeReference if a.name == "p1" || a.name == "p2" => a + }.isEmpty) + + val fileScan = df.queryExecution.executedPlan collectFirst { + case BatchScanExec(_, f: FileScan) => f + } + assert(fileScan.nonEmpty) + assert(fileScan.get.partitionFilters.nonEmpty) + assert(fileScan.get.dataFilters.nonEmpty) + assert(fileScan.get.planInputPartitions().forall { partition => + partition.asInstanceOf[FilePartition].files.forall { file => + file.filePath.contains("p1=1") && file.filePath.contains("p2=2") + } + }) + checkAnswer(df, Row("b", 1, 2)) + } + } + } + } + + test("File source v2: support passing data filters to FileScan without partitionFilters") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + allFileBasedDataSources.foreach { format => + withTempPath { dir => + Seq(("a", 1, 2), ("b", 1, 2), ("c", 2, 1)) + .toDF("value", "p1", "p2") + .write + .format(format) + .partitionBy("p1", "p2") + .option("header", true) + .save(dir.getCanonicalPath) + val df = spark + .read + .format(format) + .option("header", true) + .load(dir.getCanonicalPath) + .where("value = 'a'") + + val filterCondition = df.queryExecution.optimizedPlan.collectFirst { + case f: 
Filter => f.condition + } + assert(filterCondition.isDefined) + + val fileScan = df.queryExecution.executedPlan collectFirst { + case BatchScanExec(_, f: FileScan) => f + } + assert(fileScan.nonEmpty) + assert(fileScan.get.partitionFilters.isEmpty) + assert(fileScan.get.dataFilters.nonEmpty) + checkAnswer(df, Row("a", 1, 2)) + } + } + } + } + test("File table location should include both values of option `path` and `paths`") { withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { withTempPaths(3) { paths => @@ -720,7 +833,7 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSparkSession { .option("path", paths.head.getCanonicalPath) .parquet(paths(1).getCanonicalPath, paths(2).getCanonicalPath) df.queryExecution.optimizedPlan match { - case PhysicalOperation(_, _, DataSourceV2Relation(table: ParquetTable, _, _)) => + case PhysicalOperation(_, _, DataSourceV2ScanRelation(table: ParquetTable, _, _)) => assert(table.paths.toSet == paths.map(_.getCanonicalPath).toSet) case _ => throw new AnalysisException("Can not match ParquetTable in the query.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index 4edce3b0811e0..96a0eb3e32e9b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -88,28 +88,28 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { test("single explode") { val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") checkAnswer( - df.select(explode('intList)), + df.select(explode($"intList")), Row(1) :: Row(2) :: Row(3) :: Nil) } test("single explode_outer") { val df = Seq((1, Seq(1, 2, 3)), (2, Seq())).toDF("a", "intList") checkAnswer( - df.select(explode_outer('intList)), + df.select(explode_outer($"intList")), Row(1) :: Row(2) :: Row(3) :: Row(null) :: Nil) } test("single posexplode") { val df = 
Seq((1, Seq(1, 2, 3))).toDF("a", "intList") checkAnswer( - df.select(posexplode('intList)), + df.select(posexplode($"intList")), Row(0, 1) :: Row(1, 2) :: Row(2, 3) :: Nil) } test("single posexplode_outer") { val df = Seq((1, Seq(1, 2, 3)), (2, Seq())).toDF("a", "intList") checkAnswer( - df.select(posexplode_outer('intList)), + df.select(posexplode_outer($"intList")), Row(0, 1) :: Row(1, 2) :: Row(2, 3) :: Row(null, null) :: Nil) } @@ -117,13 +117,13 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") checkAnswer( - df.select($"a", explode('intList)), + df.select($"a", explode($"intList")), Row(1, 1) :: Row(1, 2) :: Row(1, 3) :: Nil) checkAnswer( - df.select($"*", explode('intList)), + df.select($"*", explode($"intList")), Row(1, Seq(1, 2, 3), 1) :: Row(1, Seq(1, 2, 3), 2) :: Row(1, Seq(1, 2, 3), 3) :: Nil) @@ -133,7 +133,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val df = Seq((1, Seq(1, 2, 3)), (2, Seq())).toDF("a", "intList") checkAnswer( - df.select($"a", explode_outer('intList)), + df.select($"a", explode_outer($"intList")), Row(1, 1) :: Row(1, 2) :: Row(1, 3) :: @@ -141,7 +141,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { Nil) checkAnswer( - df.select($"*", explode_outer('intList)), + df.select($"*", explode_outer($"intList")), Row(1, Seq(1, 2, 3), 1) :: Row(1, Seq(1, 2, 3), 2) :: Row(1, Seq(1, 2, 3), 3) :: @@ -153,11 +153,11 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") checkAnswer( - df.select(explode('intList).as('int)).select('int), + df.select(explode($"intList").as("int")).select($"int"), Row(1) :: Row(2) :: Row(3) :: Nil) checkAnswer( - df.select(explode('intList).as('int)).select(sum('int)), + df.select(explode($"intList").as("int")).select(sum($"int")), Row(6) :: Nil) } @@ -165,11 +165,11 @@ class GeneratorFunctionSuite extends 
QueryTest with SharedSparkSession { val df = Seq((1, Seq(1, 2, 3)), (2, Seq())).toDF("a", "intList") checkAnswer( - df.select(explode_outer('intList).as('int)).select('int), + df.select(explode_outer($"intList").as("int")).select($"int"), Row(1) :: Row(2) :: Row(3) :: Row(null) :: Nil) checkAnswer( - df.select(explode('intList).as('int)).select(sum('int)), + df.select(explode($"intList").as("int")).select(sum($"int")), Row(6) :: Nil) } @@ -177,7 +177,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val df = Seq((1, Map("a" -> "b"))).toDF("a", "map") checkAnswer( - df.select(explode('map)), + df.select(explode($"map")), Row("a", "b")) } @@ -186,7 +186,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { (3, Map("c" -> "d"))).toDF("a", "map") checkAnswer( - df.select(explode_outer('map)), + df.select(explode_outer($"map")), Row("a", "b") :: Row(null, null) :: Row("c", "d") :: Nil) } @@ -194,7 +194,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val df = Seq((1, Map("a" -> "b"))).toDF("a", "map") checkAnswer( - df.select(explode('map).as("key1" :: "value1" :: Nil)).select("key1", "value1"), + df.select(explode($"map").as("key1" :: "value1" :: Nil)).select("key1", "value1"), Row("a", "b")) } @@ -202,13 +202,13 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val df = Seq((3, None), (1, Some(Map("a" -> "b")))).toDF("a", "map") checkAnswer( - df.select(explode_outer('map).as("key1" :: "value1" :: Nil)).select("key1", "value1"), + df.select(explode_outer($"map").as("key1" :: "value1" :: Nil)).select("key1", "value1"), Row("a", "b") :: Row(null, null) :: Nil) } test("self join explode") { val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") - val exploded = df.select(explode('intList).as('i)) + val exploded = df.select(explode($"intList").as("i")) checkAnswer( exploded.join(exploded, exploded("i") === exploded("i")).agg(count("*")), @@ -277,7 +277,8 @@ class 
GeneratorFunctionSuite extends QueryTest with SharedSparkSession { test("inline_outer") { val df = Seq((1, "2"), (3, "4"), (5, "6")).toDF("col1", "col2") - val df2 = df.select(when('col1 === 1, null).otherwise(array(struct('col1, 'col2))).as("col1")) + val df2 = df.select( + when($"col1" === 1, null).otherwise(array(struct($"col1", $"col2"))).as("col1")) checkAnswer( df2.selectExpr("inline(col1)"), Row(3, "4") :: Row(5, "6") :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index d62fe961117a9..51150a1b38b49 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -22,6 +22,8 @@ import java.nio.file.{Files, Paths} import scala.collection.JavaConverters._ import scala.util.Try +import org.scalatest.Assertions._ + import org.apache.spark.TestUtils import org.apache.spark.api.python.{PythonBroadcast, PythonEvalType, PythonFunction, PythonUtils} import org.apache.spark.broadcast.Broadcast @@ -103,7 +105,7 @@ object IntegratedUDFTestUtils extends SQLHelper { Seq( pythonExec, "-c", - "from pyspark.sql.utils import require_minimum_pandas_version;" + + "from pyspark.sql.pandas.utils import require_minimum_pandas_version;" + "require_minimum_pandas_version()"), None, "PYTHONPATH" -> s"$pysparkPythonPath:$pythonPath").!! @@ -115,14 +117,14 @@ object IntegratedUDFTestUtils extends SQLHelper { Seq( pythonExec, "-c", - "from pyspark.sql.utils import require_minimum_pyarrow_version;" + + "from pyspark.sql.pandas.utils import require_minimum_pyarrow_version;" + "require_minimum_pyarrow_version()"), None, "PYTHONPATH" -> s"$pysparkPythonPath:$pythonPath").!! 
true }.getOrElse(false) - private lazy val pythonVer = if (isPythonAvailable) { + lazy val pythonVer: String = if (isPythonAvailable) { Process( Seq(pythonExec, "-c", "import sys; print('%d.%d' % sys.version_info[:2])"), None, @@ -131,6 +133,24 @@ object IntegratedUDFTestUtils extends SQLHelper { throw new RuntimeException(s"Python executable [$pythonExec] is unavailable.") } + lazy val pandasVer: String = if (isPandasAvailable) { + Process( + Seq(pythonExec, "-c", "import pandas; print(pandas.__version__)"), + None, + "PYTHONPATH" -> s"$pysparkPythonPath:$pythonPath").!!.trim() + } else { + throw new RuntimeException("Pandas is unavailable.") + } + + lazy val pyarrowVer: String = if (isPyArrowAvailable) { + Process( + Seq(pythonExec, "-c", "import pyarrow; print(pyarrow.__version__)"), + None, + "PYTHONPATH" -> s"$pysparkPythonPath:$pythonPath").!!.trim() + } else { + throw new RuntimeException("PyArrow is unavailable.") + } + // Dynamically pickles and reads the Python instance into JVM side in order to mimic // Python native function within Python UDF. 
private lazy val pythonFunc: Array[Byte] = if (shouldTestPythonUDFs) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala index 6b154253e6e6c..f68c416941266 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinHintSuite.scala @@ -17,20 +17,18 @@ package org.apache.spark.sql -import scala.collection.mutable.ArrayBuffer - -import org.apache.log4j.{AppenderSkeleton, Level} -import org.apache.log4j.spi.LoggingEvent +import org.apache.log4j.Level import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -class JoinHintSuite extends PlanTest with SharedSparkSession { +class JoinHintSuite extends PlanTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ lazy val df = spark.range(10) @@ -38,14 +36,6 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { lazy val df2 = df.selectExpr("id as b1", "id as b2") lazy val df3 = df.selectExpr("id as c1", "id as c2") - class MockAppender extends AppenderSkeleton { - val loggingEvents = new ArrayBuffer[LoggingEvent]() - - override def append(loggingEvent: LoggingEvent): Unit = loggingEvents.append(loggingEvent) - override def close(): Unit = {} - override def requiresLayout(): Boolean = false - } - def msgNoHintRelationFound(relation: String, hint: String): String = s"Count not find relation '$relation' specified in hint '$hint'." 
@@ -59,7 +49,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { df: => DataFrame, expectedHints: Seq[JoinHint], warnings: Seq[String]): Unit = { - val logAppender = new MockAppender() + val logAppender = new LogAppender("join hints") withLogAppender(logAppender) { verifyJoinHint(df, expectedHints) } @@ -99,7 +89,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { test("multiple joins") { verifyJoinHint( - df1.join(df2.hint("broadcast").join(df3, 'b1 === 'c1).hint("broadcast"), 'a1 === 'c1), + df1.join(df2.hint("broadcast").join(df3, $"b1" === $"c1").hint("broadcast"), $"a1" === $"c1"), JoinHint( None, Some(HintInfo(strategy = Some(BROADCAST)))) :: @@ -108,7 +98,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { None) :: Nil ) verifyJoinHint( - df1.hint("broadcast").join(df2, 'a1 === 'b1).hint("broadcast").join(df3, 'a1 === 'c1), + df1.hint("broadcast").join(df2, $"a1" === $"b1").hint("broadcast").join(df3, $"a1" === $"c1"), JoinHint( Some(HintInfo(strategy = Some(BROADCAST))), None) :: @@ -180,8 +170,8 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { ) verifyJoinHint( - df1.join(df2, 'a1 === 'b1 && 'a1 > 5).hint("broadcast") - .join(df3, 'b1 === 'c1 && 'a1 < 10), + df1.join(df2, $"a1" === $"b1" && $"a1" > 5).hint("broadcast") + .join(df3, $"b1" === $"c1" && $"a1" < 10), JoinHint( Some(HintInfo(strategy = Some(BROADCAST))), None) :: @@ -189,9 +179,9 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { ) verifyJoinHint( - df1.join(df2, 'a1 === 'b1 && 'a1 > 5).hint("broadcast") - .join(df3, 'b1 === 'c1 && 'a1 < 10) - .join(df, 'b1 === 'id), + df1.join(df2, $"a1" === $"b1" && $"a1" > 5).hint("broadcast") + .join(df3, $"b1" === $"c1" && $"a1" < 10) + .join(df, $"b1" === $"id"), JoinHint.NONE :: JoinHint( Some(HintInfo(strategy = Some(BROADCAST))), @@ -222,7 +212,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { test("hint merge") { verifyJoinHintWithWarnings( - 
df.hint("broadcast").filter('id > 2).hint("broadcast").join(df, "id"), + df.hint("broadcast").filter($"id" > 2).hint("broadcast").join(df, "id"), JoinHint( Some(HintInfo(strategy = Some(BROADCAST))), None) :: Nil, @@ -236,7 +226,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { Nil ) verifyJoinHintWithWarnings( - df.hint("merge").filter('id > 2).hint("shuffle_hash").join(df, "id").hint("broadcast"), + df.hint("merge").filter($"id" > 2).hint("shuffle_hash").join(df, "id").hint("broadcast"), JoinHint( Some(HintInfo(strategy = Some(SHUFFLE_HASH))), None) :: Nil, @@ -312,13 +302,13 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { test("nested hint") { verifyJoinHint( - df.hint("broadcast").hint("broadcast").filter('id > 2).join(df, "id"), + df.hint("broadcast").hint("broadcast").filter($"id" > 2).join(df, "id"), JoinHint( Some(HintInfo(strategy = Some(BROADCAST))), None) :: Nil ) verifyJoinHint( - df.hint("shuffle_hash").hint("broadcast").hint("merge").filter('id > 2).join(df, "id"), + df.hint("shuffle_hash").hint("broadcast").hint("merge").filter($"id" > 2).join(df, "id"), JoinHint( Some(HintInfo(strategy = Some(SHUFFLE_MERGE))), None) :: Nil @@ -352,7 +342,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { private def assertBroadcastHashJoin(df: DataFrame, buildSide: BuildSide): Unit = { val executedPlan = df.queryExecution.executedPlan - val broadcastHashJoins = executedPlan.collect { + val broadcastHashJoins = collect(executedPlan) { case b: BroadcastHashJoinExec => b } assert(broadcastHashJoins.size == 1) @@ -361,7 +351,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { private def assertBroadcastNLJoin(df: DataFrame, buildSide: BuildSide): Unit = { val executedPlan = df.queryExecution.executedPlan - val broadcastNLJoins = executedPlan.collect { + val broadcastNLJoins = collect(executedPlan) { case b: BroadcastNestedLoopJoinExec => b } assert(broadcastNLJoins.size == 1) @@ -370,7 +360,7 @@ class 
JoinHintSuite extends PlanTest with SharedSparkSession { private def assertShuffleHashJoin(df: DataFrame, buildSide: BuildSide): Unit = { val executedPlan = df.queryExecution.executedPlan - val shuffleHashJoins = executedPlan.collect { + val shuffleHashJoins = collect(executedPlan) { case s: ShuffledHashJoinExec => s } assert(shuffleHashJoins.size == 1) @@ -379,7 +369,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { private def assertShuffleMergeJoin(df: DataFrame): Unit = { val executedPlan = df.queryExecution.executedPlan - val shuffleMergeJoins = executedPlan.collect { + val shuffleMergeJoins = collect(executedPlan) { case s: SortMergeJoinExec => s } assert(shuffleMergeJoins.size == 1) @@ -387,7 +377,7 @@ class JoinHintSuite extends PlanTest with SharedSparkSession { private def assertShuffleReplicateNLJoin(df: DataFrame): Unit = { val executedPlan = df.queryExecution.executedPlan - val shuffleReplicateNLJoins = executedPlan.collect { + val shuffleReplicateNLJoins = collect(executedPlan) { case c: CartesianProductExec => c } assert(shuffleReplicateNLJoins.size == 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 72742644ff34e..f45bd950040ce 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -22,21 +22,41 @@ import java.util.Locale import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer +import org.mockito.Mockito._ + import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation -import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Ascending, GenericRow, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.Filter -import 
org.apache.spark.sql.execution.{BinaryExecNode, FilterExec, SortExec} +import org.apache.spark.sql.execution.{BinaryExecNode, FilterExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.execution.python.BatchEvalPythonExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType -class JoinSuite extends QueryTest with SharedSparkSession { +class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ + private def attachCleanupResourceChecker(plan: SparkPlan): Unit = { + // SPARK-21492: Check cleanupResources are finally triggered in SortExec node for every + // test case + plan.foreachUp { + case s: SortExec => + val sortExec = spy(s) + verify(sortExec, atLeastOnce).cleanupResources() + verify(sortExec.rowSorter, atLeastOnce).cleanupResources() + case _ => + } + } + + override protected def checkAnswer(df: => DataFrame, rows: Seq[Row]): Unit = { + attachCleanupResourceChecker(df.queryExecution.sparkPlan) + super.checkAnswer(df, rows) + } + setupTestData() def statisticSizeInByte(df: DataFrame): BigInt = { @@ -219,7 +239,9 @@ class JoinSuite extends QueryTest with SharedSparkSession { checkAnswer( bigDataX.join(bigDataY).where($"x.key" === $"y.key"), - testData.rdd.flatMap(row => Seq.fill(16)(Row.merge(row, row))).collect().toSeq) + testData.rdd.flatMap { row => + Seq.fill(16)(new GenericRow(Seq(row, row).flatMap(_.toSeq).toArray)) + }.collect().toSeq) } test("cartesian product join") { @@ -503,10 +525,10 @@ class JoinSuite extends QueryTest with SharedSparkSession { SQLConf.CROSS_JOINS_ENABLED.key -> "true") { assert(statisticSizeInByte(spark.table("testData2")) > - spark.conf.get(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + spark.conf.get[Long](SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) 
assert(statisticSizeInByte(spark.table("testData")) < - spark.conf.get(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + spark.conf.get[Long](SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) Seq( ("SELECT * FROM testData LEFT SEMI JOIN testData2 ON key = a", @@ -821,7 +843,7 @@ class JoinSuite extends QueryTest with SharedSparkSession { case j: SortMergeJoinExec => j } val executed = df.queryExecution.executedPlan - val executedJoins = executed.collect { + val executedJoins = collect(executed) { case j: SortMergeJoinExec => j } // This only applies to the above tested queries, in which a child SortMergeJoin always @@ -1005,12 +1027,12 @@ class JoinSuite extends QueryTest with SharedSparkSession { val right = Seq((1, 2), (3, 4)).toDF("c", "d") val df = left.join(right, pythonTestUDF(left("a")) === pythonTestUDF(right.col("c"))) - val joinNode = df.queryExecution.executedPlan.find(_.isInstanceOf[BroadcastHashJoinExec]) + val joinNode = find(df.queryExecution.executedPlan)(_.isInstanceOf[BroadcastHashJoinExec]) assert(joinNode.isDefined) // There are two PythonUDFs which use attribute from left and right of join, individually. // So two PythonUDFs should be evaluated before the join operator, at left and right side. - val pythonEvals = joinNode.get.collect { + val pythonEvals = collect(joinNode.get) { case p: BatchEvalPythonExec => p } assert(pythonEvals.size == 2) @@ -1034,9 +1056,30 @@ class JoinSuite extends QueryTest with SharedSparkSession { assert(filterInAnalysis.isDefined) // Filter predicate was pushdown as join condition. So there is no Filter exec operator. 
- val filterExec = df.queryExecution.executedPlan.find(_.isInstanceOf[FilterExec]) + val filterExec = find(df.queryExecution.executedPlan)(_.isInstanceOf[FilterExec]) assert(filterExec.isEmpty) checkAnswer(df, Row(1, 2, 1, 2) :: Nil) } + + test("SPARK-21492: cleanupResource without code generation") { + withSQLConf( + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.SHUFFLE_PARTITIONS.key -> "1", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val df1 = spark.range(0, 10, 1, 2) + val df2 = spark.range(10).select($"id".as("b1"), (- $"id").as("b2")) + val res = df1.join(df2, $"id" === $"b1" && $"id" === $"b2").select($"b1", $"b2", $"id") + checkAnswer(res, Row(0, 0, 0)) + } + } + + test("SPARK-29850: sort-merge-join an empty table should not memory leak") { + val df1 = spark.range(10).select($"id", $"id" % 3 as 'p) + .repartition($"id").groupBy($"id").agg(Map("p" -> "max")) + val df2 = spark.range(0) + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + assert(df2.join(df1, "id").collect().isEmpty) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 92a4acc130be5..fd1e9e309558e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat import java.util.Locale @@ -38,6 +39,13 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { Row("alice", "5")) } + test("function get_json_object - support single quotes") { + val df: DataFrame = Seq(("""{'name': 'fang', 'age': 5}""")).toDF("a") + checkAnswer( + df.selectExpr("get_json_object(a, '$.name')", "get_json_object(a, '$.age')"), + Row("fang", "5")) + } + val tuples: Seq[(String, String)] = ("1", """{"f1": "value1", "f2": "value2", "f3": 3, 
"f5": 5.23}""") :: ("2", """{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}""") :: @@ -214,33 +222,24 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { Row("""{"_1":"26/08/2015 18:00"}""") :: Nil) } - test("to_json - key types of map don't matter") { - // interval type is invalid for converting to JSON. However, the keys of a map are treated - // as strings, so its type doesn't matter. - val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a") - .select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c")) + test("to_json - interval support") { + val baseDf = Seq(Tuple1(Tuple1("-3 month 7 hours"))).toDF("a") + val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c")) checkAnswer( df.select(to_json($"c")), - Row("""{"col1":{"interval -3 months 7 hours":"a"}}""") :: Nil) - } + Row("""{"a":"-3 months 7 hours"}""") :: Nil) - test("to_json unsupported type") { - val baseDf = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a") - val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c")) - val e = intercept[AnalysisException]{ - // Unsupported type throws an exception - df.select(to_json($"c")).collect() - } - assert(e.getMessage.contains( - "Unable to convert column a of type interval to JSON.")) + val df1 = baseDf + .select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c")) + checkAnswer( + df1.select(to_json($"c")), + Row("""{"col1":{"-3 months 7 hours":"a"}}""") :: Nil) - // interval type is invalid for converting to JSON. We can't use it as value type of a map. 
val df2 = baseDf .select(struct(map(lit("a"), $"a._1".cast(CalendarIntervalType)).as("col1")).as("c")) - val e2 = intercept[AnalysisException] { - df2.select(to_json($"c")).collect() - } - assert(e2.getMessage.contains("Unable to convert column col1 of type interval to JSON")) + checkAnswer( + df2.select(to_json($"c")), + Row("""{"col1":{"a":"-3 months 7 hours"}}""") :: Nil) } test("roundtrip in to_json and from_json - struct") { @@ -608,4 +607,50 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df, Row(Row(java.sql.Timestamp.valueOf("2018-11-06 18:00:00.0")))) } } + + test("special timestamp values") { + Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue => + val input = Seq(s"""{"t": "$specialValue"}""").toDS() + val readback = input.select(from_json($"value", lit("t timestamp"), + Map.empty[String, String].asJava)).collect() + assert(readback(0).getAs[Row](0).getAs[Timestamp](0).getTime >= 0) + } + } + + test("special date values") { + Seq("now", "today", "epoch", "tomorrow", "yesterday").foreach { specialValue => + val input = Seq(s"""{"d": "$specialValue"}""").toDS() + val readback = input.select(from_json($"value", lit("d date"), + Map.empty[String, String].asJava)).collect() + assert(readback(0).getAs[Row](0).getAs[Date](0).getTime >= 0) + } + } + + test("from_json - timestamp in micros") { + val df = Seq("""{"time": "1970-01-01T00:00:00.123456"}""").toDS() + val schema = new StructType().add("time", TimestampType) + val options = Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss.SSSSSS") + + checkAnswer( + df.select(from_json($"value", schema, options)), + Row(Row(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.123456")))) + } + + test("to_json - timestamp in micros") { + val s = "2019-11-18 11:56:00.123456" + val df = Seq(java.sql.Timestamp.valueOf(s)).toDF("t").select( + to_json(struct($"t"), Map("timestampFormat" -> "yyyy-MM-dd HH:mm:ss.SSSSSS"))) + checkAnswer(df, Row(s"""{"t":"$s"}""")) + } + + 
test("json_tuple - do not truncate results") { + Seq(2000, 2800, 8000 - 1, 8000, 8000 + 1, 65535).foreach { len => + val str = Array.tabulate(len)(_ => "a").mkString + val json_tuple_result = Seq(s"""{"test":"$str"}""").toDF("json") + .withColumn("result", json_tuple('json, "test")) + .select('result) + .as[String].head.length + assert(json_tuple_result === len) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala index 6b90f20a94fa4..36db95ff8a31b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala @@ -27,14 +27,14 @@ trait LocalSparkSession extends BeforeAndAfterEach with BeforeAndAfterAll { self @transient var spark: SparkSession = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() InternalLoggerFactory.setDefaultFactory(Slf4JLoggerFactory.INSTANCE) SparkSession.clearActiveSession() SparkSession.clearDefaultSession() } - override def afterEach() { + override def afterEach(): Unit = { try { LocalSparkSession.stop(spark) SparkSession.clearActiveSession() @@ -47,7 +47,7 @@ trait LocalSparkSession extends BeforeAndAfterEach with BeforeAndAfterAll { self } object LocalSparkSession { - def stop(spark: SparkSession) { + def stop(spark: SparkSession): Unit = { if (spark != null) { spark.stop() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 567bcdd1878a8..bd86c2ec075b0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets import org.apache.spark.sql.functions._ import org.apache.spark.sql.functions.{log => logarithm} +import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession private object MathFunctionsTestData { @@ -218,19 +219,21 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(5, 0, 0), Row(55, 60, 100), Row(555, 560, 600)) ) - val pi = "3.1415" - checkAnswer( - sql(s"SELECT round($pi, -3), round($pi, -2), round($pi, -1), " + - s"round($pi, 0), round($pi, 1), round($pi, 2), round($pi, 3)"), - Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), BigDecimal(3), - BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142"))) - ) - checkAnswer( - sql(s"SELECT bround($pi, -3), bround($pi, -2), bround($pi, -1), " + - s"bround($pi, 0), bround($pi, 1), bround($pi, 2), bround($pi, 3)"), - Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), BigDecimal(3), - BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142"))) - ) + withSQLConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED.key -> "true") { + val pi = "3.1415" + checkAnswer( + sql(s"SELECT round($pi, -3), round($pi, -2), round($pi, -1), " + + s"round($pi, 0), round($pi, 1), round($pi, 2), round($pi, 3)"), + Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), BigDecimal(3), + BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142"))) + ) + checkAnswer( + sql(s"SELECT bround($pi, -3), bround($pi, -2), bround($pi, -1), " + + s"bround($pi, 0), bround($pi, 1), bround($pi, 2), bround($pi, 3)"), + Seq(Row(BigDecimal("0E3"), BigDecimal("0E2"), BigDecimal("0E1"), BigDecimal(3), + BigDecimal("3.1"), BigDecimal("3.14"), BigDecimal("3.142"))) + ) + } val bdPi: BigDecimal = BigDecimal(31415925L, 7) checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MiscFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MiscFunctionsSuite.scala index cad0821dbf5aa..5ab06b1ebebf6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MiscFunctionsSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/MiscFunctionsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.sql.test.SharedSparkSession class MiscFunctionsSuite extends QueryTest with SharedSparkSession { @@ -31,6 +32,12 @@ class MiscFunctionsSuite extends QueryTest with SharedSparkSession { s"java_method('$className', 'method1', a, b)"), Row("m1one", "m1one")) } + + test("version") { + checkAnswer( + Seq("").toDF("a").selectExpr("version()"), + Row(SPARK_VERSION_SHORT + " " + SPARK_REVISION)) + } } object ReflectClass { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index 3039a4ccb677c..4a21ae9242039 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -21,6 +21,9 @@ import java.util.{Locale, TimeZone} import scala.collection.JavaConverters._ +import org.junit.Assert +import org.scalatest.Assertions + import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.SQLExecution @@ -150,10 +153,7 @@ abstract class QueryTest extends PlanTest { assertEmptyMissingInput(analyzedDF) - QueryTest.checkAnswer(analyzedDF, expectedAnswer) match { - case Some(errorMessage) => fail(errorMessage) - case None => - } + QueryTest.checkAnswer(analyzedDF, expectedAnswer) } protected def checkAnswer(df: => DataFrame, expectedAnswer: Row): Unit = { @@ -235,18 +235,32 @@ abstract class QueryTest extends PlanTest { } } -object QueryTest { +object QueryTest extends Assertions { + /** + * Runs the plan and makes sure the answer matches the expected result. + * + * @param df the DataFrame to be executed + * @param expectedAnswer the expected result in a Seq of Rows. + * @param checkToRDD whether to verify deserialization to an RDD. 
This runs the query twice. + */ + def checkAnswer(df: DataFrame, expectedAnswer: Seq[Row], checkToRDD: Boolean = true): Unit = { + getErrorMessageInCheckAnswer(df, expectedAnswer, checkToRDD) match { + case Some(errorMessage) => fail(errorMessage) + case None => + } + } + /** * Runs the plan and makes sure the answer matches the expected result. * If there was exception during the execution or the contents of the DataFrame does not - * match the expected result, an error message will be returned. Otherwise, a [[None]] will + * match the expected result, an error message will be returned. Otherwise, a None will * be returned. * - * @param df the [[DataFrame]] to be executed - * @param expectedAnswer the expected result in a [[Seq]] of [[Row]]s. + * @param df the DataFrame to be executed + * @param expectedAnswer the expected result in a Seq of Rows. * @param checkToRDD whether to verify deserialization to an RDD. This runs the query twice. */ - def checkAnswer( + def getErrorMessageInCheckAnswer( df: DataFrame, expectedAnswer: Seq[Row], checkToRDD: Boolean = true): Option[String] = { @@ -408,10 +422,10 @@ object QueryTest { } } - def checkAnswer(df: DataFrame, expectedAnswer: java.util.List[Row]): String = { - checkAnswer(df, expectedAnswer.asScala) match { - case Some(errorMessage) => errorMessage - case None => null + def checkAnswer(df: DataFrame, expectedAnswer: java.util.List[Row]): Unit = { + getErrorMessageInCheckAnswer(df, expectedAnswer.asScala) match { + case Some(errorMessage) => Assert.fail(errorMessage) + case None => } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b8b157e275b61..11f9724e587f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -22,11 +22,19 @@ import java.net.{MalformedURLException, URL} import java.sql.{Date, Timestamp} import 
java.util.concurrent.atomic.AtomicBoolean +import scala.collection.parallel.immutable.ParVector + import org.apache.spark.{AccumulatorSuite, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} +import org.apache.spark.sql.catalyst.expressions.GenericRow +import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial} +import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.util.StringUtils -import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, SortAggregateExec} +import org.apache.spark.sql.execution.HiveResult.hiveResultString +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec +import org.apache.spark.sql.execution.command.FunctionsCommand import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan @@ -36,8 +44,9 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{SharedSparkSession, TestSQLContext} import org.apache.spark.sql.test.SQLTestData._ import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval -class SQLQuerySuite extends QueryTest with SharedSparkSession { +class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ setupTestData() @@ -55,7 +64,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { test("show functions") { def getFunctions(pattern: String): Seq[Row] = { StringUtils.filterPattern( - spark.sessionState.catalog.listFunctions("default").map(_._1.funcName), pattern) + spark.sessionState.catalog.listFunctions("default").map(_._1.funcName) + ++ 
FunctionsCommand.virtualOperators, pattern) .map(Row(_)) } @@ -115,6 +125,81 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { } } + test("using _FUNC_ instead of function names in examples") { + val exampleRe = "(>.*;)".r + val setStmtRe = "(?i)^(>\\s+set\\s+).+".r + val ignoreSet = Set( + // Examples for CaseWhen show simpler syntax: + // `CASE WHEN ... THEN ... WHEN ... THEN ... END` + "org.apache.spark.sql.catalyst.expressions.CaseWhen", + // _FUNC_ is replaced by `locate` but `locate(... IN ...)` is not supported + "org.apache.spark.sql.catalyst.expressions.StringLocate", + // _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)` + "org.apache.spark.sql.catalyst.expressions.Remainder", + // Examples demonstrate alternative names, see SPARK-20749 + "org.apache.spark.sql.catalyst.expressions.Length") + spark.sessionState.functionRegistry.listFunction().foreach { funcId => + val info = spark.sessionState.catalog.lookupFunctionInfo(funcId) + val className = info.getClassName + withClue(s"Expression class '$className'") { + val exprExamples = info.getOriginalExamples + if (!exprExamples.isEmpty && !ignoreSet.contains(className)) { + assert(exampleRe.findAllIn(exprExamples).toIterable + .filter(setStmtRe.findFirstIn(_).isEmpty) // Ignore SET commands + .forall(_.contains("_FUNC_"))) + } + } + } + } + + test("check outputs of expression examples") { + def unindentAndTrim(s: String): String = { + s.replaceAll("\n\\s+", "\n").trim + } + val beginSqlStmtRe = " > ".r + val endSqlStmtRe = ";\n".r + def checkExampleSyntax(example: String): Unit = { + val beginStmtNum = beginSqlStmtRe.findAllIn(example).length + val endStmtNum = endSqlStmtRe.findAllIn(example).length + assert(beginStmtNum === endStmtNum, + "The number of ` > ` does not match to the number of `;`") + } + val exampleRe = """^(.+);\n(?s)(.+)$""".r + val ignoreSet = Set( + // One of examples shows getting the current timestamp + 
"org.apache.spark.sql.catalyst.expressions.UnixTimestamp", + // Random output without a seed + "org.apache.spark.sql.catalyst.expressions.Rand", + "org.apache.spark.sql.catalyst.expressions.Randn", + "org.apache.spark.sql.catalyst.expressions.Shuffle", + "org.apache.spark.sql.catalyst.expressions.Uuid", + // The example calls methods that return unstable results. + "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") + + val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector) + parFuncs.foreach { funcId => + // Examples can change settings. We clone the session to prevent tests clashing. + val clonedSpark = spark.cloneSession() + val info = clonedSpark.sessionState.catalog.lookupFunctionInfo(funcId) + val className = info.getClassName + if (!ignoreSet.contains(className)) { + withClue(s"Function '${info.getName}', Expression class '$className'") { + val example = info.getExamples + checkExampleSyntax(example) + example.split(" > ").toList.foreach(_ match { + case exampleRe(sql, output) => + val df = clonedSpark.sql(sql) + val actual = unindentAndTrim( + hiveResultString(df.queryExecution.executedPlan).mkString("\n")) + val expected = unindentAndTrim(output) + assert(actual === expected) + case _ => + }) + } + } + } + } + test("SPARK-6743: no columns from cache") { Seq( (83, 0, 38), @@ -699,8 +784,9 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { | SELECT * FROM testData UNION ALL | SELECT * FROM testData) y |WHERE x.key = y.key""".stripMargin), - testData.rdd.flatMap( - row => Seq.fill(16)(Row.merge(row, row))).collect().toSeq) + testData.rdd.flatMap { row => + Seq.fill(16)(new GenericRow(Seq(row, row).flatMap(_.toSeq).toArray)) + }.collect().toSeq) } test("cartesian product join") { @@ -1473,7 +1559,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { import org.apache.spark.unsafe.types.CalendarInterval val df = sql("select interval 3 years -3 month 7 week 123 microseconds") - 
checkAnswer(df, Row(new CalendarInterval(12 * 3 - 3, 7L * 1000 * 1000 * 3600 * 24 * 7 + 123 ))) + checkAnswer(df, Row(new CalendarInterval(12 * 3 - 3, 7 * 7, 123 ))) withTempPath(f => { // Currently we don't yet support saving out values of interval data type. val e = intercept[AnalysisException] { @@ -1481,35 +1567,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { } e.message.contains("Cannot save interval data type into external storage") }) - - val e1 = intercept[AnalysisException] { - sql("select interval") - } - assert(e1.message.contains("at least one time unit should be given for interval literal")) - - // Currently we don't yet support nanosecond - val e2 = intercept[AnalysisException] { - sql("select interval 23 nanosecond") - } - assert(e2.message.contains("no viable alternative at input 'interval 23 nanosecond'")) } test("SPARK-8945: add and subtract expressions for interval type") { - import org.apache.spark.unsafe.types.CalendarInterval - import org.apache.spark.unsafe.types.CalendarInterval.MICROS_PER_WEEK - val df = sql("select interval 3 years -3 month 7 week 123 microseconds as i") - checkAnswer(df, Row(new CalendarInterval(12 * 3 - 3, 7L * MICROS_PER_WEEK + 123))) + checkAnswer(df, Row(new CalendarInterval(12 * 3 - 3, 7 * 7, 123))) - checkAnswer(df.select(df("i") + new CalendarInterval(2, 123)), - Row(new CalendarInterval(12 * 3 - 3 + 2, 7L * MICROS_PER_WEEK + 123 + 123))) + checkAnswer(df.select(df("i") + new CalendarInterval(2, 1, 123)), + Row(new CalendarInterval(12 * 3 - 3 + 2, 7 * 7 + 1, 123 + 123))) - checkAnswer(df.select(df("i") - new CalendarInterval(2, 123)), - Row(new CalendarInterval(12 * 3 - 3 - 2, 7L * MICROS_PER_WEEK + 123 - 123))) + checkAnswer(df.select(df("i") - new CalendarInterval(2, 1, 123)), + Row(new CalendarInterval(12 * 3 - 3 - 2, 7 * 7 - 1, 123 - 123))) // unary minus checkAnswer(df.select(-df("i")), - Row(new CalendarInterval(-(12 * 3 - 3), -(7L * MICROS_PER_WEEK + 123)))) + Row(new 
CalendarInterval(-(12 * 3 - 3), -7 * 7, -123))) } test("aggregation with codegen updates peak execution memory") { @@ -2609,14 +2681,14 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { } // Make sure no spurious job starts are pending in the listener bus. - sparkContext.listenerBus.waitUntilEmpty(500) + sparkContext.listenerBus.waitUntilEmpty() sparkContext.addSparkListener(listener) try { // Execute the command. sql("show databases").head() // Make sure we have seen all events triggered by DataFrame.show() - sparkContext.listenerBus.waitUntilEmpty(500) + sparkContext.listenerBus.waitUntilEmpty() } finally { sparkContext.removeSparkListener(listener) } @@ -2765,6 +2837,44 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { checkAnswer(df, Row(1, 3, 4) :: Row(2, 3, 4) :: Row(3, 3, 4) :: Nil) } + test("Support filter clause for aggregate function with hash aggregate") { + Seq(("COUNT(a)", 3), ("COLLECT_LIST(a)", Seq(1, 2, 3))).foreach { funcToResult => + val query = s"SELECT ${funcToResult._1} FILTER (WHERE b > 1) FROM testData2" + val df = sql(query) + val physical = df.queryExecution.sparkPlan + val aggregateExpressions = physical.collect { + case agg: HashAggregateExec => agg.aggregateExpressions + case agg: ObjectHashAggregateExec => agg.aggregateExpressions + }.flatten + aggregateExpressions.foreach { expr => + if (expr.mode == Complete || expr.mode == Partial) { + assert(expr.filter.isDefined) + } else { + assert(expr.filter.isEmpty) + } + } + checkAnswer(df, Row(funcToResult._2)) + } + } + + test("Support filter clause for aggregate function uses SortAggregateExec") { + withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { + val df = sql("SELECT PERCENTILE(a, 1) FILTER (WHERE b > 1) FROM testData2") + val physical = df.queryExecution.sparkPlan + val aggregateExpressions = physical.collect { + case agg: SortAggregateExec => agg.aggregateExpressions + }.flatten + aggregateExpressions.foreach { expr => + if (expr.mode == 
Complete || expr.mode == Partial) { + assert(expr.filter.isDefined) + } else { + assert(expr.filter.isEmpty) + } + } + checkAnswer(df, Row(3)) + } + } + test("Non-deterministic aggregate functions should not be deduplicated") { val query = "SELECT a, first_value(b), first_value(b) + 1 FROM testData2 GROUP BY a" val df = sql(query) @@ -3149,6 +3259,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { checkAnswer(sql("select * from t1 where d > '1999-13'"), Row(result)) checkAnswer(sql("select to_timestamp('2000-01-01 01:10:00') > '1'"), Row(true)) } + sql("DROP VIEW t1") } test("SPARK-28156: self-join should not miss cached view") { @@ -3171,7 +3282,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { |on leftside.a = rightside.a """.stripMargin) - val inMemoryTableScan = queryDf.queryExecution.executedPlan.collect { + val inMemoryTableScan = collect(queryDf.queryExecution.executedPlan) { case i: InMemoryTableScanExec => i } assert(inMemoryTableScan.size == 2) @@ -3180,6 +3291,109 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession { } } + + test("SPARK-29000: arithmetic computation overflow when don't allow decimal precision loss ") { + withSQLConf(SQLConf.DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key -> "false") { + val df1 = sql("select case when 1=2 then 1 else 100.000000000000000000000000 end * 1") + checkAnswer(df1, Array(Row(100))) + val df2 = sql("select case when 1=2 then 1 else 100.000000000000000000000000 end * " + + "case when 1=2 then 2 else 1 end") + checkAnswer(df2, Array(Row(100))) + val df3 = sql("select case when 1=2 then 1 else 1.000000000000000000000001 end / 10") + checkAnswer(df3, Array(Row(new java.math.BigDecimal("0.100000000000000000000000100")))) + } + } + + test("SPARK-29239: Subquery should not cause NPE when eliminating subexpression") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.SUBQUERY_REUSE_ENABLED.key -> "false", + SQLConf.CODEGEN_FACTORY_MODE.key -> 
"CODEGEN_ONLY", + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> ConvertToLocalRelation.ruleName) { + withTempView("t1", "t2") { + sql("create temporary view t1 as select * from values ('val1a', 10L) as t1(t1a, t1b)") + sql("create temporary view t2 as select * from values ('val3a', 110L) as t2(t2a, t2b)") + val df = sql("SELECT min, min from (SELECT (SELECT min(t2b) FROM t2) min " + + "FROM t1 WHERE t1a = 'val1c')") + assert(df.collect().size == 0) + } + } + } + + test("SPARK-29213: FilterExec should not throw NPE") { + withTempView("t1", "t2", "t3") { + sql("SELECT ''").as[String].map(identity).toDF("x").createOrReplaceTempView("t1") + sql("SELECT * FROM VALUES 0, CAST(NULL AS BIGINT)") + .as[java.lang.Long] + .map(identity) + .toDF("x") + .createOrReplaceTempView("t2") + sql("SELECT ''").as[String].map(identity).toDF("x").createOrReplaceTempView("t3") + sql( + """ + |SELECT t1.x + |FROM t1 + |LEFT JOIN ( + | SELECT x FROM ( + | SELECT x FROM t2 + | UNION ALL + | SELECT SUBSTR(x,5) x FROM t3 + | ) a + | WHERE LENGTH(x)>0 + |) t3 + |ON t1.x=t3.x + """.stripMargin).collect() + } + } + + test("SPARK-29682: Conflicting attributes in Expand are resolved") { + val numsDF = Seq(1, 2, 3).toDF("nums") + val cubeDF = numsDF.cube("nums").agg(max(lit(0)).as("agcol")) + + checkAnswer( + cubeDF.join(cubeDF, "nums"), + Row(1, 0, 0) :: Row(2, 0, 0) :: Row(3, 0, 0) :: Nil) + } + + test("SPARK-29860: Fix dataType mismatch issue for InSubquery") { + withTempView("ta", "tb", "tc", "td", "te", "tf") { + sql("CREATE TEMPORARY VIEW ta AS SELECT * FROM VALUES(CAST(1 AS DECIMAL(8, 0))) AS ta(id)") + sql("CREATE TEMPORARY VIEW tb AS SELECT * FROM VALUES(CAST(1 AS DECIMAL(7, 2))) AS tb(id)") + sql("CREATE TEMPORARY VIEW tc AS SELECT * FROM VALUES(CAST(1 AS DOUBLE)) AS tc(id)") + sql("CREATE TEMPORARY VIEW td AS SELECT * FROM VALUES(CAST(1 AS FLOAT)) AS td(id)") + sql("CREATE TEMPORARY VIEW te AS SELECT * FROM VALUES(CAST(1 AS BIGINT)) AS te(id)") + sql("CREATE TEMPORARY VIEW tf AS SELECT * FROM 
VALUES(CAST(1 AS DECIMAL(38, 38))) AS tf(id)") + val df1 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tb)") + checkAnswer(df1, Row(new java.math.BigDecimal(1))) + val df2 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tc)") + checkAnswer(df2, Row(new java.math.BigDecimal(1))) + val df3 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM td)") + checkAnswer(df3, Row(new java.math.BigDecimal(1))) + val df4 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM te)") + checkAnswer(df4, Row(new java.math.BigDecimal(1))) + val df5 = sql("SELECT id FROM ta WHERE id IN (SELECT id FROM tf)") + checkAnswer(df5, Array.empty[Row]) + } + } + + test("SPARK-30447: fix constant propagation inside NOT") { + withTempView("t") { + Seq[Integer](1, null).toDF("c").createOrReplaceTempView("t") + val df = sql("SELECT * FROM t WHERE NOT(c = 1 AND c + 1 = 1)") + + checkAnswer(df, Row(1)) + } + } + + test("SPARK-26218: Fix the corner case when casting float to Integer") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + intercept[ArithmeticException]( + sql("SELECT CAST(CAST(2147483648 as FLOAT) as Integer)").collect() + ) + intercept[ArithmeticException]( + sql("SELECT CAST(CAST(2147483648 as DOUBLE) as Integer)").collect() + ) + } + } } case class Foo(bar: Option[String]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 28ca0edaef871..83285911b3948 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -19,19 +19,23 @@ package org.apache.spark.sql import java.io.File import java.util.{Locale, TimeZone} +import java.util.regex.Pattern +import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.util.control.NonFatal +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import 
org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.logical.sql.{DescribeColumnStatement, DescribeTableStatement} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} import org.apache.spark.sql.execution.HiveResult.hiveResultString +import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command.{DescribeColumnCommand, DescribeCommandBase} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType +import org.apache.spark.tags.ExtendedSQLTest /** * End-to-end test cases for SQL queries. @@ -60,9 +64,24 @@ import org.apache.spark.sql.types.StructType * }}} * * The format for input files is simple: - * 1. A list of SQL queries separated by semicolon. + * 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively + * separate the SQL queries in the test file(e.g. bracketed comments), please use + * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with + * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query, + * respectively. Code that is not surrounded by lines that begin with --QUERY-DELIMITER-START + * and --QUERY-DELIMITER-END is still separated by semicolons. * 2. Lines starting with -- are treated as comments and ignored. - * 3. Lines starting with --SET are used to run the file with the following set of configs. + * 3. Lines starting with --SET are used to specify the configs when running this testing file. You + * can set multiple configs in one --SET, using comma to separate them. Or you can use multiple + * --SET statements. + * 4. Lines starting with --IMPORT are used to load queries from another test file. + * 5. Lines starting with --CONFIG_DIM are used to specify config dimensions of this testing file. 
+ * The dimension name is decided by the string after --CONFIG_DIM. For example, --CONFIG_DIM1 + * belongs to dimension 1. One dimension can have multiple lines, each line representing one + * config set (one or more configs, separated by comma). Spark will run this testing file many + * times, each time picks one config set from each dimension, until all the combinations are + * tried. For example, if dimension 1 has 2 lines, dimension 2 has 3 lines, this testing file + * will be run 6 times (cartesian product). * * For example: * {{{ @@ -75,16 +94,16 @@ import org.apache.spark.sql.types.StructType * {{{ * -- some header information * - * -- !query 0 + * -- !query * select 1, -1 - * -- !query 0 schema + * -- !query schema * struct<...schema...> - * -- !query 0 output + * -- !query output * ... data row 1 ... * ... data row 2 ... * ... * - * -- !query 1 + * -- !query * ... * }}} * @@ -102,12 +121,12 @@ import org.apache.spark.sql.types.StructType * Therefore, UDF test cases should have single input and output files but executed by three * different types of UDFs. See 'udf/udf-inner-join.sql' as an example. */ +@ExtendedSQLTest class SQLQueryTestSuite extends QueryTest with SharedSparkSession { import IntegratedUDFTestUtils._ private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1" - protected val isTestWithConfigSets: Boolean = true protected val baseResourcePath = { // We use a path based on Spark home for 2 reasons: @@ -131,23 +150,29 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { private val notIncludedMsg = "[not included in comparison]" private val clsName = this.getClass.getCanonicalName + protected val emptySchema = StructType(Seq.empty).catalogString + + protected override def sparkConf: SparkConf = super.sparkConf + // Fewer shuffle partitions to speed up testing. + .set(SQLConf.SHUFFLE_PARTITIONS, 4) + /** List of test cases to ignore, in lower cases. 
*/ protected def blackList: Set[String] = Set( "blacklist.sql" // Do NOT remove this one. It is here to test the blacklist functionality. ) // Create all the test cases. - listTestCases().foreach(createScalaTestCase) + listTestCases.foreach(createScalaTestCase) /** A single SQL query's output. */ protected case class QueryOutput(sql: String, schema: String, output: String) { - def toString(queryIndex: Int): String = { + override def toString: String = { // We are explicitly not using multi-line string due to stripMargin removing "|" in output. - s"-- !query $queryIndex\n" + + s"-- !query\n" + sql + "\n" + - s"-- !query $queryIndex schema\n" + + s"-- !query schema\n" + schema + "\n" + - s"-- !query $queryIndex output\n" + + s"-- !query output\n" + output } } @@ -165,6 +190,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { */ protected trait PgSQLTest + /** + * traits that indicate ANSI-related tests with the ANSI mode enabled. + */ + protected trait AnsiTest + protected trait UDFTest { val udf: TestUDF } @@ -191,6 +221,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { resultFile: String, udf: TestUDF) extends TestCase with UDFTest with PgSQLTest + /** An ANSI-related test case. */ + protected case class AnsiTestCase( + name: String, inputFile: String, resultFile: String) extends TestCase with AnsiTest + protected def createScalaTestCase(testCase: TestCase): Unit = { if (blackList.exists(t => testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { @@ -217,55 +251,94 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { } } - // For better test coverage, runs the tests on mixed config sets: WHOLESTAGE_CODEGEN_ENABLED - // and CODEGEN_FACTORY_MODE. 
- private lazy val codegenConfigSets = Array( - ("true", "CODEGEN_ONLY"), - ("false", "CODEGEN_ONLY"), - ("false", "NO_CODEGEN") - ).map { case (wholeStageCodegenEnabled, codegenFactoryMode) => - Array(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> wholeStageCodegenEnabled, - SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactoryMode) - } - /** Run a test case. */ protected def runTest(testCase: TestCase): Unit = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } val input = fileToString(new File(testCase.inputFile)) - val (comments, code) = input.split("\n").partition(_.trim.startsWith("--")) + val (comments, code) = input.split("\n").partition { line => + val newLine = line.trim + newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") + } + + // If `--IMPORT` found, load code from another test case file, then insert them + // into the head in this test. + val importedTestCaseName = comments.filter(_.startsWith("--IMPORT ")).map(_.substring(9)) + val importedCode = importedTestCaseName.flatMap { testCaseName => + listTestCases.find(_.name == testCaseName).map { testCase => + val input = fileToString(new File(testCase.inputFile)) + val (_, code) = input.split("\n").partition(_.trim.startsWith("--")) + code + } + }.flatten + + val allCode = importedCode ++ code + val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + // Although the loop is heavy, only used for bracketed comments test. 
+ val querys = new ArrayBuffer[String] + val otherCodes = new ArrayBuffer[String] + var tempStr = "" + var start = false + for (c <- allCode) { + if (c.trim.startsWith("--QUERY-DELIMITER-START")) { + start = true + querys ++= splitWithSemicolon(otherCodes.toSeq) + otherCodes.clear() + } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { + start = false + querys += s"\n${tempStr.stripSuffix(";")}" + tempStr = "" + } else if (start) { + tempStr += s"\n$c" + } else { + otherCodes += c + } + } + if (otherCodes.nonEmpty) { + querys ++= splitWithSemicolon(otherCodes.toSeq) + } + querys.toSeq + } else { + splitWithSemicolon(allCode).toSeq + } // List of SQL queries to run - // note: this is not a robust way to split queries using semicolon, but works for now. - val queries = code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq + val queries = tempQueries.map(_.trim).filter(_ != "").toSeq // Fix misplacement when comment is at the end of the query. .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") - // When we are regenerating the golden files, we don't need to set any config as they - // all need to return the same result - if (regenerateGoldenFiles || !isTestWithConfigSets) { - runQueries(queries, testCase, None) + val settingLines = comments.filter(_.startsWith("--SET ")).map(_.substring(6)) + val settings = settingLines.flatMap(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') + conf.trim -> value.substring(1).trim + }) + + if (regenerateGoldenFiles) { + runQueries(queries, testCase, settings) } else { - val configSets = { - val configLines = comments.filter(_.startsWith("--SET")).map(_.substring(5)) - val configs = configLines.map(_.split(",").map { confAndValue => - val (conf, value) = confAndValue.span(_ != '=') + // A config dimension has multiple config sets, and a config set has multiple configs. 
+ // - config dim: Seq[Seq[(String, String)]] + // - config set: Seq[(String, String)] + // - config: (String, String)) + // We need to do cartesian product for all the config dimensions, to get a list of + // config sets, and run the query once for each config set. + val configDimLines = comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12)) + val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).mapValues { lines => + lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv => + val (conf, value) = kv.span(_ != '=') conf.trim -> value.substring(1).trim - }) + }.toSeq).toSeq + } - if (configs.nonEmpty) { - codegenConfigSets.flatMap { codegenConfig => - configs.map { config => - config ++ codegenConfig - } - } - } else { - codegenConfigSets - } + val configSets = configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) => + dim.flatMap { configSet => res.map(_ ++ configSet) } } configSets.foreach { configSet => try { - runQueries(queries, testCase, Some(configSet)) + runQueries(queries, testCase, settings ++ configSet) } catch { case e: Throwable => val configs = configSet.map { @@ -281,7 +354,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { protected def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Option[Seq[(String, String)]]): Unit = { + configSet: Seq[(String, String)]): Unit = { // Create a local SparkSession to have stronger isolation between different test cases. // This does not isolate catalog changes. val localSparkSession = spark.newSession() @@ -289,10 +362,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { testCase match { case udfTestCase: UDFTest => - // In Python UDF tests, the number of shuffle partitions matters considerably in - // the testing time because it requires to fork and communicate between external - // processes. 
- localSparkSession.conf.set(SQLConf.SHUFFLE_PARTITIONS.key, 4) registerTestUDF(udfTestCase.udf, localSparkSession) case _ => } @@ -304,26 +373,27 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { localSparkSession.udf.register("boolne", (b1: Boolean, b2: Boolean) => b1 != b2) // vol used by boolean.sql and case.sql. localSparkSession.udf.register("vol", (s: String) => s) - // PostgreSQL enabled cartesian product by default. - localSparkSession.conf.set(SQLConf.CROSS_JOINS_ENABLED.key, true) - localSparkSession.conf.set(SQLConf.ANSI_SQL_PARSER.key, true) - localSparkSession.conf.set(SQLConf.PREFER_INTEGRAL_DIVISION.key, true) + localSparkSession.conf.set(SQLConf.ANSI_ENABLED.key, true) + case _: AnsiTest => + localSparkSession.conf.set(SQLConf.ANSI_ENABLED.key, true) case _ => } + localSparkSession.conf.set(SQLConf.DATETIME_JAVA8API_ENABLED.key, true) - if (configSet.isDefined) { + if (configSet.nonEmpty) { // Execute the list of set operation in order to add the desired configs - val setOperations = configSet.get.map { case (key, value) => s"set $key=$value" } + val setOperations = configSet.map { case (key, value) => s"set $key=$value" } logInfo(s"Setting configs: ${setOperations.mkString(", ")}") setOperations.foreach(localSparkSession.sql) } + // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryOutput] = queries.map { sql => - val (schema, output) = getNormalizedResult(localSparkSession, sql) + val (schema, output) = handleExceptions(getNormalizedResult(localSparkSession, sql)) // We might need to do some query canonicalization in the future. 
QueryOutput( sql = sql, - schema = schema.catalogString, + schema = schema, output = output.mkString("\n").replaceAll("\\s+$", "")) } @@ -332,7 +402,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { val goldenOutput = { s"-- Automatically generated by ${getClass.getSimpleName}\n" + s"-- Number of queries: ${outputs.size}\n\n\n" + - outputs.zipWithIndex.map{case (qr, i) => qr.toString(i)}.mkString("\n\n\n") + "\n" + outputs.zipWithIndex.map{case (qr, i) => qr.toString}.mkString("\n\n\n") + "\n" } val resultFile = new File(testCase.resultFile) val parent = resultFile.getParentFile @@ -345,11 +415,25 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { // This is a temporary workaround for SPARK-28894. The test names are truncated after // the last dot due to a bug in SBT. This makes easier to debug via Jenkins test result // report. See SPARK-28894. - withClue(s"${testCase.name}${System.lineSeparator()}") { + // See also SPARK-29127. It is difficult to see the version information in the failed test + // cases so the version information related to Python was also added. + val clue = testCase match { + case udfTestCase: UDFTest + if udfTestCase.udf.isInstanceOf[TestPythonUDF] && shouldTestPythonUDFs => + s"${testCase.name}${System.lineSeparator()}Python: $pythonVer${System.lineSeparator()}" + case udfTestCase: UDFTest + if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && shouldTestScalarPandasUDFs => + s"${testCase.name}${System.lineSeparator()}" + + s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}" + case _ => + s"${testCase.name}${System.lineSeparator()}" + } + + withClue(clue) { // Read back the golden file. 
val expectedOutputs: Seq[QueryOutput] = { val goldenOutput = fileToString(new File(testCase.resultFile)) - val segments = goldenOutput.split("-- !query.+\n") + val segments = goldenOutput.split("-- !query.*\n") // each query has 3 segments, plus the header assert(segments.size == outputs.size * 3 + 1, @@ -377,53 +461,69 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { s"Schema did not match for query #$i\n${expected.sql}: $output") { output.schema } - assertResult(expected.output, s"Result did not match for query #$i\n${expected.sql}") { - output.output - } + assertResult(expected.output, s"Result did not match" + + s" for query #$i\n${expected.sql}") { output.output } } } } + /** + * This method handles exceptions occurred during query execution as they may need special care + * to become comparable to the expected output. + * + * @param result a function that returns a pair of schema and output + */ + protected def handleExceptions(result: => (String, Seq[String])): (String, Seq[String]) = { + try { + result + } catch { + case a: AnalysisException => + // Do not output the logical plan tree which contains expression IDs. + // Also implement a crude way of masking expression IDs in the error message + // with a generic pattern "###". + val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage + (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) + case s: SparkException if s.getCause != null => + // For a runtime exception, it is hard to match because its message contains + // information of stage, task ID, etc. + // To make result matching simpler, here we match the cause of the exception if it exists. + val cause = s.getCause + (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) + case NonFatal(e) => + // If there is an exception, put the exception class followed by the message. 
+ (emptySchema, Seq(e.getClass.getName, e.getMessage)) + } + } + /** Executes a query and returns the result as (schema of the output, normalized output). */ - private def getNormalizedResult(session: SparkSession, sql: String): (StructType, Seq[String]) = { + private def getNormalizedResult(session: SparkSession, sql: String): (String, Seq[String]) = { // Returns true if the plan is supposed to be sorted. def isSorted(plan: LogicalPlan): Boolean = plan match { case _: Join | _: Aggregate | _: Generate | _: Sample | _: Distinct => false case _: DescribeCommandBase | _: DescribeColumnCommand - | _: DescribeTableStatement + | _: DescribeRelation | _: DescribeColumnStatement => true case PhysicalOperation(_, _, Sort(_, true, _)) => true case _ => plan.children.iterator.exists(isSorted) } - try { - val df = session.sql(sql) - val schema = df.schema - // Get answer, but also get rid of the #1234 expression ids that show up in explain plans - val answer = hiveResultString(df.queryExecution.executedPlan).map(replaceNotIncludedMsg) - - // If the output is not pre-sorted, sort it. - if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted) - - } catch { - case a: AnalysisException => - // Do not output the logical plan tree which contains expression IDs. - // Also implement a crude way of masking expression IDs in the error message - // with a generic pattern "###". - val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage - (StructType(Seq.empty), Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) - case NonFatal(e) => - // If there is an exception, put the exception class followed by the message. 
- (StructType(Seq.empty), Seq(e.getClass.getName, e.getMessage)) + val df = session.sql(sql) + val schema = df.schema.catalogString + // Get answer, but also get rid of the #1234 expression ids that show up in explain plans + val answer = SQLExecution.withNewExecutionId(df.queryExecution, Some(sql)) { + hiveResultString(df.queryExecution.executedPlan).map(replaceNotIncludedMsg) } + + // If the output is not pre-sorted, sort it. + if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted) } protected def replaceNotIncludedMsg(line: String): String = { line.replaceAll("#\\d+", "#x") .replaceAll( - s"Location.*/sql/core/spark-warehouse/$clsName/", - s"Location ${notIncludedMsg}sql/core/spark-warehouse/") + s"Location.*$clsName/", + s"Location $notIncludedMsg/{warehouse_dir}/") .replaceAll("Created By.*", s"Created By $notIncludedMsg") .replaceAll("Created Time.*", s"Created Time $notIncludedMsg") .replaceAll("Last Access.*", s"Last Access $notIncludedMsg") @@ -431,14 +531,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { .replaceAll("\\*\\(\\d+\\) ", "*") // remove the WholeStageCodegen codegenStageIds } - protected def listTestCases(): Seq[TestCase] = { + protected lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" val absPath = file.getAbsolutePath val testCaseName = absPath.stripPrefix(inputFilePath).stripPrefix(File.separator) if (file.getAbsolutePath.startsWith( - s"$inputFilePath${File.separator}udf${File.separator}pgSQL")) { + s"$inputFilePath${File.separator}udf${File.separator}postgreSQL")) { Seq(TestScalaUDF("udf"), TestPythonUDF("udf"), TestScalarPandasUDF("udf")).map { udf => UDFPgSQLTestCase( s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf) @@ -448,8 +548,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { UDFTestCase( 
s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf) } - } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}pgSQL")) { + } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) { PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil + } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) { + AnsiTestCase(testCaseName, absPath, resultFile) :: Nil } else { RegularTestCase(testCaseName, absPath, resultFile) :: Nil } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala index 9a0c61b3304c5..099b559105fe8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SSBQuerySuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.util.resourceToString */ class SSBQuerySuite extends BenchmarkQueryTest { - override def beforeAll { + override def beforeAll: Unit = { super.beforeAll sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala index 1d461a03fd1f6..31957a99e15af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala @@ -158,7 +158,7 @@ class SessionStateSuite extends SparkFunSuite { assert(forkedSession ne activeSession) assert(forkedSession.listenerManager ne activeSession.listenerManager) runCollectQueryOn(forkedSession) - activeSession.sparkContext.listenerBus.waitUntilEmpty(1000) + activeSession.sparkContext.listenerBus.waitUntilEmpty() assert(collectorA.commands.length == 1) // forked should callback to A assert(collectorA.commands(0) == "collect") @@ -166,14 +166,14 @@ class SessionStateSuite extends SparkFunSuite { // => changes to forked do not affect original 
forkedSession.listenerManager.register(collectorB) runCollectQueryOn(activeSession) - activeSession.sparkContext.listenerBus.waitUntilEmpty(1000) + activeSession.sparkContext.listenerBus.waitUntilEmpty() assert(collectorB.commands.isEmpty) // original should not callback to B assert(collectorA.commands.length == 2) // original should still callback to A assert(collectorA.commands(1) == "collect") // <= changes to original do not affect forked activeSession.listenerManager.register(collectorC) runCollectQueryOn(forkedSession) - activeSession.sparkContext.listenerBus.waitUntilEmpty(1000) + activeSession.sparkContext.listenerBus.waitUntilEmpty() assert(collectorC.commands.isEmpty) // forked should not callback to C assert(collectorA.commands.length == 3) // forked should still callback to A assert(collectorB.commands.length == 1) // forked should still callback to B diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index 42307b1b9734e..b3b94f8be0d17 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -148,20 +148,6 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { } } - test("view") { - withView("v1") { - sql("CREATE VIEW v1 AS SELECT 1 AS a") - checkCreateView("v1") - } - } - - test("view with output columns") { - withView("v1") { - sql("CREATE VIEW v1 (b) AS SELECT 1 AS a") - checkCreateView("v1") - } - } - test("temp view") { val viewName = "spark_28383" withTempView(viewName) { @@ -186,17 +172,22 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { withTable("t1") { val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>)" sql(s"$createTable USING json") - val shownDDL = sql(s"SHOW CREATE TABLE t1") - .head() - .getString(0) - .split("\n") - .head + val shownDDL = getShowDDL("SHOW CREATE 
TABLE t1") assert(shownDDL == createTable) checkCreateTable("t1") } } + protected def getShowDDL(showCreateTableSql: String): String = { + val result = sql(showCreateTableSql) + .head() + .getString(0) + .split("\n") + .map(_.trim) + if (result.length > 1) result(0) + result(1) else result.head + } + protected def checkCreateTable(table: String): Unit = { checkCreateTableOrView(TableIdentifier(table, Some("default")), "TABLE") } @@ -220,7 +211,7 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { } } - private def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { + protected def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = { def normalize(table: CatalogTable): CatalogTable = { val nondeterministicProps = Set( "CreateTime", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 74341f93dd5ba..99ea95089d71c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -16,16 +16,20 @@ */ package org.apache.spark.sql +import java.util.Locale + import org.apache.spark.{SparkFunSuite, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, UnresolvedHint} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector +import org.apache.spark.sql.internal.SQLConf +import 
org.apache.spark.sql.internal.SQLConf.COLUMN_BATCH_SIZE import org.apache.spark.sql.internal.StaticSQLConf.SPARK_SESSION_EXTENSIONS import org.apache.spark.sql.types.{DataType, Decimal, IntegerType, LongType, Metadata, StructType} import org.apache.spark.sql.vectorized.{ColumnarArray, ColumnarBatch, ColumnarMap, ColumnVector} @@ -122,12 +126,33 @@ class SparkSessionExtensionSuite extends SparkFunSuite { } } + case class MyHintRule(spark: SparkSession) extends Rule[LogicalPlan] { + val MY_HINT_NAME = Set("CONVERT_TO_EMPTY") + + override def apply(plan: LogicalPlan): LogicalPlan = + plan.resolveOperators { + case h: UnresolvedHint if MY_HINT_NAME.contains(h.name.toUpperCase(Locale.ROOT)) => + LocalRelation(h.output, data = Seq.empty, isStreaming = h.isStreaming) + } + } + + test("inject custom hint rule") { + withSession(Seq(_.injectPostHocResolutionRule(MyHintRule))) { session => + assert( + session.range(1).hint("CONVERT_TO_EMPTY").logicalPlan.isInstanceOf[LocalRelation], + "plan is expected to be a local relation" + ) + } + } + test("inject columnar") { val extensions = create { extensions => extensions.injectColumnar(session => MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } withSession(extensions) { session => + // The ApplyColumnarRulesAndInsertTransitions rule is not applied when enable AQE + session.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, false) assert(session.sessionState.columnarRules.contains( MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) import session.sqlContext.implicits._ @@ -150,6 +175,30 @@ class SparkSessionExtensionSuite extends SparkFunSuite { } } + test("reset column vectors") { + val session = SparkSession.builder() + .master("local[1]") + .config(COLUMN_BATCH_SIZE.key, 2) + .withExtensions { extensions => + extensions.injectColumnar(session => + MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } + .getOrCreate() + + try { + 
assert(session.sessionState.columnarRules.contains( + MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) + import session.sqlContext.implicits._ + + val input = Seq((100L), (200L), (300L)) + val data = input.toDF("vals").repartition(1) + val df = data.selectExpr("vals + 1") + val result = df.collect() + assert(result sameElements input.map(x => Row(x + 2))) + } finally { + stop(session) + } + } + test("use custom class for extensions") { val session = SparkSession.builder() .master("local[1]") @@ -283,7 +332,20 @@ case class MyParser(spark: SparkSession, delegate: ParserInterface) extends Pars object MyExtensions { val myFunction = (FunctionIdentifier("myFunction"), - new ExpressionInfo("noClass", "myDb", "myFunction", "usage", "extended usage"), + new ExpressionInfo( + "noClass", + "myDb", + "myFunction", + "usage", + "extended usage", + " Examples:", + """ + note + """, + "3.0.0", + """ + deprecated + """), (_: Seq[Expression]) => Literal(5, IntegerType)) } @@ -680,7 +742,20 @@ case class MySparkStrategy2(spark: SparkSession) extends SparkStrategy { object MyExtensions2 { val myFunction = (FunctionIdentifier("myFunction2"), - new ExpressionInfo("noClass", "myDb", "myFunction2", "usage", "extended usage"), + new ExpressionInfo( + "noClass", + "myDb", + "myFunction2", + "usage", + "extended usage", + " Examples:", + """ + note + """, + "3.0.0", + """ + deprecated + """), (_: Seq[Expression]) => Literal(5, IntegerType)) } @@ -699,7 +774,20 @@ class MyExtensions2 extends (SparkSessionExtensions => Unit) { object MyExtensions2Duplicate { val myFunction = (FunctionIdentifier("myFunction2"), - new ExpressionInfo("noClass", "myDb", "myFunction2", "usage", "extended usage"), + new ExpressionInfo( + "noClass", + "myDb", + "myFunction2", + "usage", + "extended usage", + " Examples:", + """ + note + """, + "3.0.0", + """ + deprecated + """), (_: Seq[Expression]) => Literal(5, IntegerType)) } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala index 915f66526c3e6..fde8ddf491bd1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala @@ -27,6 +27,7 @@ import scala.util.Random import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, CatalogTable, HiveTableRelation} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Histogram, HistogramBin, HistogramSerializer, LogicalPlan} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.execution.datasources.LogicalRelation @@ -238,10 +239,14 @@ abstract class StatisticsCollectionTestBase extends QueryTest with SQLTestUtils getTableFromCatalogCache(tableName) != null } - def getCatalogStatistics(tableName: String): CatalogStatistics = { + def getTableStats(tableName: String): CatalogStatistics = { getCatalogTable(tableName).stats.get } + def getPartitionStats(tableName: String, partSpec: TablePartitionSpec): CatalogStatistics = { + spark.sessionState.catalog.getPartition(TableIdentifier(tableName), partSpec).stats.get + } + def checkTableStats( tableName: String, hasSizeInBytes: Boolean, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 88b3e5ec61f8a..ec698818a0d85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -129,18 +129,37 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Row("AQIDBA==", bytes)) } - 
test("overlay function") { + test("string overlay function") { // scalastyle:off // non ascii characters are not allowed in the code, so we disable the scalastyle here. - val df = Seq(("Spark SQL", "Spark的SQL")).toDF("a", "b") - checkAnswer(df.select(overlay($"a", "_", 6)), Row("Spark_SQL")) - checkAnswer(df.select(overlay($"a", "CORE", 7)), Row("Spark CORE")) - checkAnswer(df.select(overlay($"a", "ANSI ", 7, 0)), Row("Spark ANSI SQL")) - checkAnswer(df.select(overlay($"a", "tructured", 2, 4)), Row("Structured SQL")) - checkAnswer(df.select(overlay($"b", "_", 6)), Row("Spark_SQL")) + val df = Seq(("Spark SQL", "Spark的SQL", "_", "CORE", "ANSI ", "tructured", 6, 7, 0, 2, 4)). + toDF("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k") + checkAnswer(df.select(overlay($"a", $"c", $"g")), Row("Spark_SQL")) + checkAnswer(df.select(overlay($"a", $"d", $"h")), Row("Spark CORE")) + checkAnswer(df.select(overlay($"a", $"e", $"h", $"i")), Row("Spark ANSI SQL")) + checkAnswer(df.select(overlay($"a", $"f", $"j", $"k")), Row("Structured SQL")) + checkAnswer(df.select(overlay($"b", $"c", $"g")), Row("Spark_SQL")) // scalastyle:on } + test("binary overlay function") { + // non ascii characters are not allowed in the code, so we disable the scalastyle here. 
+ val df = Seq(( + Array[Byte](1, 2, 3, 4, 5, 6, 7, 8, 9), + Array[Byte](-1), + Array[Byte](-1, -1, -1, -1), + Array[Byte](-1, -1), + Array[Byte](-1, -1, -1, -1, -1), + 6, 7, 0, 2, 4)).toDF("a", "b", "c", "d", "e", "f", "g", "h", "i", "j") + checkAnswer(df.select(overlay($"a", $"b", $"f")), Row(Array[Byte](1, 2, 3, 4, 5, -1, 7, 8, 9))) + checkAnswer(df.select(overlay($"a", $"c", $"g")), + Row(Array[Byte](1, 2, 3, 4, 5, 6, -1, -1, -1, -1))) + checkAnswer(df.select(overlay($"a", $"d", $"g", $"h")), + Row(Array[Byte](1, 2, 3, 4, 5, 6, -1, -1, 7, 8, 9))) + checkAnswer(df.select(overlay($"a", $"e", $"i", $"j")), + Row(Array[Byte](1, -1, -1, -1, -1, -1, 6, 7, 8, 9))) + } + test("string / binary substring function") { // scalastyle:off // non ascii characters are not allowed in the code, so we disable the scalastyle here. @@ -266,7 +285,7 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { test("string parse_url function") { - def testUrl(url: String, expected: Row) { + def testUrl(url: String, expected: Row): Unit = { checkAnswer(Seq[String]((url)).toDF("url").selectExpr( "parse_url(url, 'HOST')", "parse_url(url, 'PATH')", "parse_url(url, 'QUERY')", "parse_url(url, 'REF')", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index a1d7792941ed9..ff8f94c68c5ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -22,11 +22,12 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} import org.apache.spark.sql.execution.{ColumnarToRowExec, ExecSubqueryExpression, FileSourceScanExec, InputAdapter, ReusedSubqueryExec, ScalarSubquery, SubqueryExec, WholeStageCodegenExec} +import 
org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FileScanRDD import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -class SubquerySuite extends QueryTest with SharedSparkSession { +class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ setupTestData() @@ -891,9 +892,9 @@ class SubquerySuite extends QueryTest with SharedSparkSession { val sqlText = """ - |SELECT * FROM t1 + |SELECT * FROM t1 a |WHERE - |NOT EXISTS (SELECT * FROM t1) + |NOT EXISTS (SELECT * FROM t1 b WHERE a.i = b.i) """.stripMargin val optimizedPlan = sql(sqlText).queryExecution.optimizedPlan val join = optimizedPlan.collectFirst { case j: Join => j }.get @@ -1080,9 +1081,8 @@ class SubquerySuite extends QueryTest with SharedSparkSession { | HAVING max(c2) > 0 | ORDER BY c1) """.stripMargin - // The rule to remove redundant sorts is not able to remove the inner sort under - // an Aggregate operator. We only remove the top level sort. 
- assert(getNumSortsInQuery(query6) == 1) + + assert(getNumSortsInQuery(query6) == 0) // Cases when sort is not removed from the plan // Limit on top of sort @@ -1272,12 +1272,29 @@ class SubquerySuite extends QueryTest with SharedSparkSession { } } + test("Cannot remove sort for floating-point order-sensitive aggregates from subquery") { + Seq("float", "double").foreach { typeName => + Seq("SUM", "AVG", "KURTOSIS", "SKEWNESS", "STDDEV_POP", "STDDEV_SAMP", + "VAR_POP", "VAR_SAMP").foreach { aggName => + val query = + s""" + |SELECT k, $aggName(v) FROM ( + | SELECT k, v + | FROM VALUES (1, $typeName(2.0)), (2, $typeName(1.0)) t(k, v) + | ORDER BY v) + |GROUP BY k + """.stripMargin + assert(getNumSortsInQuery(query) == 1) + } + } + } + test("SPARK-25482: Forbid pushdown to datasources of filters containing subqueries") { withTempView("t1", "t2") { sql("create temporary view t1(a int) using parquet") sql("create temporary view t2(b int) using parquet") val plan = sql("select * from t2 where b > (select max(a) from t1)") - val subqueries = plan.queryExecution.executedPlan.collect { + val subqueries = stripAQEPlan(plan.queryExecution.executedPlan).collect { case p => p.subqueries }.flatten assert(subqueries.length == 1) @@ -1292,7 +1309,7 @@ class SubquerySuite extends QueryTest with SharedSparkSession { val df = sql("SELECT * FROM a WHERE p <= (SELECT MIN(id) FROM b)") checkAnswer(df, Seq(Row(0, 0), Row(2, 0))) // need to execute the query before we can examine fs.inputRDDs() - assert(df.queryExecution.executedPlan match { + assert(stripAQEPlan(df.queryExecution.executedPlan) match { case WholeStageCodegenExec(ColumnarToRowExec(InputAdapter( fs @ FileSourceScanExec(_, _, _, partitionFilters, _, _, _)))) => partitionFilters.exists(ExecSubqueryExpression.hasSubquery) && @@ -1342,7 +1359,9 @@ class SubquerySuite extends QueryTest with SharedSparkSession { test("SPARK-27279: Reuse Subquery") { Seq(true, false).foreach { reuse => - 
withSQLConf(SQLConf.SUBQUERY_REUSE_ENABLED.key -> reuse.toString) { + withSQLConf(SQLConf.SUBQUERY_REUSE_ENABLED.key -> reuse.toString, + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + // when enable AQE, the reusedExchange is inserted when executed. val df = sql( """ |SELECT (SELECT avg(key) FROM testData) + (SELECT avg(key) FROM testData) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index a668434a68aff..aacb625d7921f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.internal.SQLConf */ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema { - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() for (tableName <- tableNames) { createTable(spark, tableName) @@ -82,13 +82,19 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSSchema { "q3", "q7", "q10", "q19", "q27", "q34", "q42", "q43", "q46", "q52", "q53", "q55", "q59", "q63", "q65", "q68", "q73", "q79", "q89", "q98", "ss_max") + // List up the known queries having too large code in a generated function. 
+ // A JIRA file for `modified-q3` is as follows; + // [SPARK-29128] Split predicate code in OR expressions + val blackListForMethodCodeSizeCheck = Set("modified-q3") + modifiedTPCDSQueries.foreach { name => val queryString = resourceToString(s"tpcds-modifiedQueries/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) - test(s"modified-$name") { + val testName = s"modified-$name" + test(testName) { // check the plans can be properly generated val plan = sql(queryString).queryExecution.executedPlan - checkGeneratedCode(plan) + checkGeneratedCode(plan, !blackListForMethodCodeSizeCheck.contains(testName)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala index b32d95d0b286c..ba99e18714b1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCHQuerySuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.util.resourceToString */ class TPCHQuerySuite extends BenchmarkQueryTest { - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index 2a034bcdc3f00..cc3995516dcc2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -22,7 +22,7 @@ import java.math.BigDecimal import org.apache.spark.sql.api.java._ import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.plans.logical.Project -import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.execution.{QueryExecution, SimpleMode} import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, ExplainCommand} import 
org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand @@ -309,7 +309,7 @@ class UDFSuite extends QueryTest with SharedSparkSession { test("SPARK-19338 Provide identical names for UDFs in the EXPLAIN output") { def explainStr(df: DataFrame): String = { - val explain = ExplainCommand(df.queryExecution.logical, extended = false) + val explain = ExplainCommand(df.queryExecution.logical, SimpleMode) val sparkPlan = spark.sessionState.executePlan(explain).executedPlan sparkPlan.executeCollect().map(_.getString(0).trim).headOption.getOrElse("") } @@ -360,13 +360,13 @@ class UDFSuite extends QueryTest with SharedSparkSession { .withColumn("b", udf1($"a", lit(10))) df.cache() df.write.saveAsTable("t") - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(numTotalCachedHit == 1, "expected to be cached in saveAsTable") df.write.insertInto("t") - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(numTotalCachedHit == 2, "expected to be cached in insertInto") df.write.save(path.getCanonicalPath) - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(numTotalCachedHit == 3, "expected to be cached in save for native") } } @@ -443,12 +443,12 @@ class UDFSuite extends QueryTest with SharedSparkSession { test("SPARK-25044 Verify null input handling for primitive types - with udf(Any, DataType)") { val f = udf((x: Int) => x, IntegerType) checkAnswer( - Seq(new Integer(1), null).toDF("x").select(f($"x")), + Seq(Integer.valueOf(1), null).toDF("x").select(f($"x")), Row(1) :: Row(0) :: Nil) val f2 = udf((x: Double) => x, DoubleType) checkAnswer( - Seq(new java.lang.Double(1.1), null).toDF("x").select(f2($"x")), + Seq(java.lang.Double.valueOf(1.1), null).toDF("x").select(f2($"x")), Row(1.1) :: Row(0.0) :: Nil) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index 2b2fedd3ca218..ffc2018d2132d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql +import java.util.Arrays + import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Cast, ExpressionEvalHelper, GenericInternalRow, Literal} @@ -277,4 +279,12 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque val udt = new TestUDT.MyDenseVectorUDT() assert(!Cast.canUpCast(udt, StringType)) } + + test("typeof user defined type") { + val schema = new StructType().add("a", new TestUDT.MyDenseVectorUDT()) + val data = Arrays.asList( + RowFactory.create(new TestUDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0)))) + checkAnswer(spark.createDataFrame(data, schema).selectExpr("typeof(a)"), + Seq(Row("array"))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/AlterTableTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala similarity index 74% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/AlterTableTests.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala index 4b7ee384b4c10..96fe301b512ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/AlterTableTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala @@ -15,12 +15,14 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.CatalogV2Util.withDefaultOwnership +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -49,7 +51,7 @@ trait AlterTableTests extends SharedSparkSession { } assert(exc.getMessage.contains(s"${catalogAndNamespace}table_name")) - assert(exc.getMessage.contains("Table or view not found")) + assert(exc.getMessage.contains("Table not found")) } } @@ -85,6 +87,21 @@ trait AlterTableTests extends SharedSparkSession { } } + test("AlterTable: add column with NOT NULL") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql(s"CREATE TABLE $t (id int) USING $v2Format") + sql(s"ALTER TABLE $t ADD COLUMN data string NOT NULL") + + val table = getTableMetadata(t) + + assert(table.name === fullTableName(t)) + assert(table.schema === StructType(Seq( + StructField("id", IntegerType), + StructField("data", StringType, nullable = false)))) + } + } + test("AlterTable: add column with comment") { val t = s"${catalogAndNamespace}table_name" withTable(t) { @@ -100,6 +117,62 @@ trait AlterTableTests extends SharedSparkSession { } } + test("AlterTable: add column with interval type") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql(s"CREATE TABLE $t (id int, point struct) USING $v2Format") + val e1 = + intercept[AnalysisException](sql(s"ALTER TABLE $t ADD COLUMN data interval")) + assert(e1.getMessage.contains("Cannot use interval type in the table schema.")) + val e2 = + intercept[AnalysisException](sql(s"ALTER TABLE $t ADD COLUMN point.z interval")) + assert(e2.getMessage.contains("Cannot use interval type in the table schema.")) + } + } + + test("AlterTable: add column with position") { + val t = 
s"${catalogAndNamespace}table_name" + withTable(t) { + sql(s"CREATE TABLE $t (point struct) USING $v2Format") + + sql(s"ALTER TABLE $t ADD COLUMN a string FIRST") + assert(getTableMetadata(t).schema == new StructType() + .add("a", StringType) + .add("point", new StructType().add("x", IntegerType))) + + sql(s"ALTER TABLE $t ADD COLUMN b string AFTER point") + assert(getTableMetadata(t).schema == new StructType() + .add("a", StringType) + .add("point", new StructType().add("x", IntegerType)) + .add("b", StringType)) + + val e1 = intercept[AnalysisException]( + sql(s"ALTER TABLE $t ADD COLUMN c string AFTER non_exist")) + assert(e1.getMessage().contains("Couldn't find the reference column")) + + sql(s"ALTER TABLE $t ADD COLUMN point.y int FIRST") + assert(getTableMetadata(t).schema == new StructType() + .add("a", StringType) + .add("point", new StructType() + .add("y", IntegerType) + .add("x", IntegerType)) + .add("b", StringType)) + + sql(s"ALTER TABLE $t ADD COLUMN point.z int AFTER x") + assert(getTableMetadata(t).schema == new StructType() + .add("a", StringType) + .add("point", new StructType() + .add("y", IntegerType) + .add("x", IntegerType) + .add("z", IntegerType)) + .add("b", StringType)) + + val e2 = intercept[AnalysisException]( + sql(s"ALTER TABLE $t ADD COLUMN point.x2 int AFTER non_exist")) + assert(e2.getMessage().contains("Couldn't find the reference column")) + } + } + test("AlterTable: add multiple columns") { val t = s"${catalogAndNamespace}table_name" withTable(t) { @@ -239,6 +312,30 @@ trait AlterTableTests extends SharedSparkSession { } } + test("AlterTable: add column - new column should not exist") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql( + s"""CREATE TABLE $t ( + |id int, + |point struct, + |arr array>, + |mk map, string>, + |mv map> + |) + |USING $v2Format""".stripMargin) + + Seq("id", "point.x", "arr.element.x", "mk.key.x", "mv.value.x").foreach { field => + + val e = intercept[AnalysisException] { + 
sql(s"ALTER TABLE $t ADD COLUMNS $field double") + } + assert(e.getMessage.contains("add")) + assert(e.getMessage.contains(s"$field already exists")) + } + } + } + test("AlterTable: update column type int -> long") { val t = s"${catalogAndNamespace}table_name" withTable(t) { @@ -246,12 +343,42 @@ trait AlterTableTests extends SharedSparkSession { sql(s"ALTER TABLE $t ALTER COLUMN id TYPE bigint") val table = getTableMetadata(t) - assert(table.name === fullTableName(t)) assert(table.schema === new StructType().add("id", LongType)) } } + test("AlterTable: update column type to interval") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql(s"CREATE TABLE $t (id int) USING $v2Format") + val e = intercept[AnalysisException](sql(s"ALTER TABLE $t ALTER COLUMN id TYPE interval")) + assert(e.getMessage.contains("id to interval type")) + } + } + + test("AlterTable: SET/DROP NOT NULL") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint NOT NULL) USING $v2Format") + sql(s"ALTER TABLE $t ALTER COLUMN id SET NOT NULL") + + val table = getTableMetadata(t) + assert(table.name === fullTableName(t)) + assert(table.schema === new StructType().add("id", LongType, nullable = false)) + + sql(s"ALTER TABLE $t ALTER COLUMN id DROP NOT NULL") + val table2 = getTableMetadata(t) + assert(table2.name === fullTableName(t)) + assert(table2.schema === new StructType().add("id", LongType)) + + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ALTER COLUMN id SET NOT NULL") + } + assert(e.message.contains("Cannot change nullable column to non-nullable")) + } + } + test("AlterTable: update nested type float -> double") { val t = s"${catalogAndNamespace}table_name" withTable(t) { @@ -259,7 +386,6 @@ trait AlterTableTests extends SharedSparkSession { sql(s"ALTER TABLE $t ALTER COLUMN point.x TYPE double") val table = getTableMetadata(t) - assert(table.name === fullTableName(t)) assert(table.schema === new StructType() 
.add("id", IntegerType) @@ -279,7 +405,7 @@ trait AlterTableTests extends SharedSparkSession { } assert(exc.getMessage.contains("point")) - assert(exc.getMessage.contains("update a struct by adding, deleting, or updating its fields")) + assert(exc.getMessage.contains("update a struct by updating its fields")) val table = getTableMetadata(t) @@ -470,16 +596,58 @@ trait AlterTableTests extends SharedSparkSession { } } - test("AlterTable: update column type and comment") { + test("AlterTable: update column position") { val t = s"${catalogAndNamespace}table_name" withTable(t) { - sql(s"CREATE TABLE $t (id int) USING $v2Format") - sql(s"ALTER TABLE $t ALTER COLUMN id TYPE bigint COMMENT 'doc'") - - val table = getTableMetadata(t) - - assert(table.name === fullTableName(t)) - assert(table.schema === StructType(Seq(StructField("id", LongType).withComment("doc")))) + sql(s"CREATE TABLE $t (a int, b int, point struct) USING $v2Format") + + sql(s"ALTER TABLE $t ALTER COLUMN b FIRST") + assert(getTableMetadata(t).schema == new StructType() + .add("b", IntegerType) + .add("a", IntegerType) + .add("point", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType))) + + sql(s"ALTER TABLE $t ALTER COLUMN b AFTER point") + assert(getTableMetadata(t).schema == new StructType() + .add("a", IntegerType) + .add("point", new StructType() + .add("x", IntegerType) + .add("y", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType)) + + val e1 = intercept[AnalysisException]( + sql(s"ALTER TABLE $t ALTER COLUMN b AFTER non_exist")) + assert(e1.getMessage.contains("Couldn't resolve positional argument")) + + sql(s"ALTER TABLE $t ALTER COLUMN point.y FIRST") + assert(getTableMetadata(t).schema == new StructType() + .add("a", IntegerType) + .add("point", new StructType() + .add("y", IntegerType) + .add("x", IntegerType) + .add("z", IntegerType)) + .add("b", IntegerType)) + + sql(s"ALTER TABLE $t ALTER COLUMN point.y AFTER z") + 
assert(getTableMetadata(t).schema == new StructType() + .add("a", IntegerType) + .add("point", new StructType() + .add("x", IntegerType) + .add("z", IntegerType) + .add("y", IntegerType)) + .add("b", IntegerType)) + + val e2 = intercept[AnalysisException]( + sql(s"ALTER TABLE $t ALTER COLUMN point.y AFTER non_exist")) + assert(e2.getMessage.contains("Couldn't resolve positional argument")) + + // `AlterTable.resolved` checks column existence. + intercept[AnalysisException]( + sql(s"ALTER TABLE $t ALTER COLUMN a.y AFTER x")) } } @@ -692,6 +860,37 @@ trait AlterTableTests extends SharedSparkSession { } } + test("AlterTable: rename column - new name should not exist") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql( + s"""CREATE TABLE $t ( + |id int, + |user_id int, + |point struct, + |arr array>, + |mk map, string>, + |mv map> + |) + |USING $v2Format""".stripMargin) + + Seq( + "id" -> "user_id", + "point.x" -> "y", + "arr.element.x" -> "y", + "mk.key.x" -> "y", + "mv.value.x" -> "y").foreach { case (field, newName) => + + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t RENAME COLUMN $field TO $newName") + } + assert(e.getMessage.contains("rename")) + assert(e.getMessage.contains((field.split("\\.").init :+ newName).mkString("."))) + assert(e.getMessage.contains("already exists")) + } + } + } + test("AlterTable: drop column") { val t = s"${catalogAndNamespace}table_name" withTable(t) { @@ -811,7 +1010,20 @@ trait AlterTableTests extends SharedSparkSession { assert(table.name === fullTableName(t)) assert(table.properties === - Map("provider" -> v2Format, "location" -> "s3://bucket/path").asJava) + withDefaultOwnership(Map("provider" -> v2Format, "location" -> "s3://bucket/path")).asJava) + } + } + + test("AlterTable: set partition location") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql(s"CREATE TABLE $t (id int) USING $v2Format") + + val exc = intercept[AnalysisException] { + sql(s"ALTER TABLE $t 
PARTITION(ds='2017-06-10') SET LOCATION 's3://bucket/path'") + } + assert(exc.getMessage.contains( + "ALTER TABLE SET LOCATION does not support partition for v2 tables")) } } @@ -824,7 +1036,8 @@ trait AlterTableTests extends SharedSparkSession { val table = getTableMetadata(t) assert(table.name === fullTableName(t)) - assert(table.properties === Map("provider" -> v2Format, "test" -> "34").asJava) + assert(table.properties === + withDefaultOwnership(Map("provider" -> v2Format, "test" -> "34")).asJava) } } @@ -836,15 +1049,30 @@ trait AlterTableTests extends SharedSparkSession { val table = getTableMetadata(t) assert(table.name === fullTableName(t)) - assert(table.properties === Map("provider" -> v2Format, "test" -> "34").asJava) + assert(table.properties === + withDefaultOwnership(Map("provider" -> v2Format, "test" -> "34")).asJava) sql(s"ALTER TABLE $t UNSET TBLPROPERTIES ('test')") val updated = getTableMetadata(t) assert(updated.name === fullTableName(t)) - assert(updated.properties === Map("provider" -> v2Format).asJava) + assert(updated.properties === withDefaultOwnership(Map("provider" -> v2Format)).asJava) } } + test("AlterTable: replace columns") { + val t = s"${catalogAndNamespace}table_name" + withTable(t) { + sql(s"CREATE TABLE $t (col1 int, col2 int COMMENT 'c2') USING $v2Format") + sql(s"ALTER TABLE $t REPLACE COLUMNS (col2 string, col3 int COMMENT 'c3')") + + val table = getTableMetadata(t) + + assert(table.name === fullTableName(t)) + assert(table.schema === StructType(Seq( + StructField("col2", StringType), + StructField("col3", IntegerType).withComment("c3")))) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala similarity index 85% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2DataFrameSessionCatalogSuite.scala rename to 
sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index fee6962501637..01caf8e2eb115 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -15,22 +15,19 @@ * limitations under the License. */ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import java.util import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, SaveMode} -import org.apache.spark.sql.catalog.v2.{CatalogPlugin, Identifier, TableCatalog, TableChange} -import org.apache.spark.sql.catalog.v2.expressions.Transform -import org.apache.spark.sql.catalog.v2.utils.CatalogV2Util +import org.apache.spark.sql.{DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.InMemoryTable -import org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog -import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode, V2_SESSION_CATALOG} -import org.apache.spark.sql.sources.v2.utils.TestV2SessionCatalogBase +import org.apache.spark.sql.connector.catalog._ +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -39,8 +36,6 @@ class DataSourceV2DataFrameSessionCatalogSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) with SessionCatalogTest[InMemoryTable, 
InMemoryTableSessionCatalog] { - import testImplicits._ - override protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { val dfw = insert.write.format(v2Format) if (mode != null) { @@ -89,7 +84,7 @@ class DataSourceV2DataFrameSessionCatalogSuite val t1 = "prop_table" withTable(t1) { spark.range(20).write.format(v2Format).option("path", "abc").saveAsTable(t1) - val cat = spark.sessionState.catalogManager.v2SessionCatalog.get.asInstanceOf[TableCatalog] + val cat = spark.sessionState.catalogManager.currentCatalog.asInstanceOf[TableCatalog] val tableInfo = cat.loadTable(Identifier.of(Array.empty, t1)) assert(tableInfo.properties().get("location") === "abc") assert(tableInfo.properties().get("provider") === v2Format) @@ -97,12 +92,6 @@ class DataSourceV2DataFrameSessionCatalogSuite } } -class InMemoryTableProvider extends TableProvider { - override def getTable(options: CaseInsensitiveStringMap): Table = { - throw new UnsupportedOperationException("D'oh!") - } -} - class InMemoryTableSessionCatalog extends TestV2SessionCatalogBase[InMemoryTable] { override def newTable( name: String, @@ -112,6 +101,13 @@ class InMemoryTableSessionCatalog extends TestV2SessionCatalogBase[InMemoryTable new InMemoryTable(name, schema, partitions, properties) } + override def loadTable(ident: Identifier): Table = { + val identToUse = Option(InMemoryTableSessionCatalog.customIdentifierResolution) + .map(_(ident)) + .getOrElse(ident) + super.loadTable(identToUse) + } + override def alterTable(ident: Identifier, changes: TableChange*): Table = { val fullIdent = fullIdentifier(ident) Option(tables.get(fullIdent)) match { @@ -136,7 +132,22 @@ class InMemoryTableSessionCatalog extends TestV2SessionCatalogBase[InMemoryTable } } -private[v2] trait SessionCatalogTest[T <: Table, Catalog <: TestV2SessionCatalogBase[T]] +object InMemoryTableSessionCatalog { + private var customIdentifierResolution: Identifier => Identifier = _ + + def withCustomIdentifierResolver( + 
resolver: Identifier => Identifier)( + f: => Unit): Unit = { + try { + customIdentifierResolution = resolver + f + } finally { + customIdentifierResolution = null + } + } +} + +private [connector] trait SessionCatalogTest[T <: Table, Catalog <: TestV2SessionCatalogBase[T]] extends QueryTest with SharedSparkSession with BeforeAndAfter { @@ -145,18 +156,18 @@ private[v2] trait SessionCatalogTest[T <: Table, Catalog <: TestV2SessionCatalog spark.sessionState.catalogManager.catalog(name) } - protected val v2Format: String = classOf[InMemoryTableProvider].getName + protected val v2Format: String = classOf[FakeV2Provider].getName protected val catalogClassName: String = classOf[InMemoryTableSessionCatalog].getName before { - spark.conf.set(V2_SESSION_CATALOG.key, catalogClassName) + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION.key, catalogClassName) } override def afterEach(): Unit = { super.afterEach() - catalog("session").asInstanceOf[Catalog].clearTables() - spark.conf.set(V2_SESSION_CATALOG.key, classOf[V2SessionCatalog].getName) + catalog(SESSION_CATALOG_NAME).asInstanceOf[Catalog].clearTables() + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) } protected def verifyTable(tableName: String, expected: DataFrame): Unit diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala similarity index 58% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2DataFrameSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala index abccb5cec6752..0a6897b829994 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala @@ -15,13 +15,21 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector -import org.apache.spark.sql.{DataFrame, Row, SaveMode} -import org.apache.spark.sql.connector.InMemoryTableCatalog +import java.util.Collections + +import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SaveMode} +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan} +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.QueryExecutionListener class DataSourceV2DataFrameSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import testImplicits._ before { @@ -76,13 +84,15 @@ class DataSourceV2DataFrameSuite withTable(t1) { sql(s"CREATE TABLE $t1 (id bigint, data string) USING foo") val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") - // Default saveMode is append, therefore this doesn't throw a table already exists exception - df.write.saveAsTable(t1) + // Default saveMode is ErrorIfExists + intercept[TableAlreadyExistsException] { + df.write.saveAsTable(t1) + } + assert(spark.table(t1).count() === 0) + + // appends are by name not by position + df.select('data, 'id).write.mode("append").saveAsTable(t1) checkAnswer(spark.table(t1), df) - - // also appends are by name not by position - df.select('data, 'id).write.saveAsTable(t1) - checkAnswer(spark.table(t1), df.union(df)) } } @@ -123,4 +133,57 @@ class DataSourceV2DataFrameSuite checkAnswer(spark.table(t1), Seq(Row("c", "d"))) } } + + testQuietly("SPARK-29778: saveAsTable: append mode takes write options") { + + var plan: LogicalPlan = null + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = 
{ + plan = qe.analyzed + } + override def onFailure(funcName: String, qe: QueryExecution, error: Throwable): Unit = {} + } + + try { + spark.listenerManager.register(listener) + + val t1 = "testcat.ns1.ns2.tbl" + + sql(s"CREATE TABLE $t1 (id bigint, data string) USING foo") + + val df = Seq((1L, "a"), (2L, "b"), (3L, "c")).toDF("id", "data") + df.write.option("other", "20").mode("append").saveAsTable(t1) + + sparkContext.listenerBus.waitUntilEmpty() + plan match { + case p: AppendData => + assert(p.writeOptions == Map("other" -> "20")) + case other => + fail(s"Expected to parse ${classOf[AppendData].getName} from query," + + s"got ${other.getClass.getName}: $plan") + } + + checkAnswer(spark.table(t1), df) + } finally { + spark.listenerManager.unregister(listener) + } + } + + test("Cannot write data with intervals to v2") { + withTable("testcat.table_name") { + val testCatalog = spark.sessionState.catalogManager.catalog("testcat").asTableCatalog + testCatalog.createTable( + Identifier.of(Array(), "table_name"), + new StructType().add("i", "interval"), + Array.empty, Collections.emptyMap[String, String]) + val df = sql("select interval 1 day as i") + val v2Writer = df.writeTo("testcat.table_name") + val e1 = intercept[AnalysisException](v2Writer.append()) + assert(e1.getMessage.contains(s"Cannot use interval type in the table schema.")) + val e2 = intercept[AnalysisException](v2Writer.overwrite(df("i"))) + assert(e2.getMessage.contains(s"Cannot use interval type in the table schema.")) + val e3 = intercept[AnalysisException](v2Writer.overwritePartitions()) + assert(e3.getMessage.contains(s"Cannot use interval type in the table schema.")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2SQLSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala similarity index 75% rename from 
sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2SQLSessionCatalogSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala index cfbafdb65c7c3..b6997445013e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2SQLSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala @@ -15,12 +15,10 @@ * limitations under the License. */ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import org.apache.spark.sql.{DataFrame, SaveMode} -import org.apache.spark.sql.catalog.v2.{Identifier, TableCatalog} -import org.apache.spark.sql.connector.InMemoryTable -import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode} +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} class DataSourceV2SQLSessionCatalogSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = true) @@ -46,9 +44,23 @@ class DataSourceV2SQLSessionCatalogSuite } override def getTableMetadata(tableName: String): Table = { - val v2Catalog = spark.sessionState.catalogManager.v2SessionCatalog.get + val v2Catalog = spark.sessionState.catalogManager.currentCatalog val nameParts = spark.sessionState.sqlParser.parseMultipartIdentifier(tableName) v2Catalog.asInstanceOf[TableCatalog] .loadTable(Identifier.of(Array.empty, nameParts.last)) } + + test("SPARK-30697: catalog.isView doesn't throw an error for specialized identifiers") { + val t1 = "tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") + + def idResolver(id: Identifier): Identifier = Identifier.of(Array.empty, id.name()) + + InMemoryTableSessionCatalog.withCustomIdentifierResolver(idResolver) { + // The following should not throw AnalysisException. 
+ sql(s"DESCRIBE TABLE ignored.$t1") + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala new file mode 100644 index 0000000000000..eabcb81c50646 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -0,0 +1,2276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector + +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkException +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.connector.catalog._ +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME +import org.apache.spark.sql.connector.catalog.CatalogV2Util.withDefaultOwnership +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION +import org.apache.spark.sql.internal.connector.SimpleTableProvider +import org.apache.spark.sql.sources.SimpleScanSource +import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils + +class DataSourceV2SQLSuite + extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = true) + with AlterTableTests { + + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + private val v2Source = classOf[FakeV2Provider].getName + override protected val v2Format = v2Source + override protected val catalogAndNamespace = "testcat.ns1.ns2." 
+ private val defaultUser: String = Utils.getCurrentUserName() + + private def catalog(name: String): CatalogPlugin = { + spark.sessionState.catalogManager.catalog(name) + } + + protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { + val tmpView = "tmp_view" + withTempView(tmpView) { + insert.createOrReplaceTempView(tmpView) + val overwrite = if (mode == SaveMode.Overwrite) "OVERWRITE" else "INTO" + sql(s"INSERT $overwrite TABLE $tableName SELECT * FROM $tmpView") + } + } + + override def verifyTable(tableName: String, expected: DataFrame): Unit = { + checkAnswer(spark.table(tableName), expected) + } + + override def getTableMetadata(tableName: String): Table = { + val nameParts = spark.sessionState.sqlParser.parseMultipartIdentifier(tableName) + val v2Catalog = catalog(nameParts.head).asTableCatalog + val namespace = nameParts.drop(1).init.toArray + v2Catalog.loadTable(Identifier.of(namespace, nameParts.last)) + } + + before { + spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) + spark.conf.set( + "spark.sql.catalog.testcat_atomic", classOf[StagingInMemoryTableCatalog].getName) + spark.conf.set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName) + spark.conf.set( + V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryTableSessionCatalog].getName) + + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") + df2.createOrReplaceTempView("source2") + } + + after { + spark.sessionState.catalog.reset() + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.clear() + } + + test("CreateTable: use v2 plan because catalog is set") { + spark.sql("CREATE TABLE testcat.table_name (id bigint NOT NULL, data string) USING foo") + + val testCatalog = catalog("testcat").asTableCatalog + val table = 
testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == "testcat.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType() + .add("id", LongType, nullable = false) + .add("data", StringType)) + + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) + } + + test("DescribeTable using v2 catalog") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string)" + + " USING foo" + + " PARTITIONED BY (id)") + val descriptionDf = spark.sql("DESCRIBE TABLE testcat.table_name") + assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === + Seq( + ("col_name", StringType), + ("data_type", StringType), + ("comment", StringType))) + val description = descriptionDf.collect() + assert(description === Seq( + Row("id", "bigint", ""), + Row("data", "string", ""), + Row("", "", ""), + Row("# Partitioning", "", ""), + Row("Part 0", "id", ""))) + + val e = intercept[AnalysisException] { + sql("DESCRIBE TABLE testcat.table_name PARTITION (id = 1)") + } + assert(e.message.contains("DESCRIBE does not support partition for v2 tables")) + } + + test("DescribeTable with v2 catalog when table does not exist.") { + intercept[AnalysisException] { + spark.sql("DESCRIBE TABLE testcat.table_name") + } + } + + test("DescribeTable extended using v2 catalog") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string)" + + " USING foo" + + " PARTITIONED BY (id)" + + " TBLPROPERTIES ('bar'='baz')" + + " COMMENT 'this is a test table'" + + " LOCATION '/tmp/testcat/table_name'") + val descriptionDf = spark.sql("DESCRIBE TABLE EXTENDED testcat.table_name") + assert(descriptionDf.schema.map(field => (field.name, field.dataType)) + === Seq( + ("col_name", StringType), + ("data_type", StringType), + ("comment", StringType))) 
+ assert(descriptionDf.collect() + .map(_.toSeq) + .map(_.toArray.map(_.toString.trim)) === Array( + Array("id", "bigint", ""), + Array("data", "string", ""), + Array("", "", ""), + Array("# Partitioning", "", ""), + Array("Part 0", "id", ""), + Array("", "", ""), + Array("# Detailed Table Information", "", ""), + Array("Name", "testcat.table_name", ""), + Array("Comment", "this is a test table", ""), + Array("Location", "/tmp/testcat/table_name", ""), + Array("Provider", "foo", ""), + Array(TableCatalog.PROP_OWNER.capitalize, defaultUser, ""), + Array("Table Properties", "[bar=baz]", ""))) + + } + + test("CreateTable: use v2 plan and session catalog when provider is v2") { + spark.sql(s"CREATE TABLE table_name (id bigint, data string) USING $v2Source") + + val testCatalog = catalog(SESSION_CATALOG_NAME).asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == "default.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> v2Source)).asJava) + assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) + + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) + } + + test("CreateTable: fail if table exists") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + + val testCatalog = catalog("testcat").asTableCatalog + + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table.name == "testcat.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) + + // run a second create query that should fail + val exc = intercept[TableAlreadyExistsException] { + spark.sql("CREATE TABLE testcat.table_name (id 
bigint, data string, id2 bigint) USING bar") + } + + assert(exc.getMessage.contains("table_name")) + + // table should not have changed + val table2 = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table2.name == "testcat.table_name") + assert(table2.partitioning.isEmpty) + assert(table2.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table2.schema == new StructType().add("id", LongType).add("data", StringType)) + + // check that the table is still empty + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) + } + + test("CreateTable: if not exists") { + spark.sql( + "CREATE TABLE IF NOT EXISTS testcat.table_name (id bigint, data string) USING foo") + + val testCatalog = catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == "testcat.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) + + spark.sql("CREATE TABLE IF NOT EXISTS testcat.table_name (id bigint, data string) USING bar") + + // table should not have changed + val table2 = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table2.name == "testcat.table_name") + assert(table2.partitioning.isEmpty) + assert(table2.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table2.schema == new StructType().add("id", LongType).add("data", StringType)) + + // check that the table is still empty + val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), Seq.empty) + } + + test("CreateTable: use default catalog for v2 sources when default catalog is set") { + 
spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") + spark.sql(s"CREATE TABLE table_name (id bigint, data string) USING foo") + + val testCatalog = catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == "testcat.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) + + // check that the table is empty + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) + } + + test("CreateTable/RepalceTable: invalid schema if has interval type") { + Seq("CREATE", "REPLACE").foreach { action => + val e1 = intercept[AnalysisException]( + sql(s"$action TABLE table_name (id int, value interval) USING $v2Format")) + assert(e1.getMessage.contains(s"Cannot use interval type in the table schema.")) + val e2 = intercept[AnalysisException]( + sql(s"$action TABLE table_name (id array) USING $v2Format")) + assert(e2.getMessage.contains(s"Cannot use interval type in the table schema.")) + } + } + + test("CTAS/RTAS: invalid schema if has interval type") { + Seq("CREATE", "REPLACE").foreach { action => + val e1 = intercept[AnalysisException]( + sql(s"$action TABLE table_name USING $v2Format as select interval 1 day")) + assert(e1.getMessage.contains(s"Cannot use interval type in the table schema.")) + val e2 = intercept[AnalysisException]( + sql(s"$action TABLE table_name USING $v2Format as select array(interval 1 day)")) + assert(e2.getMessage.contains(s"Cannot use interval type in the table schema.")) + } + } + + test("CreateTableAsSelect: use v2 plan because catalog is set") { + val basicCatalog = catalog("testcat").asTableCatalog + val atomicCatalog = catalog("testcat_atomic").asTableCatalog + val basicIdentifier = "testcat.table_name" + val 
atomicIdentifier = "testcat_atomic.table_name" + + Seq((basicCatalog, basicIdentifier), (atomicCatalog, atomicIdentifier)).foreach { + case (catalog, identifier) => + spark.sql(s"CREATE TABLE $identifier USING foo AS SELECT id, data FROM source") + + val table = catalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == identifier) + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType() + .add("id", LongType) + .add("data", StringType)) + + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) + } + } + + test("ReplaceTableAsSelect: basic v2 implementation.") { + val basicCatalog = catalog("testcat").asTableCatalog + val atomicCatalog = catalog("testcat_atomic").asTableCatalog + val basicIdentifier = "testcat.table_name" + val atomicIdentifier = "testcat_atomic.table_name" + + Seq((basicCatalog, basicIdentifier), (atomicCatalog, atomicIdentifier)).foreach { + case (catalog, identifier) => + spark.sql(s"CREATE TABLE $identifier USING foo AS SELECT id, data FROM source") + val originalTable = catalog.loadTable(Identifier.of(Array(), "table_name")) + + spark.sql(s"REPLACE TABLE $identifier USING foo AS SELECT id FROM source") + val replacedTable = catalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(replacedTable != originalTable, "Table should have been replaced.") + assert(replacedTable.name == identifier) + assert(replacedTable.partitioning.isEmpty) + assert(replacedTable.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(replacedTable.schema == new StructType().add("id", LongType)) + + val rdd = spark.sparkContext.parallelize(replacedTable.asInstanceOf[InMemoryTable].rows) + checkAnswer( + spark.internalCreateDataFrame(rdd, replacedTable.schema), + spark.table("source").select("id")) + 
} + } + + test("ReplaceTableAsSelect: Non-atomic catalog drops the table if the write fails.") { + spark.sql("CREATE TABLE testcat.table_name USING foo AS SELECT id, data FROM source") + val testCatalog = catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table.asInstanceOf[InMemoryTable].rows.nonEmpty) + + intercept[Exception] { + spark.sql("REPLACE TABLE testcat.table_name" + + s" USING foo OPTIONS (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}`=true)" + + s" AS SELECT id FROM source") + } + + assert(!testCatalog.tableExists(Identifier.of(Array(), "table_name")), + "Table should have been dropped as a result of the replace.") + } + + test("ReplaceTableAsSelect: Non-atomic catalog drops the table permanently if the" + + " subsequent table creation fails.") { + spark.sql("CREATE TABLE testcat.table_name USING foo AS SELECT id, data FROM source") + val testCatalog = catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table.asInstanceOf[InMemoryTable].rows.nonEmpty) + + intercept[Exception] { + spark.sql("REPLACE TABLE testcat.table_name" + + s" USING foo" + + s" TBLPROPERTIES (`${InMemoryTableCatalog.SIMULATE_FAILED_CREATE_PROPERTY}`=true)" + + s" AS SELECT id FROM source") + } + + assert(!testCatalog.tableExists(Identifier.of(Array(), "table_name")), + "Table should have been dropped and failed to be created.") + } + + test("ReplaceTableAsSelect: Atomic catalog does not drop the table when replace fails.") { + spark.sql("CREATE TABLE testcat_atomic.table_name USING foo AS SELECT id, data FROM source") + val testCatalog = catalog("testcat_atomic").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + intercept[Exception] { + spark.sql("REPLACE TABLE testcat_atomic.table_name" + + s" USING foo OPTIONS (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}=true)" + + s" AS SELECT id FROM source") + } + + 
var maybeReplacedTable = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(maybeReplacedTable === table, "Table should not have changed.") + + intercept[Exception] { + spark.sql("REPLACE TABLE testcat_atomic.table_name" + + s" USING foo" + + s" TBLPROPERTIES (`${InMemoryTableCatalog.SIMULATE_FAILED_CREATE_PROPERTY}`=true)" + + s" AS SELECT id FROM source") + } + + maybeReplacedTable = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(maybeReplacedTable === table, "Table should not have changed.") + } + + test("ReplaceTable: Erases the table contents and changes the metadata.") { + spark.sql(s"CREATE TABLE testcat.table_name USING $v2Source AS SELECT id, data FROM source") + + val testCatalog = catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table.asInstanceOf[InMemoryTable].rows.nonEmpty) + + spark.sql("REPLACE TABLE testcat.table_name (id bigint NOT NULL) USING foo") + val replaced = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(replaced.asInstanceOf[InMemoryTable].rows.isEmpty, + "Replaced table should have no rows after committing.") + assert(replaced.schema().fields.length === 1, + "Replaced table should have new schema.") + assert(replaced.schema().fields(0) === StructField("id", LongType, nullable = false), + "Replaced table should have new schema.") + } + + test("ReplaceTableAsSelect: CREATE OR REPLACE new table has same behavior as CTAS.") { + Seq("testcat", "testcat_atomic").foreach { catalogName => + spark.sql( + s""" + |CREATE TABLE $catalogName.created USING $v2Source + |AS SELECT id, data FROM source + """.stripMargin) + spark.sql( + s""" + |CREATE OR REPLACE TABLE $catalogName.replaced USING $v2Source + |AS SELECT id, data FROM source + """.stripMargin) + + val testCatalog = catalog(catalogName).asTableCatalog + val createdTable = testCatalog.loadTable(Identifier.of(Array(), "created")) + val replacedTable = 
testCatalog.loadTable(Identifier.of(Array(), "replaced")) + + assert(createdTable.asInstanceOf[InMemoryTable].rows === + replacedTable.asInstanceOf[InMemoryTable].rows) + assert(createdTable.schema === replacedTable.schema) + } + } + + test("ReplaceTableAsSelect: REPLACE TABLE throws exception if table does not exist.") { + Seq("testcat", "testcat_atomic").foreach { catalog => + spark.sql(s"CREATE TABLE $catalog.created USING $v2Source AS SELECT id, data FROM source") + intercept[CannotReplaceMissingTableException] { + spark.sql(s"REPLACE TABLE $catalog.replaced USING $v2Source AS SELECT id, data FROM source") + } + } + } + + test("ReplaceTableAsSelect: REPLACE TABLE throws exception if table is dropped before commit.") { + import InMemoryTableCatalog._ + spark.sql(s"CREATE TABLE testcat_atomic.created USING $v2Source AS SELECT id, data FROM source") + intercept[CannotReplaceMissingTableException] { + spark.sql(s"REPLACE TABLE testcat_atomic.replaced" + + s" USING $v2Source" + + s" TBLPROPERTIES (`$SIMULATE_DROP_BEFORE_REPLACE_PROPERTY`=true)" + + s" AS SELECT id, data FROM source") + } + } + + test("CreateTableAsSelect: use v2 plan and session catalog when provider is v2") { + spark.sql(s"CREATE TABLE table_name USING $v2Source AS SELECT id, data FROM source") + + val testCatalog = catalog(SESSION_CATALOG_NAME).asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == "default.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> v2Source)).asJava) + assert(table.schema == new StructType() + .add("id", LongType) + .add("data", StringType)) + + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) + } + + test("CreateTableAsSelect: fail if table exists") { + spark.sql("CREATE TABLE testcat.table_name USING foo AS SELECT id, data FROM 
source") + + val testCatalog = catalog("testcat").asTableCatalog + + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table.name == "testcat.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType() + .add("id", LongType) + .add("data", StringType)) + + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) + + // run a second CTAS query that should fail + val exc = intercept[TableAlreadyExistsException] { + spark.sql( + "CREATE TABLE testcat.table_name USING bar AS SELECT id, data, id as id2 FROM source2") + } + + assert(exc.getMessage.contains("table_name")) + + // table should not have changed + val table2 = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + assert(table2.name == "testcat.table_name") + assert(table2.partitioning.isEmpty) + assert(table2.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table2.schema == new StructType() + .add("id", LongType) + .add("data", StringType)) + + val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), spark.table("source")) + } + + test("CreateTableAsSelect: if not exists") { + spark.sql( + "CREATE TABLE IF NOT EXISTS testcat.table_name USING foo AS SELECT id, data FROM source") + + val testCatalog = catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == "testcat.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType() + .add("id", LongType) + .add("data", StringType)) + + val rdd = 
spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) + + spark.sql( + "CREATE TABLE IF NOT EXISTS testcat.table_name USING foo AS SELECT id, data FROM source2") + + // check that the table contains data from just the first CTAS + val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), spark.table("source")) + } + + test("CreateTableAsSelect: use default catalog for v2 sources when default catalog is set") { + spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") + + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + + // setting the default catalog breaks the reference to source because the default catalog is + // used and AsTableIdentifier no longer matches + spark.sql(s"CREATE TABLE table_name USING foo AS SELECT id, data FROM source") + + val testCatalog = catalog("testcat").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == "testcat.table_name") + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType() + .add("id", LongType) + .add("data", StringType)) + + val rdd = sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) + } + + test("CreateTableAsSelect: v2 session catalog can load v1 source table") { + // unset this config to use the default v2 session catalog. 
+ spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + + sql(s"CREATE TABLE table_name USING parquet AS SELECT id, data FROM source") + + checkAnswer(sql(s"TABLE default.table_name"), spark.table("source")) + // The fact that the following line doesn't throw an exception means, the session catalog + // can load the table. + val t = catalog(SESSION_CATALOG_NAME).asTableCatalog + .loadTable(Identifier.of(Array.empty, "table_name")) + assert(t.isInstanceOf[V1Table], "V1 table wasn't returned as an unresolved table") + } + + test("CreateTableAsSelect: nullable schema") { + val basicCatalog = catalog("testcat").asTableCatalog + val atomicCatalog = catalog("testcat_atomic").asTableCatalog + val basicIdentifier = "testcat.table_name" + val atomicIdentifier = "testcat_atomic.table_name" + + Seq((basicCatalog, basicIdentifier), (atomicCatalog, atomicIdentifier)).foreach { + case (catalog, identifier) => + spark.sql(s"CREATE TABLE $identifier USING foo AS SELECT 1 i") + + val table = catalog.loadTable(Identifier.of(Array(), "table_name")) + + assert(table.name == identifier) + assert(table.partitioning.isEmpty) + assert(table.properties == withDefaultOwnership(Map("provider" -> "foo")).asJava) + assert(table.schema == new StructType().add("i", "int")) + + val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Row(1)) + + sql(s"INSERT INTO $identifier SELECT CAST(null AS INT)") + val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) + checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), Seq(Row(1), Row(null))) + } + } + + test("DropTable: basic") { + val tableName = "testcat.ns1.ns2.tbl" + val ident = Identifier.of(Array("ns1", "ns2"), "tbl") + sql(s"CREATE TABLE $tableName USING foo AS SELECT id, data FROM source") 
+ assert(catalog("testcat").asTableCatalog.tableExists(ident) === true) + sql(s"DROP TABLE $tableName") + assert(catalog("testcat").asTableCatalog.tableExists(ident) === false) + } + + test("DropTable: table qualified with the session catalog name") { + val ident = Identifier.of(Array(), "tbl") + sql("CREATE TABLE tbl USING json AS SELECT 1 AS i") + assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === true) + sql("DROP TABLE spark_catalog.tbl") + assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === false) + } + + test("DropTable: if exists") { + intercept[NoSuchTableException] { + sql(s"DROP TABLE testcat.db.notbl") + } + sql(s"DROP TABLE IF EXISTS testcat.db.notbl") + } + + test("Relation: basic") { + val t1 = "testcat.ns1.ns2.tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") + checkAnswer(sql(s"TABLE $t1"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $t1"), spark.table("source")) + } + } + + test("Relation: SparkSession.table()") { + val t1 = "testcat.ns1.ns2.tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") + checkAnswer(spark.table(s"$t1"), spark.table("source")) + } + } + + test("Relation: CTE") { + val t1 = "testcat.ns1.ns2.tbl" + withTable(t1) { + sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") + checkAnswer( + sql(s""" + |WITH cte AS (SELECT * FROM $t1) + |SELECT * FROM cte + """.stripMargin), + spark.table("source")) + } + } + + test("Relation: view text") { + val t1 = "testcat.ns1.ns2.tbl" + withTable(t1) { + withView("view1") { v1: String => + sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") + sql(s"CREATE VIEW $v1 AS SELECT * from $t1") + checkAnswer(sql(s"TABLE $v1"), spark.table("source")) + } + } + } + + test("Relation: join tables in 2 catalogs") { + val t1 = "testcat.ns1.ns2.tbl" + val t2 = "testcat2.v2tbl" + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM 
source") + sql(s"CREATE TABLE $t2 USING foo AS SELECT id, data FROM source2") + val df1 = spark.table("source") + val df2 = spark.table("source2") + val df_joined = df1.join(df2).where(df1("id") + 1 === df2("id")) + checkAnswer( + sql(s""" + |SELECT * + |FROM $t1 t1, $t2 t2 + |WHERE t1.id + 1 = t2.id + """.stripMargin), + df_joined) + } + } + + test("qualified column names for v2 tables") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + // Field list restored (it was stripped to a bare `struct`): the INSERT below + // supplies a two-field tuple and the SELECTs resolve `point.x`, so the column + // must be a struct with fields x and y. + sql(s"CREATE TABLE $t (id bigint, point struct<x: bigint, y: bigint>) USING foo") + sql(s"INSERT INTO $t VALUES (1, (10, 20))") + + checkAnswer( + sql(s"SELECT testcat.ns1.ns2.tbl.id, testcat.ns1.ns2.tbl.point.x FROM $t"), + Row(1, 10)) + checkAnswer(sql(s"SELECT ns1.ns2.tbl.id, ns1.ns2.tbl.point.x FROM $t"), Row(1, 10)) + checkAnswer(sql(s"SELECT ns2.tbl.id, ns2.tbl.point.x FROM $t"), Row(1, 10)) + checkAnswer(sql(s"SELECT tbl.id, tbl.point.x FROM $t"), Row(1, 10)) + + val ex = intercept[AnalysisException] { + sql(s"SELECT ns1.ns2.ns3.tbl.id from $t") + } + assert(ex.getMessage.contains("cannot resolve '`ns1.ns2.ns3.tbl.id`")) + } + } + + test("qualified column names for v1 tables") { + // unset this config to use the default v2 session catalog. + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + + withTable("t") { + sql("CREATE TABLE t USING json AS SELECT 1 AS i") + checkAnswer(sql("select default.t.i from spark_catalog.t"), Row(1)) + checkAnswer(sql("select t.i from spark_catalog.default.t"), Row(1)) + checkAnswer(sql("select default.t.i from spark_catalog.default.t"), Row(1)) + + // catalog name cannot be used for v1 tables.
+ val ex = intercept[AnalysisException] { + sql(s"select spark_catalog.default.t.i from spark_catalog.default.t") + } + assert(ex.getMessage.contains("cannot resolve '`spark_catalog.default.t.i`")) + } + } + + test("InsertInto: append - across catalog") { + val t1 = "testcat.ns1.ns2.tbl" + val t2 = "testcat2.db.tbl" + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 USING foo AS SELECT * FROM source") + sql(s"CREATE TABLE $t2 (id bigint, data string) USING foo") + sql(s"INSERT INTO $t2 SELECT * FROM $t1") + checkAnswer(spark.table(t2), spark.table("source")) + } + } + + test("ShowTables: using v2 catalog") { + spark.sql("CREATE TABLE testcat.db.table_name (id bigint, data string) USING foo") + spark.sql("CREATE TABLE testcat.n1.n2.db.table_name (id bigint, data string) USING foo") + + runShowTablesSql("SHOW TABLES FROM testcat.db", Seq(Row("db", "table_name"))) + + runShowTablesSql( + "SHOW TABLES FROM testcat.n1.n2.db", + Seq(Row("n1.n2.db", "table_name"))) + } + + test("ShowTables: using v2 catalog with a pattern") { + spark.sql("CREATE TABLE testcat.db.table (id bigint, data string) USING foo") + spark.sql("CREATE TABLE testcat.db.table_name_1 (id bigint, data string) USING foo") + spark.sql("CREATE TABLE testcat.db.table_name_2 (id bigint, data string) USING foo") + spark.sql("CREATE TABLE testcat.db2.table_name_2 (id bigint, data string) USING foo") + + runShowTablesSql( + "SHOW TABLES FROM testcat.db", + Seq( + Row("db", "table"), + Row("db", "table_name_1"), + Row("db", "table_name_2"))) + + runShowTablesSql( + "SHOW TABLES FROM testcat.db LIKE '*name*'", + Seq(Row("db", "table_name_1"), Row("db", "table_name_2"))) + + runShowTablesSql( + "SHOW TABLES FROM testcat.db LIKE '*2'", + Seq(Row("db", "table_name_2"))) + } + + test("ShowTables: using v2 catalog, namespace doesn't exist") { + runShowTablesSql("SHOW TABLES FROM testcat.unknown", Seq()) + } + + test("ShowTables: using v1 catalog") { + runShowTablesSql( + "SHOW TABLES FROM default", + Seq(Row("", "source", 
true), Row("", "source2", true)), + expectV2Catalog = false) + } + + test("ShowTables: using v1 catalog, db doesn't exist ") { + // 'db' below resolves to a database name for v1 catalog because there is no catalog named + // 'db' and there is no default catalog set. + val exception = intercept[NoSuchDatabaseException] { + runShowTablesSql("SHOW TABLES FROM db", Seq(), expectV2Catalog = false) + } + + assert(exception.getMessage.contains("Database 'db' not found")) + } + + test("ShowTables: using v1 catalog, db name with multipartIdentifier ('a.b') is not allowed.") { + val exception = intercept[AnalysisException] { + runShowTablesSql("SHOW TABLES FROM a.b", Seq(), expectV2Catalog = false) + } + + assert(exception.getMessage.contains("The database name is not valid: a.b")) + } + + test("ShowTables: using v2 catalog with empty namespace") { + spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") + runShowTablesSql("SHOW TABLES FROM testcat", Seq(Row("", "table"))) + } + + test("ShowTables: namespace is not specified and default v2 catalog is set") { + spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") + spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") + + // v2 catalog is used where default namespace is empty for TestInMemoryTableCatalog. + runShowTablesSql("SHOW TABLES", Seq(Row("", "table"))) + } + + test("ShowTables: namespace not specified and default v2 catalog not set - fallback to v1") { + runShowTablesSql( + "SHOW TABLES", + Seq(Row("", "source", true), Row("", "source2", true)), + expectV2Catalog = false) + + runShowTablesSql( + "SHOW TABLES LIKE '*2'", + Seq(Row("", "source2", true)), + expectV2Catalog = false) + } + + test("ShowTables: change current catalog and namespace with USE statements") { + sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") + + // Initially, the v2 session catalog (current catalog) is used. 
+ runShowTablesSql( + "SHOW TABLES", Seq(Row("", "source", true), Row("", "source2", true)), + expectV2Catalog = false) + + // Update the current catalog, and no table is matched since the current namespace is Array(). + sql("USE testcat") + runShowTablesSql("SHOW TABLES", Seq()) + + // Update the current namespace to match ns1.ns2.table. + sql("USE testcat.ns1.ns2") + runShowTablesSql("SHOW TABLES", Seq(Row("ns1.ns2", "table"))) + } + + private def runShowTablesSql( + sqlText: String, + expected: Seq[Row], + expectV2Catalog: Boolean = true): Unit = { + val schema = if (expectV2Catalog) { + new StructType() + .add("namespace", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + } else { + new StructType() + .add("database", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + .add("isTemporary", BooleanType, nullable = false) + } + + val df = spark.sql(sqlText) + assert(df.schema === schema) + assert(expected === df.collect()) + } + + test("SHOW TABLE EXTENDED not valid v1 database") { + def testV1CommandNamespace(sqlCommand: String, namespace: String): Unit = { + val e = intercept[AnalysisException] { + sql(sqlCommand) + } + assert(e.message.contains(s"The database name is not valid: ${namespace}")) + } + + val namespace = "testcat.ns1.ns2" + val table = "tbl" + withTable(s"$namespace.$table") { + sql(s"CREATE TABLE $namespace.$table (id bigint, data string) " + + s"USING foo PARTITIONED BY (id)") + + testV1CommandNamespace(s"SHOW TABLE EXTENDED FROM $namespace LIKE 'tb*'", + namespace) + testV1CommandNamespace(s"SHOW TABLE EXTENDED IN $namespace LIKE 'tb*'", + namespace) + testV1CommandNamespace("SHOW TABLE EXTENDED " + + s"FROM $namespace LIKE 'tb*' PARTITION(id=1)", + namespace) + testV1CommandNamespace("SHOW TABLE EXTENDED " + + s"IN $namespace LIKE 'tb*' PARTITION(id=1)", + namespace) + } + } + + test("SHOW TABLE EXTENDED valid v1") { + val expected = Seq(Row("", "source", true), Row("", "source2", 
true)) + val schema = new StructType() + .add("database", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + .add("isTemporary", BooleanType, nullable = false) + .add("information", StringType, nullable = false) + + val df = sql("SHOW TABLE EXTENDED FROM default LIKE '*source*'") + val result = df.collect() + val resultWithoutInfo = result.map{ case Row(db, table, temp, _) => Row(db, table, temp)} + + assert(df.schema === schema) + assert(resultWithoutInfo === expected) + result.foreach{ case Row(_, _, _, info: String) => assert(info.nonEmpty)} + } + + test("CreateNameSpace: basic tests") { + // Session catalog is used. + withNamespace("ns") { + sql("CREATE NAMESPACE ns") + testShowNamespaces("SHOW NAMESPACES", Seq("default", "ns")) + } + + // V2 non-session catalog is used. + withNamespace("testcat.ns1.ns2") { + sql("CREATE NAMESPACE testcat.ns1.ns2") + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq("ns1")) + testShowNamespaces("SHOW NAMESPACES IN testcat.ns1", Seq("ns1.ns2")) + } + + withNamespace("testcat.test") { + withTempDir { tmpDir => + val path = tmpDir.getCanonicalPath + sql(s"CREATE NAMESPACE testcat.test LOCATION '$path'") + val metadata = + catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("test")).asScala + val catalogPath = metadata(SupportsNamespaces.PROP_LOCATION) + assert(catalogPath.equals(catalogPath)) + } + } + } + + test("CreateNameSpace: test handling of 'IF NOT EXIST'") { + withNamespace("testcat.ns1") { + sql("CREATE NAMESPACE IF NOT EXISTS testcat.ns1") + + // The 'ns1' namespace already exists, so this should fail. + val exception = intercept[NamespaceAlreadyExistsException] { + sql("CREATE NAMESPACE testcat.ns1") + } + assert(exception.getMessage.contains("Namespace 'ns1' already exists")) + + // The following will be no-op since the namespace already exists. 
+ sql("CREATE NAMESPACE IF NOT EXISTS testcat.ns1") + } + } + + test("CreateNameSpace: reserved properties") { + import SupportsNamespaces._ + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + val exception = intercept[ParseException] { + sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='dummyVal')") + } + assert(exception.getMessage.contains(s"$key is a reserved namespace property")) + } + } + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + withNamespace("testcat.reservedTest") { + sql(s"CREATE NAMESPACE testcat.reservedTest WITH DBPROPERTIES('$key'='foo')") + assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") + .toDF("k", "v") + .where("k='Properties'") + .isEmpty, s"$key is a reserved namespace property and ignored") + val meta = + catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest")) + assert(meta.get(key) == null || !meta.get(key).contains("foo"), + "reserved properties should not have side effects") + } + } + } + } + + test("create/replace/alter table - reserved properties") { + import TableCatalog._ + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { + CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + Seq("OPTIONS", "TBLPROPERTIES").foreach { clause => + Seq("CREATE", "REPLACE").foreach { action => + val e = intercept[ParseException] { + sql(s"$action TABLE testcat.reservedTest (key int) USING foo $clause ('$key'='bar')") + } + assert(e.getMessage.contains(s"$key is a reserved table property")) + } + } + + val e1 = intercept[ParseException] { + sql(s"ALTER TABLE testcat.reservedTest SET TBLPROPERTIES ('$key'='bar')") + } + assert(e1.getMessage.contains(s"$key is a reserved table property")) + + val e2 = intercept[ParseException] 
{ + sql(s"ALTER TABLE testcat.reservedTest UNSET TBLPROPERTIES ('$key')") + } + assert(e2.getMessage.contains(s"$key is a reserved table property")) + } + } + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { + CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + Seq("OPTIONS", "TBLPROPERTIES").foreach { clause => + withTable("testcat.reservedTest") { + Seq("CREATE", "REPLACE").foreach { action => + sql(s"$action TABLE testcat.reservedTest (key int) USING foo $clause ('$key'='bar')") + val tableCatalog = catalog("testcat").asTableCatalog + val identifier = Identifier.of(Array(), "reservedTest") + val originValue = tableCatalog.loadTable(identifier).properties().get(key) + assert(originValue != "bar", "reserved properties should not have side effects") + sql(s"ALTER TABLE testcat.reservedTest SET TBLPROPERTIES ('$key'='newValue')") + assert(tableCatalog.loadTable(identifier).properties().get(key) == originValue, + "reserved properties should not have side effects") + sql(s"ALTER TABLE testcat.reservedTest UNSET TBLPROPERTIES ('$key')") + assert(tableCatalog.loadTable(identifier).properties().get(key) == originValue, + "reserved properties should not have side effects") + } + } + } + } + } + } + + test("create/replace - path property") { + Seq("true", "false").foreach { conf => + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, conf)) { + withTable("testcat.reservedTest") { + Seq("CREATE", "REPLACE").foreach { action => + val e1 = intercept[ParseException] { + sql(s"$action TABLE testcat.reservedTest USING foo LOCATION 'foo' OPTIONS" + + s" ('path'='bar')") + } + assert(e1.getMessage.contains(s"Duplicated table paths found: 'foo' and 'bar'")) + + val e2 = intercept[ParseException] { + sql(s"$action TABLE testcat.reservedTest USING foo OPTIONS" + + s" ('path'='foo', 'PaTh'='bar')") + } + assert(e2.getMessage.contains(s"Duplicated table paths found: 'foo' and 'bar'")) + + sql(s"$action TABLE testcat.reservedTest 
USING foo LOCATION 'foo' TBLPROPERTIES" + + s" ('path'='bar', 'Path'='noop')") + val tableCatalog = catalog("testcat").asTableCatalog + val identifier = Identifier.of(Array(), "reservedTest") + assert(tableCatalog.loadTable(identifier).properties() + .get(TableCatalog.PROP_LOCATION) == "foo", + "path as a table property should not have side effects") + assert(tableCatalog.loadTable(identifier).properties().get("path") == "bar", + "path as a table property should not have side effects") + assert(tableCatalog.loadTable(identifier).properties().get("Path") == "noop", + "path as a table property should not have side effects") + } + } + } + } + } + + test("DropNamespace: basic tests") { + // Session catalog is used. + sql("CREATE NAMESPACE ns") + testShowNamespaces("SHOW NAMESPACES", Seq("default", "ns")) + sql("DROP NAMESPACE ns") + testShowNamespaces("SHOW NAMESPACES", Seq("default")) + + // V2 non-session catalog is used. + sql("CREATE NAMESPACE testcat.ns1") + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq("ns1")) + sql("DROP NAMESPACE testcat.ns1") + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq()) + } + + test("DropNamespace: drop non-empty namespace with a non-cascading mode") { + sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") + sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq("ns1")) + testShowNamespaces("SHOW NAMESPACES IN testcat.ns1", Seq("ns1.ns2")) + + def assertDropFails(): Unit = { + val e = intercept[SparkException] { + sql("DROP NAMESPACE testcat.ns1") + } + assert(e.getMessage.contains("Cannot drop a non-empty namespace: ns1")) + } + + // testcat.ns1.table is present, thus testcat.ns1 cannot be dropped. + assertDropFails() + sql("DROP TABLE testcat.ns1.table") + + // testcat.ns1.ns2.table is present, thus testcat.ns1 cannot be dropped. 
+ assertDropFails() + sql("DROP TABLE testcat.ns1.ns2.table") + + // testcat.ns1.ns2 namespace is present, thus testcat.ns1 cannot be dropped. + assertDropFails() + sql("DROP NAMESPACE testcat.ns1.ns2") + + // Now that testcat.ns1 is empty, it can be dropped. + sql("DROP NAMESPACE testcat.ns1") + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq()) + } + + test("DropNamespace: drop non-empty namespace with a cascade mode") { + sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") + sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq("ns1")) + testShowNamespaces("SHOW NAMESPACES IN testcat.ns1", Seq("ns1.ns2")) + + sql("DROP NAMESPACE testcat.ns1 CASCADE") + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq()) + } + + test("DropNamespace: test handling of 'IF EXISTS'") { + sql("DROP NAMESPACE IF EXISTS testcat.unknown") + + val exception = intercept[NoSuchNamespaceException] { + sql("DROP NAMESPACE testcat.ns1") + } + assert(exception.getMessage.contains("Namespace 'ns1' not found")) + } + + test("DescribeNamespace using v2 catalog") { + withNamespace("testcat.ns1.ns2") { + sql("CREATE NAMESPACE IF NOT EXISTS testcat.ns1.ns2 COMMENT " + + "'test namespace' LOCATION '/tmp/ns_test'") + val descriptionDf = sql("DESCRIBE NAMESPACE testcat.ns1.ns2") + assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === + Seq( + ("name", StringType), + ("value", StringType) + )) + val description = descriptionDf.collect() + assert(description === Seq( + Row("Namespace Name", "ns2"), + Row(SupportsNamespaces.PROP_COMMENT.capitalize, "test namespace"), + Row(SupportsNamespaces.PROP_LOCATION.capitalize, "/tmp/ns_test"), + Row(SupportsNamespaces.PROP_OWNER.capitalize, defaultUser)) + ) + } + } + + test("AlterNamespaceSetProperties using v2 catalog") { + withNamespace("testcat.ns1.ns2") { + sql("CREATE NAMESPACE IF NOT EXISTS testcat.ns1.ns2 COMMENT " + + "'test namespace' LOCATION 
'/tmp/ns_test' WITH PROPERTIES ('a'='a','b'='b','c'='c')") + sql("ALTER NAMESPACE testcat.ns1.ns2 SET PROPERTIES ('a'='b','b'='a')") + val descriptionDf = sql("DESCRIBE NAMESPACE EXTENDED testcat.ns1.ns2") + assert(descriptionDf.collect() === Seq( + Row("Namespace Name", "ns2"), + Row(SupportsNamespaces.PROP_COMMENT.capitalize, "test namespace"), + Row(SupportsNamespaces.PROP_LOCATION.capitalize, "/tmp/ns_test"), + Row(SupportsNamespaces.PROP_OWNER.capitalize, defaultUser), + Row("Properties", "((a,b),(b,a),(c,c))")) + ) + } + } + + test("AlterNamespaceSetProperties: reserved properties") { + import SupportsNamespaces._ + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + withNamespace("testcat.reservedTest") { + sql("CREATE NAMESPACE testcat.reservedTest") + val exception = intercept[ParseException] { + sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='dummyVal')") + } + assert(exception.getMessage.contains(s"$key is a reserved namespace property")) + } + } + } + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + withNamespace("testcat.reservedTest") { + sql(s"CREATE NAMESPACE testcat.reservedTest") + sql(s"ALTER NAMESPACE testcat.reservedTest SET PROPERTIES ('$key'='foo')") + assert(sql("DESC NAMESPACE EXTENDED testcat.reservedTest") + .toDF("k", "v") + .where("k='Properties'") + .isEmpty, s"$key is a reserved namespace property and ignored") + val meta = + catalog("testcat").asNamespaceCatalog.loadNamespaceMetadata(Array("reservedTest")) + assert(meta.get(key) == null || !meta.get(key).contains("foo"), + "reserved properties should not have side effects") + } + } + } + } + + test("AlterNamespaceSetLocation using v2 catalog") { + withNamespace("testcat.ns1.ns2") { + sql("CREATE NAMESPACE IF NOT EXISTS testcat.ns1.ns2 COMMENT " + + 
"'test namespace' LOCATION '/tmp/ns_test_1'") + sql("ALTER NAMESPACE testcat.ns1.ns2 SET LOCATION '/tmp/ns_test_2'") + val descriptionDf = sql("DESCRIBE NAMESPACE EXTENDED testcat.ns1.ns2") + assert(descriptionDf.collect() === Seq( + Row("Namespace Name", "ns2"), + Row(SupportsNamespaces.PROP_COMMENT.capitalize, "test namespace"), + Row(SupportsNamespaces.PROP_LOCATION.capitalize, "/tmp/ns_test_2"), + Row(SupportsNamespaces.PROP_OWNER.capitalize, defaultUser)) + ) + } + } + + test("ShowNamespaces: show root namespaces with default v2 catalog") { + spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") + + testShowNamespaces("SHOW NAMESPACES", Seq()) + + spark.sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") + spark.sql("CREATE TABLE testcat.ns1.ns1_1.table (id bigint) USING foo") + spark.sql("CREATE TABLE testcat.ns2.table (id bigint) USING foo") + + testShowNamespaces("SHOW NAMESPACES", Seq("ns1", "ns2")) + testShowNamespaces("SHOW NAMESPACES LIKE '*1*'", Seq("ns1")) + } + + test("ShowNamespaces: show namespaces with v2 catalog") { + spark.sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") + spark.sql("CREATE TABLE testcat.ns1.ns1_1.table (id bigint) USING foo") + spark.sql("CREATE TABLE testcat.ns1.ns1_2.table (id bigint) USING foo") + spark.sql("CREATE TABLE testcat.ns2.table (id bigint) USING foo") + spark.sql("CREATE TABLE testcat.ns2.ns2_1.table (id bigint) USING foo") + + // Look up only with catalog name, which should list root namespaces. + testShowNamespaces("SHOW NAMESPACES IN testcat", Seq("ns1", "ns2")) + + // Look up sub-namespaces. + testShowNamespaces("SHOW NAMESPACES IN testcat.ns1", Seq("ns1.ns1_1", "ns1.ns1_2")) + testShowNamespaces("SHOW NAMESPACES IN testcat.ns1 LIKE '*2*'", Seq("ns1.ns1_2")) + testShowNamespaces("SHOW NAMESPACES IN testcat.ns2", Seq("ns2.ns2_1")) + + // Try to look up namespaces that do not exist. 
+ testShowNamespaces("SHOW NAMESPACES IN testcat.ns3", Seq()) + testShowNamespaces("SHOW NAMESPACES IN testcat.ns1.ns3", Seq()) + } + + test("ShowNamespaces: default v2 catalog is not set") { + spark.sql("CREATE TABLE testcat.ns.table (id bigint) USING foo") + + // The current catalog is resolved to a v2 session catalog. + testShowNamespaces("SHOW NAMESPACES", Seq("default")) + } + + test("ShowNamespaces: default v2 catalog doesn't support namespace") { + spark.conf.set( + "spark.sql.catalog.testcat_no_namspace", + classOf[BasicInMemoryTableCatalog].getName) + spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat_no_namspace") + + val exception = intercept[AnalysisException] { + sql("SHOW NAMESPACES") + } + + assert(exception.getMessage.contains("does not support namespaces")) + } + + test("ShowNamespaces: v2 catalog doesn't support namespace") { + spark.conf.set( + "spark.sql.catalog.testcat_no_namspace", + classOf[BasicInMemoryTableCatalog].getName) + + val exception = intercept[AnalysisException] { + sql("SHOW NAMESPACES in testcat_no_namspace") + } + + assert(exception.getMessage.contains("does not support namespaces")) + } + + test("ShowNamespaces: session catalog is used and namespace doesn't exist") { + val exception = intercept[AnalysisException] { + sql("SHOW NAMESPACES in dummy") + } + + assert(exception.getMessage.contains("Namespace 'dummy' not found")) + } + + test("ShowNamespaces: change catalog and namespace with USE statements") { + sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") + + // Initially, the current catalog is a v2 session catalog. + testShowNamespaces("SHOW NAMESPACES", Seq("default")) + + // Update the current catalog to 'testcat'. + sql("USE testcat") + testShowNamespaces("SHOW NAMESPACES", Seq("ns1")) + + // Update the current namespace to 'ns1'. + sql("USE ns1") + // 'SHOW NAMESPACES' is not affected by the current namespace and lists root namespaces. 
+ testShowNamespaces("SHOW NAMESPACES", Seq("ns1")) + } + + private def testShowNamespaces( + sqlText: String, + expected: Seq[String]): Unit = { + val schema = new StructType().add("namespace", StringType, nullable = false) + + val df = spark.sql(sqlText) + assert(df.schema === schema) + assert(df.collect().map(_.getAs[String](0)).sorted === expected.sorted) + } + + test("Use: basic tests with USE statements") { + val catalogManager = spark.sessionState.catalogManager + + // Validate the initial current catalog and namespace. + assert(catalogManager.currentCatalog.name() == SESSION_CATALOG_NAME) + assert(catalogManager.currentNamespace === Array("default")) + + // The following implicitly creates namespaces. + sql("CREATE TABLE testcat.ns1.ns1_1.table (id bigint) USING foo") + sql("CREATE TABLE testcat2.ns2.ns2_2.table (id bigint) USING foo") + sql("CREATE TABLE testcat2.ns3.ns3_3.table (id bigint) USING foo") + sql("CREATE TABLE testcat2.testcat.table (id bigint) USING foo") + + // Catalog is resolved to 'testcat'. + sql("USE testcat.ns1.ns1_1") + assert(catalogManager.currentCatalog.name() == "testcat") + assert(catalogManager.currentNamespace === Array("ns1", "ns1_1")) + + // Catalog is resolved to 'testcat2'. + sql("USE testcat2.ns2.ns2_2") + assert(catalogManager.currentCatalog.name() == "testcat2") + assert(catalogManager.currentNamespace === Array("ns2", "ns2_2")) + + // Only the namespace is changed. + sql("USE ns3.ns3_3") + assert(catalogManager.currentCatalog.name() == "testcat2") + assert(catalogManager.currentNamespace === Array("ns3", "ns3_3")) + + // Only the namespace is changed (explicit). + sql("USE NAMESPACE testcat") + assert(catalogManager.currentCatalog.name() == "testcat2") + assert(catalogManager.currentNamespace === Array("testcat")) + + // Catalog is resolved to `testcat`. 
+ sql("USE testcat") + assert(catalogManager.currentCatalog.name() == "testcat") + assert(catalogManager.currentNamespace === Array()) + } + + test("Use: set v2 catalog as a current catalog") { + val catalogManager = spark.sessionState.catalogManager + assert(catalogManager.currentCatalog.name() == SESSION_CATALOG_NAME) + + sql("USE testcat") + assert(catalogManager.currentCatalog.name() == "testcat") + } + + test("Use: v2 session catalog is used and namespace does not exist") { + val exception = intercept[NoSuchDatabaseException] { + sql("USE ns1") + } + assert(exception.getMessage.contains("Database 'ns1' not found")) + } + + test("Use: v2 catalog is used and namespace does not exist") { + // Namespaces are not required to exist for v2 catalogs. + sql("USE testcat.ns1.ns2") + val catalogManager = spark.sessionState.catalogManager + assert(catalogManager.currentNamespace === Array("ns1", "ns2")) + } + + test("ShowCurrentNamespace: basic tests") { + def testShowCurrentNamespace(expectedCatalogName: String, expectedNamespace: String): Unit = { + val schema = new StructType() + .add("catalog", StringType, nullable = false) + .add("namespace", StringType, nullable = false) + val df = sql("SHOW CURRENT NAMESPACE") + val rows = df.collect + + assert(df.schema === schema) + assert(rows.length == 1) + assert(rows(0).getAs[String](0) === expectedCatalogName) + assert(rows(0).getAs[String](1) === expectedNamespace) + } + + // Initially, the v2 session catalog is set as a current catalog. 
+ testShowCurrentNamespace("spark_catalog", "default") + + sql("USE testcat") + testShowCurrentNamespace("testcat", "") + sql("USE testcat.ns1.ns2") + testShowCurrentNamespace("testcat", "ns1.ns2") + } + + test("tableCreation: partition column case insensitive resolution") { + val testCatalog = catalog("testcat").asTableCatalog + val sessionCatalog = catalog(SESSION_CATALOG_NAME).asTableCatalog + + def checkPartitioning(cat: TableCatalog, partition: String): Unit = { + val table = cat.loadTable(Identifier.of(Array.empty, "tbl")) + val partitions = table.partitioning().map(_.references()) + assert(partitions.length === 1) + val fieldNames = partitions.flatMap(_.map(_.fieldNames())) + assert(fieldNames === Array(Array(partition))) + } + + sql(s"CREATE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") + checkPartitioning(sessionCatalog, "a") + sql(s"CREATE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") + checkPartitioning(testCatalog, "a") + sql(s"CREATE OR REPLACE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") + checkPartitioning(sessionCatalog, "b") + sql(s"CREATE OR REPLACE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") + checkPartitioning(testCatalog, "b") + } + + test("tableCreation: partition column case sensitive resolution") { + def checkFailure(statement: String): Unit = { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val e = intercept[AnalysisException] { + sql(statement) + } + assert(e.getMessage.contains("Couldn't find column")) + } + } + + checkFailure(s"CREATE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") + checkFailure(s"CREATE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") + checkFailure( + s"CREATE OR REPLACE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") + checkFailure( + s"CREATE OR REPLACE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") + } + + test("tableCreation: 
duplicate column names in the table definition") { + val errorMsg = "Found duplicate column(s) in the table definition of t" + Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + assertAnalysisError( + s"CREATE TABLE t ($c0 INT, $c1 INT) USING $v2Source", + errorMsg + ) + assertAnalysisError( + s"CREATE TABLE testcat.t ($c0 INT, $c1 INT) USING $v2Source", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t ($c0 INT, $c1 INT) USING $v2Source", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE testcat.t ($c0 INT, $c1 INT) USING $v2Source", + errorMsg + ) + } + } + } + + test("tableCreation: duplicate nested column names in the table definition") { + val errorMsg = "Found duplicate column(s) in the table definition of t" + Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + assertAnalysisError( + s"CREATE TABLE t (d struct<$c0: INT, $c1: INT>) USING $v2Source", + errorMsg + ) + assertAnalysisError( + s"CREATE TABLE testcat.t (d struct<$c0: INT, $c1: INT>) USING $v2Source", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t (d struct<$c0: INT, $c1: INT>) USING $v2Source", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE testcat.t (d struct<$c0: INT, $c1: INT>) USING $v2Source", + errorMsg + ) + } + } + } + + test("tableCreation: bucket column names not in table definition") { + val errorMsg = "Couldn't find column c in" + assertAnalysisError( + s"CREATE TABLE tbl (a int, b string) USING $v2Source CLUSTERED BY (c) INTO 4 BUCKETS", + errorMsg + ) + assertAnalysisError( + s"CREATE TABLE testcat.tbl (a int, b string) USING $v2Source CLUSTERED BY (c) INTO 4 BUCKETS", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE tbl (a int, b string) USING $v2Source " + + "CLUSTERED BY 
(c) INTO 4 BUCKETS", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE testcat.tbl (a int, b string) USING $v2Source " + + "CLUSTERED BY (c) INTO 4 BUCKETS", + errorMsg + ) + } + + test("tableCreation: bucket column name containing dot") { + withTable("t") { + sql( + """ + |CREATE TABLE testcat.t (id int, `a.b` string) USING foo + |CLUSTERED BY (`a.b`) INTO 4 BUCKETS + |OPTIONS ('allow-unsupported-transforms'=true) + """.stripMargin) + + val testCatalog = catalog("testcat").asTableCatalog.asInstanceOf[InMemoryTableCatalog] + val table = testCatalog.loadTable(Identifier.of(Array.empty, "t")) + val partitioning = table.partitioning() + assert(partitioning.length == 1 && partitioning.head.name() == "bucket") + val references = partitioning.head.references() + assert(references.length == 1) + assert(references.head.fieldNames().toSeq == Seq("a.b")) + } + } + + test("tableCreation: column repeated in partition columns") { + val errorMsg = "Found duplicate column(s) in the partitioning" + Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + assertAnalysisError( + s"CREATE TABLE t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", + errorMsg + ) + assertAnalysisError( + s"CREATE TABLE testcat.t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE testcat.t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", + errorMsg + ) + } + } + } + + test("tableCreation: column repeated in bucket columns") { + val errorMsg = "Found duplicate column(s) in the bucket definition" + Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + assertAnalysisError( + s"CREATE 
TABLE t ($c0 INT) USING $v2Source " + + s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", + errorMsg + ) + assertAnalysisError( + s"CREATE TABLE testcat.t ($c0 INT) USING $v2Source " + + s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t ($c0 INT) USING $v2Source " + + s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", + errorMsg + ) + assertAnalysisError( + s"CREATE OR REPLACE TABLE testcat.t ($c0 INT) USING $v2Source " + + s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", + errorMsg + ) + } + } + } + + test("REFRESH TABLE: v2 table") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + + val testCatalog = catalog("testcat").asTableCatalog.asInstanceOf[InMemoryTableCatalog] + val identifier = Identifier.of(Array("ns1", "ns2"), "tbl") + + assert(!testCatalog.isTableInvalidated(identifier)) + sql(s"REFRESH TABLE $t") + assert(testCatalog.isTableInvalidated(identifier)) + } + } + + test("REPLACE TABLE: v1 table") { + val e = intercept[AnalysisException] { + sql(s"CREATE OR REPLACE TABLE tbl (a int) USING ${classOf[SimpleScanSource].getName}") + } + assert(e.message.contains("REPLACE TABLE is only supported with v2 tables")) + } + + test("DeleteFrom: basic - delete all") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + sql(s"DELETE FROM $t") + checkAnswer(spark.table(t), Seq()) + } + } + + test("DeleteFrom: basic - delete with where clause") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + sql(s"DELETE FROM $t WHERE id = 2") + checkAnswer(spark.table(t), Seq( + Row(3, "c", 3))) + } + } + + test("DeleteFrom: delete from aliased target table") 
{ + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + sql(s"DELETE FROM $t AS tbl WHERE tbl.id = 2") + checkAnswer(spark.table(t), Seq( + Row(3, "c", 3))) + } + } + + test("DeleteFrom: normalize attribute names") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + sql(s"DELETE FROM $t AS tbl WHERE tbl.ID = 2") + checkAnswer(spark.table(t), Seq( + Row(3, "c", 3))) + } + } + + test("DeleteFrom: fail if has subquery") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + val exc = intercept[AnalysisException] { + sql(s"DELETE FROM $t WHERE id IN (SELECT id FROM $t)") + } + + assert(spark.table(t).count === 3) + assert(exc.getMessage.contains("Delete by condition with subquery is not supported")) + } + } + + test("DeleteFrom: DELETE is only supported with v2 tables") { + // unset this config to use the default v2 session catalog. 
+ spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + val v1Table = "tbl" + withTable(v1Table) { + sql(s"CREATE TABLE $v1Table" + + s" USING ${classOf[SimpleScanSource].getName} OPTIONS (from=0,to=1)") + val exc = intercept[AnalysisException] { + sql(s"DELETE FROM $v1Table WHERE i = 2") + } + + assert(exc.getMessage.contains("DELETE is only supported with v2 tables")) + } + } + + test("UPDATE TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql( + s""" + |CREATE TABLE $t (id bigint, name string, age int, p int) + |USING foo + |PARTITIONED BY (id, p) + """.stripMargin) + + // UPDATE non-existing table + assertAnalysisError( + "UPDATE dummy SET name='abc'", + "Table or view not found") + + // UPDATE non-existing column + assertAnalysisError( + s"UPDATE $t SET dummy='abc'", + "cannot resolve") + assertAnalysisError( + s"UPDATE $t SET name='abc' WHERE dummy=1", + "cannot resolve") + + // UPDATE is not implemented yet. + val e = intercept[UnsupportedOperationException] { + sql(s"UPDATE $t SET name='Robert', age=32 WHERE p=1") + } + assert(e.getMessage.contains("UPDATE TABLE is not supported temporarily")) + } + } + + test("MERGE INTO TABLE") { + val target = "testcat.ns1.ns2.target" + val source = "testcat.ns1.ns2.source" + withTable(target, source) { + sql( + s""" + |CREATE TABLE $target (id bigint, name string, age int, p int) + |USING foo + |PARTITIONED BY (id, p) + """.stripMargin) + sql( + s""" + |CREATE TABLE $source (id bigint, name string, age int, p int) + |USING foo + |PARTITIONED BY (id, p) + """.stripMargin) + + // MERGE INTO non-existing table + assertAnalysisError( + s""" + |MERGE INTO testcat.ns1.ns2.dummy AS target + |USING testcat.ns1.ns2.source AS source + |ON target.id = source.id + |WHEN MATCHED AND (target.age < 10) THEN DELETE + |WHEN MATCHED AND (target.age > 10) THEN UPDATE SET * + |WHEN NOT MATCHED AND (target.col2='insert') + |THEN INSERT * + """.stripMargin, + "Table or view not found") + + // USING non-existing table + 
assertAnalysisError( + s""" + |MERGE INTO testcat.ns1.ns2.target AS target + |USING testcat.ns1.ns2.dummy AS source + |ON target.id = source.id + |WHEN MATCHED AND (target.age < 10) THEN DELETE + |WHEN MATCHED AND (target.age > 10) THEN UPDATE SET * + |WHEN NOT MATCHED AND (target.col2='insert') + |THEN INSERT * + """.stripMargin, + "Table or view not found") + + // UPDATE non-existing column + assertAnalysisError( + s""" + |MERGE INTO testcat.ns1.ns2.target AS target + |USING testcat.ns1.ns2.source AS source + |ON target.id = source.id + |WHEN MATCHED AND (target.age < 10) THEN DELETE + |WHEN MATCHED AND (target.age > 10) THEN UPDATE SET target.dummy = source.age + |WHEN NOT MATCHED AND (target.col2='insert') + |THEN INSERT * + """.stripMargin, + "cannot resolve") + + // UPDATE using non-existing column + assertAnalysisError( + s""" + |MERGE INTO testcat.ns1.ns2.target AS target + |USING testcat.ns1.ns2.source AS source + |ON target.id = source.id + |WHEN MATCHED AND (target.age < 10) THEN DELETE + |WHEN MATCHED AND (target.age > 10) THEN UPDATE SET target.age = source.dummy + |WHEN NOT MATCHED AND (target.col2='insert') + |THEN INSERT * + """.stripMargin, + "cannot resolve") + + // MERGE INTO is not implemented yet. 
+ val e = intercept[UnsupportedOperationException] { + sql( + s""" + |MERGE INTO testcat.ns1.ns2.target AS target + |USING testcat.ns1.ns2.source AS source + |ON target.id = source.id + |WHEN MATCHED AND (target.p < 0) THEN DELETE + |WHEN MATCHED AND (target.p > 0) THEN UPDATE SET * + |WHEN NOT MATCHED THEN INSERT * + """.stripMargin) + } + assert(e.getMessage.contains("MERGE INTO TABLE is not supported temporarily")) + } + } + + test("AlterTable: rename table basic test") { + withTable("testcat.ns1.new") { + sql(s"CREATE TABLE testcat.ns1.ns2.old USING foo AS SELECT id, data FROM source") + checkAnswer(sql("SHOW TABLES FROM testcat.ns1.ns2"), Seq(Row("ns1.ns2", "old"))) + + sql(s"ALTER TABLE testcat.ns1.ns2.old RENAME TO ns1.new") + checkAnswer(sql("SHOW TABLES FROM testcat.ns1.ns2"), Seq.empty) + checkAnswer(sql("SHOW TABLES FROM testcat.ns1"), Seq(Row("ns1", "new"))) + } + } + + test("AlterTable: renaming views are not supported") { + val e = intercept[AnalysisException] { + sql(s"ALTER VIEW testcat.ns.tbl RENAME TO ns.view") + } + assert(e.getMessage.contains("Renaming view is not supported in v2 catalogs")) + } + + test("ANALYZE TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + testV1Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS") + testV1Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS FOR ALL COLUMNS") + } + } + + test("MSCK REPAIR TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + testV1Command("MSCK REPAIR TABLE", t) + } + } + + test("TRUNCATE TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql( + s""" + |CREATE TABLE $t (id bigint, data string) + |USING foo + |PARTITIONED BY (id) + """.stripMargin) + + testV1Command("TRUNCATE TABLE", t) + testV1Command("TRUNCATE TABLE", s"$t PARTITION(id='1')") + } + } + + test("SHOW PARTITIONS") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql( 
+ s""" + |CREATE TABLE $t (id bigint, data string) + |USING foo + |PARTITIONED BY (id) + """.stripMargin) + + testV1Command("SHOW PARTITIONS", t) + testV1Command("SHOW PARTITIONS", s"$t PARTITION(id='1')") + } + } + + test("LOAD DATA INTO TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql( + s""" + |CREATE TABLE $t (id bigint, data string) + |USING foo + |PARTITIONED BY (id) + """.stripMargin) + + testV1Command("LOAD DATA", s"INPATH 'filepath' INTO TABLE $t") + testV1Command("LOAD DATA", s"LOCAL INPATH 'filepath' INTO TABLE $t") + testV1Command("LOAD DATA", s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t") + testV1Command("LOAD DATA", + s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t PARTITION(id=1)") + } + } + + test("SHOW CREATE TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + testV1Command("SHOW CREATE TABLE", t) + } + } + + test("CACHE TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + + testV1Command("CACHE TABLE", t) + + val e = intercept[AnalysisException] { + sql(s"CACHE LAZY TABLE $t") + } + assert(e.message.contains("CACHE TABLE is only supported with v1 tables")) + } + } + + test("UNCACHE TABLE") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + + testV1Command("UNCACHE TABLE", t) + testV1Command("UNCACHE TABLE", s"IF EXISTS $t") + } + } + + test("SHOW COLUMNS") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + + testV1Command("SHOW COLUMNS", s"FROM $t") + testV1Command("SHOW COLUMNS", s"IN $t") + + val e3 = intercept[AnalysisException] { + sql(s"SHOW COLUMNS FROM tbl IN testcat.ns1.ns2") + } + assert(e3.message.contains("Namespace name should have " + + "only one part if specified: testcat.ns1.ns2")) + } + } + + test("ALTER TABLE RECOVER PARTITIONS") 
{ + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t RECOVER PARTITIONS") + } + assert(e.message.contains("ALTER TABLE RECOVER PARTITIONS is only supported with v1 tables")) + } + } + + test("ALTER TABLE ADD PARTITION") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + } + assert(e.message.contains("ALTER TABLE ADD PARTITION is only supported with v1 tables")) + } + } + + test("ALTER TABLE RENAME PARTITION") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") + } + assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) + } + } + + test("ALTER TABLE DROP PARTITIONS") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + } + assert(e.message.contains("ALTER TABLE DROP PARTITION is only supported with v1 tables")) + } + } + + test("ALTER TABLE SerDe properties") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',')") + } + assert(e.message.contains("ALTER TABLE SerDe Properties is only supported with v1 tables")) + } + } + + test("ALTER VIEW AS QUERY") { + val v = "testcat.ns1.ns2.v" + val e = intercept[AnalysisException] { + sql(s"ALTER VIEW $v 
AS SELECT 1") + } + assert(e.message.contains("ALTER VIEW QUERY is only supported with v1 tables")) + } + + test("CREATE VIEW") { + val v = "testcat.ns1.ns2.v" + val e = intercept[AnalysisException] { + sql(s"CREATE VIEW $v AS SELECT * FROM tab1") + } + assert(e.message.contains("CREATE VIEW is only supported with v1 tables")) + } + + test("SHOW TBLPROPERTIES: v2 table") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + val user = "andrew" + val status = "new" + val provider = "foo" + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING $provider " + + s"TBLPROPERTIES ('user'='$user', 'status'='$status')") + + val properties = sql(s"SHOW TBLPROPERTIES $t").orderBy("key") + + val schema = new StructType() + .add("key", StringType, nullable = false) + .add("value", StringType, nullable = false) + + val expected = Seq( + Row(TableCatalog.PROP_OWNER, defaultUser), + Row("provider", provider), + Row("status", status), + Row("user", user)) + + assert(properties.schema === schema) + assert(expected === properties.collect()) + } + } + + test("SHOW TBLPROPERTIES(key): v2 table") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + val user = "andrew" + val status = "new" + val provider = "foo" + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING $provider " + + s"TBLPROPERTIES ('user'='$user', 'status'='$status')") + + val properties = sql(s"SHOW TBLPROPERTIES $t ('status')") + + val expected = Seq(Row("status", status)) + + assert(expected === properties.collect()) + } + } + + test("SHOW TBLPROPERTIES(key): v2 table, key not found") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + val nonExistingKey = "nonExistingKey" + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo " + + s"TBLPROPERTIES ('user'='andrew', 'status'='new')") + + val properties = sql(s"SHOW TBLPROPERTIES $t ('$nonExistingKey')") + + val expected = Seq(Row(nonExistingKey, s"Table $t does not have property: $nonExistingKey")) + + assert(expected === properties.collect()) + } 
+ } + + test("DESCRIBE FUNCTION: only support session catalog") { + val e = intercept[AnalysisException] { + sql("DESCRIBE FUNCTION testcat.ns1.ns2.fun") + } + assert(e.message.contains("DESCRIBE FUNCTION is only supported in v1 catalog")) + + val e1 = intercept[AnalysisException] { + sql("DESCRIBE FUNCTION default.ns1.ns2.fun") + } + assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'")) + } + + test("SHOW FUNCTIONS not valid v1 namespace") { + val function = "testcat.ns1.ns2.fun" + + val e = intercept[AnalysisException] { + sql(s"SHOW FUNCTIONS LIKE $function") + } + assert(e.message.contains("SHOW FUNCTIONS is only supported in v1 catalog")) + } + + test("DROP FUNCTION: only support session catalog") { + val e = intercept[AnalysisException] { + sql("DROP FUNCTION testcat.ns1.ns2.fun") + } + assert(e.message.contains("DROP FUNCTION is only supported in v1 catalog")) + + val e1 = intercept[AnalysisException] { + sql("DESCRIBE FUNCTION default.ns1.ns2.fun") + } + assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'")) + } + + test("CREATE FUNCTION: only support session catalog") { + val e = intercept[AnalysisException] { + sql("CREATE FUNCTION testcat.ns1.ns2.fun as 'f'") + } + assert(e.message.contains("CREATE FUNCTION is only supported in v1 catalog")) + + val e1 = intercept[AnalysisException] { + sql("CREATE FUNCTION default.ns1.ns2.fun as 'f'") + } + assert(e1.message.contains("Unsupported function name 'default.ns1.ns2.fun'")) + } + + test("global temp view should not be masked by v2 catalog") { + val globalTempDB = spark.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) + spark.conf.set(s"spark.sql.catalog.$globalTempDB", classOf[InMemoryTableCatalog].getName) + + try { + sql("create global temp view v as select 1") + sql(s"alter view $globalTempDB.v rename to v2") + checkAnswer(spark.table(s"$globalTempDB.v2"), Row(1)) + sql(s"drop view $globalTempDB.v2") + } finally { + 
spark.sharedState.globalTempViewManager.clear() + } + } + + test("SPARK-30104: global temp db is used as a table name under v2 catalog") { + val globalTempDB = spark.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) + val t = s"testcat.$globalTempDB" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + sql("USE testcat") + // The following should not throw AnalysisException, but should use `testcat.$globalTempDB`. + sql(s"DESCRIBE TABLE $globalTempDB") + } + } + + test("SPARK-30104: v2 catalog named global_temp will be masked") { + val globalTempDB = spark.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) + spark.conf.set(s"spark.sql.catalog.$globalTempDB", classOf[InMemoryTableCatalog].getName) + + val e = intercept[AnalysisException] { + // Since the following multi-part name starts with `globalTempDB`, it is resolved to + // the session catalog, not the `global_temp` v2 catalog. + sql(s"CREATE TABLE $globalTempDB.ns1.ns2.tbl (id bigint, data string) USING json") + } + assert(e.message.contains("global_temp.ns1.ns2.tbl is not a valid TableIdentifier")) + } + + test("table name same as catalog can be used") { + withTable("testcat.testcat") { + sql(s"CREATE TABLE testcat.testcat (id bigint, data string) USING foo") + sql("USE testcat") + // The following should not throw AnalysisException. + sql(s"DESCRIBE TABLE testcat") + } + } + + test("SPARK-30001: session catalog name can be specified in SQL statements") { + // unset this config to use the default v2 session catalog. 
+ spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + + withTable("t") { + sql("CREATE TABLE t USING json AS SELECT 1 AS i") + checkAnswer(sql("select * from t"), Row(1)) + checkAnswer(sql("select * from spark_catalog.t"), Row(1)) + checkAnswer(sql("select * from spark_catalog.default.t"), Row(1)) + } + } + + test("SPARK-30259: session catalog can be specified in CREATE TABLE AS SELECT command") { + withTable("tbl") { + val ident = Identifier.of(Array(), "tbl") + sql("CREATE TABLE spark_catalog.tbl USING json AS SELECT 1 AS i") + assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === true) + } + } + + test("SPARK-30259: session catalog can be specified in CREATE TABLE command") { + withTable("tbl") { + val ident = Identifier.of(Array(), "tbl") + sql("CREATE TABLE spark_catalog.tbl (col string) USING json") + assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === true) + } + } + + test("SPARK-30094: current namespace is used during table resolution") { + // unset this config to use the default v2 session catalog. + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + + withTable("spark_catalog.t", "testcat.ns.t") { + sql("CREATE TABLE t USING parquet AS SELECT 1") + sql("CREATE TABLE testcat.ns.t USING parquet AS SELECT 2") + + checkAnswer(sql("SELECT * FROM t"), Row(1)) + + sql("USE testcat.ns") + checkAnswer(sql("SELECT * FROM t"), Row(2)) + } + } + + test("SPARK-30284: CREATE VIEW should track the current catalog and namespace") { + // unset this config to use the default v2 session catalog. 
+ spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + val sessionCatalogName = CatalogManager.SESSION_CATALOG_NAME + + sql("USE testcat.ns1.ns2") + sql("CREATE TABLE t USING foo AS SELECT 1 col") + checkAnswer(spark.table("t"), Row(1)) + + withTempView("t") { + spark.range(10).createTempView("t") + withView(s"$sessionCatalogName.v") { + val e = intercept[AnalysisException] { + sql(s"CREATE VIEW $sessionCatalogName.v AS SELECT * FROM t") + } + assert(e.message.contains("referencing a temporary view")) + } + } + + withTempView("t") { + withView(s"$sessionCatalogName.v") { + sql(s"CREATE VIEW $sessionCatalogName.v AS SELECT t1.col FROM t t1 JOIN ns1.ns2.t t2") + sql(s"USE $sessionCatalogName") + // The view should read data from table `testcat.ns1.ns2.t` not the temp view. + spark.range(10).createTempView("t") + checkAnswer(spark.table("v"), Row(1)) + } + } + } + + test("COMMENT ON NAMESPACE") { + // unset this config to use the default v2 session catalog. + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // Session catalog is used. + sql("CREATE NAMESPACE ns") + checkNamespaceComment("ns", "minor revision") + checkNamespaceComment("ns", null) + checkNamespaceComment("ns", "NULL") + intercept[AnalysisException](sql("COMMENT ON NAMESPACE abc IS NULL")) + + // V2 non-session catalog is used. 
+ sql("CREATE NAMESPACE testcat.ns1") + checkNamespaceComment("testcat.ns1", "minor revision") + checkNamespaceComment("testcat.ns1", null) + checkNamespaceComment("testcat.ns1", "NULL") + intercept[AnalysisException](sql("COMMENT ON NAMESPACE testcat.abc IS NULL")) + } + + private def checkNamespaceComment(namespace: String, comment: String): Unit = { + sql(s"COMMENT ON NAMESPACE $namespace IS " + + Option(comment).map("'" + _ + "'").getOrElse("NULL")) + val expectedComment = Option(comment).getOrElse("") + assert(sql(s"DESC NAMESPACE extended $namespace").toDF("k", "v") + .where(s"k='${SupportsNamespaces.PROP_COMMENT.capitalize}'") + .head().getString(1) === expectedComment) + } + + test("COMMENT ON TABLE") { + // unset this config to use the default v2 session catalog. + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // Session catalog is used. + withTable("t") { + sql("CREATE TABLE t(k int) USING json") + checkTableComment("t", "minor revision") + checkTableComment("t", null) + checkTableComment("t", "NULL") + } + intercept[AnalysisException](sql("COMMENT ON TABLE abc IS NULL")) + + // V2 non-session catalog is used. 
+ withTable("testcat.ns1.ns2.t") { + sql("CREATE TABLE testcat.ns1.ns2.t(k int) USING foo") + checkTableComment("testcat.ns1.ns2.t", "minor revision") + checkTableComment("testcat.ns1.ns2.t", null) + checkTableComment("testcat.ns1.ns2.t", "NULL") + } + intercept[AnalysisException](sql("COMMENT ON TABLE testcat.abc IS NULL")) + + val globalTempDB = spark.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) + spark.conf.set(s"spark.sql.catalog.$globalTempDB", classOf[InMemoryTableCatalog].getName) + withTempView("v") { + sql("create global temp view v as select 1") + val e = intercept[AnalysisException](sql("COMMENT ON TABLE global_temp.v IS NULL")) + assert(e.getMessage.contains("global_temp.v is a temp view not table.")) + } + } + + private def checkTableComment(tableName: String, comment: String): Unit = { + sql(s"COMMENT ON TABLE $tableName IS " + Option(comment).map("'" + _ + "'").getOrElse("NULL")) + val expectedComment = Option(comment).getOrElse("") + assert(sql(s"DESC extended $tableName").toDF("k", "v", "c") + .where(s"k='${TableCatalog.PROP_COMMENT.capitalize}'") + .head().getString(1) === expectedComment) + } + + private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { + val e = intercept[AnalysisException] { + sql(s"$sqlCommand $sqlParams") + } + assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) + } + + private def assertAnalysisError(sqlStatement: String, expectedError: String): Unit = { + val errMsg = intercept[AnalysisException] { + sql(sqlStatement) + }.getMessage + assert(errMsg.contains(expectedError)) + } +} + + +/** Used as a V2 DataSource for V2SessionCatalog DDL */ +class FakeV2Provider extends SimpleTableProvider { + override def getTable(options: CaseInsensitiveStringMap): Table = { + throw new UnsupportedOperationException("Unnecessary for DDL tests") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala similarity index 89% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index 8f7dbe8d13c39..2d8761f872da7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import java.io.File import java.util @@ -23,25 +23,28 @@ import java.util.OptionalLong import scala.collection.JavaConverters._ -import test.org.apache.spark.sql.sources.v2._ +import test.org.apache.spark.sql.connector._ import org.apache.spark.SparkException import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2Relation} +import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.read._ +import org.apache.spark.sql.connector.read.partitioning.{ClusteredDistribution, Distribution, Partitioning} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2Relation, DataSourceV2ScanRelation} import org.apache.spark.sql.execution.exchange.{Exchange, ShuffleExchangeExec} import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.functions._ import org.apache.spark.sql.sources.{Filter, GreaterThan} -import org.apache.spark.sql.sources.v2.TableCapability._ -import 
org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.partitioning.{ClusteredDistribution, Distribution, Partitioning} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.sql.vectorized.ColumnarBatch -class DataSourceV2Suite extends QueryTest with SharedSparkSession { +class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ private def getBatch(query: DataFrame): AdvancedBatch = { @@ -163,25 +166,25 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession { val groupByColA = df.groupBy('i).agg(sum('j)) checkAnswer(groupByColA, Seq(Row(1, 8), Row(2, 6), Row(3, 6), Row(4, 4))) - assert(groupByColA.queryExecution.executedPlan.collectFirst { + assert(collectFirst(groupByColA.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isEmpty) val groupByColAB = df.groupBy('i, 'j).agg(count("*")) checkAnswer(groupByColAB, Seq(Row(1, 4, 2), Row(2, 6, 1), Row(3, 6, 1), Row(4, 2, 2))) - assert(groupByColAB.queryExecution.executedPlan.collectFirst { + assert(collectFirst(groupByColAB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isEmpty) val groupByColB = df.groupBy('j).agg(sum('i)) checkAnswer(groupByColB, Seq(Row(2, 8), Row(4, 2), Row(6, 5))) - assert(groupByColB.queryExecution.executedPlan.collectFirst { + assert(collectFirst(groupByColB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isDefined) val groupByAPlusB = df.groupBy('i + 'j).agg(count("*")) checkAnswer(groupByAPlusB, Seq(Row(5, 2), Row(6, 2), Row(8, 1), Row(9, 1))) - assert(groupByAPlusB.queryExecution.executedPlan.collectFirst { + assert(collectFirst(groupByAPlusB.queryExecution.executedPlan) { case e: ShuffleExchangeExec => e }.isDefined) } @@ -194,7 +197,7 @@ class DataSourceV2Suite extends QueryTest with 
SharedSparkSession { withClue(cls.getName) { val df = spark.read.format(cls.getName).load() val logical = df.queryExecution.optimizedPlan.collect { - case d: DataSourceV2Relation => d + case d: DataSourceV2ScanRelation => d }.head val statics = logical.computeStats() @@ -224,8 +227,12 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession { spark.read.format(cls.getName).option("path", path).load(), spark.range(10).select('id, -'id)) - // default save mode is append - spark.range(10).select('id as 'i, -'id as 'j).write.format(cls.getName) + // default save mode is ErrorIfExists + intercept[AnalysisException] { + spark.range(10).select('id as 'i, -'id as 'j).write.format(cls.getName) + .option("path", path).save() + } + spark.range(10).select('id as 'i, -'id as 'j).write.mode("append").format(cls.getName) .option("path", path).save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), @@ -280,7 +287,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession { val numPartition = 6 spark.range(0, 10, 1, numPartition).select('id as 'i, -'id as 'j).write.format(cls.getName) - .option("path", path).save() + .mode("append").option("path", path).save() checkAnswer( spark.read.format(cls.getName).option("path", path).load(), spark.range(10).select('id, -'id)) @@ -327,7 +334,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession { test("SPARK-23315: get output from canonicalized data source v2 related plans") { def checkCanonicalizedOutput( df: DataFrame, logicalNumOutput: Int, physicalNumOutput: Int): Unit = { - val logical = df.queryExecution.optimizedPlan.collect { + val logical = df.queryExecution.logical.collect { case d: DataSourceV2Relation => d }.head assert(logical.canonicalized.output.length == logicalNumOutput) @@ -351,7 +358,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession { .read .option(optionName, false) .format(classOf[DataSourceV2WithSessionConfig].getName).load() - val 
options = df.queryExecution.optimizedPlan.collectFirst { + val options = df.queryExecution.logical.collectFirst { case d: DataSourceV2Relation => d.options }.get assert(options.get(optionName) === "false") @@ -367,7 +374,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession { val format = classOf[SimpleWritableDataSource].getName val df = Seq((1L, 2L)).toDF("i", "j") - df.write.format(format).option("path", optionPath).save() + df.write.format(format).mode("append").option("path", optionPath).save() assert(!new File(sessionPath).exists) checkAnswer(spark.read.format(format).option("path", optionPath).load(), df) } @@ -380,7 +387,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession { val t2 = spark.read.format(classOf[SimpleDataSourceV2].getName).load() Seq(2, 3).toDF("a").createTempView("t1") val df = t2.where("i < (select max(a) from t1)").select('i) - val subqueries = df.queryExecution.executedPlan.collect { + val subqueries = stripAQEPlan(df.queryExecution.executedPlan).collect { case p => p.subqueries }.flatten assert(subqueries.length == 1) @@ -412,7 +419,7 @@ object SimpleReaderFactory extends PartitionReaderFactory { abstract class SimpleBatchTable extends Table with SupportsRead { - override def schema(): StructType = new StructType().add("i", "int").add("j", "int") + override def schema(): StructType = TestingV2Source.schema override def name(): String = this.getClass.toString @@ -426,12 +433,31 @@ abstract class SimpleScanBuilder extends ScanBuilder override def toBatch: Batch = this - override def readSchema(): StructType = new StructType().add("i", "int").add("j", "int") + override def readSchema(): StructType = TestingV2Source.schema override def createReaderFactory(): PartitionReaderFactory = SimpleReaderFactory } -class SimpleSinglePartitionSource extends TableProvider { +trait TestingV2Source extends TableProvider { + override def inferSchema(options: CaseInsensitiveStringMap): StructType = { + 
TestingV2Source.schema + } + + override def getTable( + schema: StructType, + partitioning: Array[Transform], + properties: util.Map[String, String]): Table = { + getTable(new CaseInsensitiveStringMap(properties)) + } + + def getTable(options: CaseInsensitiveStringMap): Table +} + +object TestingV2Source { + val schema = new StructType().add("i", "int").add("j", "int") +} + +class SimpleSinglePartitionSource extends TestingV2Source { class MyScanBuilder extends SimpleScanBuilder { override def planInputPartitions(): Array[InputPartition] = { @@ -446,9 +472,10 @@ class SimpleSinglePartitionSource extends TableProvider { } } + // This class is used by pyspark tests. If this class is modified/moved, make sure pyspark // tests still pass. -class SimpleDataSourceV2 extends TableProvider { +class SimpleDataSourceV2 extends TestingV2Source { class MyScanBuilder extends SimpleScanBuilder { override def planInputPartitions(): Array[InputPartition] = { @@ -463,7 +490,7 @@ class SimpleDataSourceV2 extends TableProvider { } } -class AdvancedDataSourceV2 extends TableProvider { +class AdvancedDataSourceV2 extends TestingV2Source { override def getTable(options: CaseInsensitiveStringMap): Table = new SimpleBatchTable { override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { @@ -475,7 +502,7 @@ class AdvancedDataSourceV2 extends TableProvider { class AdvancedScanBuilder extends ScanBuilder with Scan with SupportsPushDownFilters with SupportsPushDownRequiredColumns { - var requiredSchema = new StructType().add("i", "int").add("j", "int") + var requiredSchema = TestingV2Source.schema var filters = Array.empty[Filter] override def pruneColumns(requiredSchema: StructType): Unit = { @@ -561,11 +588,16 @@ class SchemaRequiredDataSource extends TableProvider { override def readSchema(): StructType = schema } - override def getTable(options: CaseInsensitiveStringMap): Table = { + override def supportsExternalMetadata(): Boolean = true + + override def 
inferSchema(options: CaseInsensitiveStringMap): StructType = { throw new IllegalArgumentException("requires a user-supplied schema") } - override def getTable(options: CaseInsensitiveStringMap, schema: StructType): Table = { + override def getTable( + schema: StructType, + partitioning: Array[Transform], + properties: util.Map[String, String]): Table = { val userGivenSchema = schema new SimpleBatchTable { override def schema(): StructType = userGivenSchema @@ -577,7 +609,7 @@ class SchemaRequiredDataSource extends TableProvider { } } -class ColumnarDataSourceV2 extends TableProvider { +class ColumnarDataSourceV2 extends TestingV2Source { class MyScanBuilder extends SimpleScanBuilder { @@ -642,7 +674,7 @@ object ColumnarReaderFactory extends PartitionReaderFactory { } } -class PartitionAwareDataSource extends TableProvider { +class PartitionAwareDataSource extends TestingV2Source { class MyScanBuilder extends SimpleScanBuilder with SupportsReportPartitioning{ @@ -710,7 +742,7 @@ class SimpleWriteOnlyDataSource extends SimpleWritableDataSource { } } -class ReportStatisticsDataSource extends TableProvider { +class ReportStatisticsDataSource extends SimpleWritableDataSource { class MyScanBuilder extends SimpleScanBuilder with SupportsReportStatistics { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2UtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2UtilsSuite.scala similarity index 95% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2UtilsSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2UtilsSuite.scala index 0b1e3b5fb076d..01fcced5b12a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2UtilsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2UtilsSuite.scala @@ -15,9 +15,10 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.connector.catalog.SessionConfigSupport import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/FileDataSourceV2FallBackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala similarity index 93% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/FileDataSourceV2FallBackSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala index 26f941244f5cc..b0da2eb697f36 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/FileDataSourceV2FallBackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/FileDataSourceV2FallBackSuite.scala @@ -14,21 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer +import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.read.ScanBuilder +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.{FileSourceScanExec, QueryExecution} import org.apache.spark.sql.execution.datasources.{FileFormat, InsertIntoHadoopFsRelationCommand} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.ScanBuilder -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.{CaseInsensitiveStringMap, QueryExecutionListener} @@ -73,7 +75,7 @@ class DummyWriteOnlyFileTable extends Table with SupportsWrite { override def schema(): StructType = StructType(Nil) - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = throw new AnalysisException("Dummy file writer") override def capabilities(): java.util.Set[TableCapability] = @@ -85,6 +87,8 @@ class FileDataSourceV2FallBackSuite extends QueryTest with SharedSparkSession { private val dummyReadOnlyFileSourceV2 = classOf[DummyReadOnlyFileDataSourceV2].getName private val dummyWriteOnlyFileSourceV2 = classOf[DummyWriteOnlyFileDataSourceV2].getName + override protected 
def sparkConf: SparkConf = super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "") + test("Fall back to v1 when writing to file with read only FileDataSourceV2") { val df = spark.range(10).toDF() withTempPath { file => @@ -172,7 +176,7 @@ class FileDataSourceV2FallBackSuite extends QueryTest with SharedSparkSession { withTempPath { path => val inputData = spark.range(10) inputData.write.format(format).save(path.getCanonicalPath) - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(commands.length == 1) assert(commands.head._1 == "save") assert(commands.head._2.isInstanceOf[InsertIntoHadoopFsRelationCommand]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/InsertIntoTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala similarity index 99% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/InsertIntoTests.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala index 5b5382e5ca931..0fd6cf1b6746c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/InsertIntoTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import org.scalatest.BeforeAndAfter @@ -175,7 +175,7 @@ abstract class InsertIntoTests( } } -private[v2] trait InsertIntoSQLOnlyTests +trait InsertIntoSQLOnlyTests extends QueryTest with SharedSparkSession with BeforeAndAfter { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala similarity index 87% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala index c9d2f1eef24bb..f9306ba28e7f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/SimpleWritableDataSource.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SimpleWritableDataSource.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import java.io.{BufferedReader, InputStreamReader, IOException} import java.util @@ -27,9 +27,11 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkContext import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.sources.v2.TableCapability._ -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.writer._ +import org.apache.spark.sql.connector.catalog.{SessionConfigSupport, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory, ScanBuilder} +import org.apache.spark.sql.connector.write._ +import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import 
org.apache.spark.util.SerializableConfiguration @@ -39,7 +41,7 @@ import org.apache.spark.util.SerializableConfiguration * Each task writes data to `target/_temporary/uniqueId/$jobId-$partitionId-$attemptNumber`. * Each job moves files from `target/_temporary/uniqueId/` to `target`. */ -class SimpleWritableDataSource extends TableProvider with SessionConfigSupport { +class SimpleWritableDataSource extends SimpleTableProvider with SessionConfigSupport { private val tableSchema = new StructType().add("i", "long").add("j", "long") @@ -69,15 +71,11 @@ class SimpleWritableDataSource extends TableProvider with SessionConfigSupport { override def readSchema(): StructType = tableSchema } - class MyWriteBuilder(path: String) extends WriteBuilder with SupportsTruncate { - private var queryId: String = _ + class MyWriteBuilder(path: String, info: LogicalWriteInfo) + extends WriteBuilder with SupportsTruncate { + private val queryId: String = info.queryId() private var needTruncate = false - override def withQueryId(queryId: String): WriteBuilder = { - this.queryId = queryId - this - } - override def truncate(): WriteBuilder = { this.needTruncate = true this @@ -98,7 +96,7 @@ class SimpleWritableDataSource extends TableProvider with SessionConfigSupport { } class MyBatchWrite(queryId: String, path: String, conf: Configuration) extends BatchWrite { - override def createBatchWriterFactory(): DataWriterFactory = { + override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = { SimpleCounter.resetCounter new CSVDataWriterFactory(path, queryId, new SerializableConfiguration(conf)) } @@ -142,8 +140,8 @@ class SimpleWritableDataSource extends TableProvider with SessionConfigSupport { new MyScanBuilder(new Path(path).toUri.toString, conf) } - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = { - new MyWriteBuilder(path) + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + new MyWriteBuilder(path, info) } 
override def capabilities(): util.Set[TableCapability] = @@ -190,7 +188,7 @@ class CSVReaderFactory(conf: SerializableConfiguration) } } -private[v2] object SimpleCounter { +private[connector] object SimpleCounter { private var count: Int = 0 def increaseCounter: Unit = { @@ -239,4 +237,6 @@ class CSVDataWriter(fs: FileSystem, file: Path) extends DataWriter[InternalRow] fs.delete(file, false) } } + + override def close(): Unit = {} } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala new file mode 100644 index 0000000000000..7bff955b18360 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -0,0 +1,288 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector + +import scala.language.implicitConversions +import scala.util.Try + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.{DataFrame, QueryTest, SaveMode} +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsCatalogOptions, TableCatalog} +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StructType} +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, QueryExecutionListener} + +class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with BeforeAndAfter { + + import testImplicits._ + + private val catalogName = "testcat" + private val format = classOf[CatalogSupportingInMemoryTableProvider].getName + + private def catalog(name: String): TableCatalog = { + spark.sessionState.catalogManager.catalog(name).asInstanceOf[TableCatalog] + } + + private implicit def stringToIdentifier(value: String): Identifier = { + Identifier.of(Array.empty, value) + } + + before { + spark.conf.set( + V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryTableSessionCatalog].getName) + spark.conf.set( + s"spark.sql.catalog.$catalogName", classOf[InMemoryTableCatalog].getName) + } + + override def afterEach(): Unit = { + super.afterEach() + Try(catalog(SESSION_CATALOG_NAME).asInstanceOf[InMemoryTableSessionCatalog].clearTables()) + 
catalog(catalogName).listTables(Array.empty).foreach( + catalog(catalogName).dropTable(_)) + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + spark.conf.unset(s"spark.sql.catalog.$catalogName") + } + + private def testCreateAndRead( + saveMode: SaveMode, + withCatalogOption: Option[String], + partitionBy: Seq[String]): Unit = { + val df = spark.range(10).withColumn("part", 'id % 5) + val dfw = df.write.format(format).mode(saveMode).option("name", "t1") + withCatalogOption.foreach(cName => dfw.option("catalog", cName)) + dfw.partitionBy(partitionBy: _*).save() + + val table = catalog(withCatalogOption.getOrElse(SESSION_CATALOG_NAME)).loadTable("t1") + val namespace = withCatalogOption.getOrElse("default") + assert(table.name() === s"$namespace.t1", "Table identifier was wrong") + assert(table.partitioning().length === partitionBy.length, "Partitioning did not match") + if (partitionBy.nonEmpty) { + table.partitioning.head match { + case IdentityTransform(FieldReference(field)) => + assert(field === Seq(partitionBy.head), "Partitioning column did not match") + case otherTransform => + fail(s"Unexpected partitioning ${otherTransform.describe()} received") + } + } + assert(table.partitioning().map(_.references().head.fieldNames().head) === partitionBy, + "Partitioning was incorrect") + assert(table.schema() === df.schema.asNullable, "Schema did not match") + + checkAnswer(load("t1", withCatalogOption), df.toDF()) + } + + test(s"save works with ErrorIfExists - no table, no partitioning, session catalog") { + testCreateAndRead(SaveMode.ErrorIfExists, None, Nil) + } + + test(s"save works with ErrorIfExists - no table, with partitioning, session catalog") { + testCreateAndRead(SaveMode.ErrorIfExists, None, Seq("part")) + } + + test(s"save works with Ignore - no table, no partitioning, testcat catalog") { + testCreateAndRead(SaveMode.Ignore, Some(catalogName), Nil) + } + + test(s"save works with Ignore - no table, with partitioning, testcat catalog") { + 
testCreateAndRead(SaveMode.Ignore, Some(catalogName), Seq("part")) + } + + test("save fails with ErrorIfExists if table exists - session catalog") { + sql(s"create table t1 (id bigint) using $format") + val df = spark.range(10) + intercept[TableAlreadyExistsException] { + val dfw = df.write.format(format).option("name", "t1") + dfw.save() + } + } + + test("save fails with ErrorIfExists if table exists - testcat catalog") { + sql(s"create table $catalogName.t1 (id bigint) using $format") + val df = spark.range(10) + intercept[TableAlreadyExistsException] { + val dfw = df.write.format(format).option("name", "t1").option("catalog", catalogName) + dfw.save() + } + } + + test("Ignore mode if table exists - session catalog") { + sql(s"create table t1 (id bigint) using $format") + val df = spark.range(10).withColumn("part", 'id % 5) + val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") + dfw.save() + + val table = catalog(SESSION_CATALOG_NAME).loadTable("t1") + assert(table.partitioning().isEmpty, "Partitioning should be empty") + assert(table.schema() === new StructType().add("id", LongType), "Schema did not match") + assert(load("t1", None).count() === 0) + } + + test("Ignore mode if table exists - testcat catalog") { + sql(s"create table $catalogName.t1 (id bigint) using $format") + val df = spark.range(10).withColumn("part", 'id % 5) + val dfw = df.write.format(format).mode(SaveMode.Ignore).option("name", "t1") + dfw.option("catalog", catalogName).save() + + val table = catalog(catalogName).loadTable("t1") + assert(table.partitioning().isEmpty, "Partitioning should be empty") + assert(table.schema() === new StructType().add("id", LongType), "Schema did not match") + assert(load("t1", Some(catalogName)).count() === 0) + } + + test("append and overwrite modes - session catalog") { + sql(s"create table t1 (id bigint) using $format") + val df = spark.range(10) + df.write.format(format).option("name", "t1").mode(SaveMode.Append).save() + + 
checkAnswer(load("t1", None), df.toDF()) + + val df2 = spark.range(10, 20) + df2.write.format(format).option("name", "t1").mode(SaveMode.Overwrite).save() + + checkAnswer(load("t1", None), df2.toDF()) + } + + test("append and overwrite modes - testcat catalog") { + sql(s"create table $catalogName.t1 (id bigint) using $format") + val df = spark.range(10) + df.write.format(format).option("name", "t1").option("catalog", catalogName) + .mode(SaveMode.Append).save() + + checkAnswer(load("t1", Some(catalogName)), df.toDF()) + + val df2 = spark.range(10, 20) + df2.write.format(format).option("name", "t1").option("catalog", catalogName) + .mode(SaveMode.Overwrite).save() + + checkAnswer(load("t1", Some(catalogName)), df2.toDF()) + } + + test("fail on user specified schema when reading - session catalog") { + sql(s"create table t1 (id bigint) using $format") + val e = intercept[IllegalArgumentException] { + spark.read.format(format).option("name", "t1").schema("id bigint").load() + } + assert(e.getMessage.contains("not support user specified schema")) + } + + test("fail on user specified schema when reading - testcat catalog") { + sql(s"create table $catalogName.t1 (id bigint) using $format") + val e = intercept[IllegalArgumentException] { + spark.read.format(format).option("name", "t1").option("catalog", catalogName) + .schema("id bigint").load() + } + assert(e.getMessage.contains("not support user specified schema")) + } + + test("DataFrameReader creates v2Relation with identifiers") { + sql(s"create table $catalogName.t1 (id bigint) using $format") + val df = load("t1", Some(catalogName)) + checkV2Identifiers(df.logicalPlan) + } + + test("DataFrameWriter creates v2Relation with identifiers") { + sql(s"create table $catalogName.t1 (id bigint) using $format") + + var plan: LogicalPlan = null + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + plan = qe.analyzed + } + override def 
onFailure(funcName: String, qe: QueryExecution, error: Throwable): Unit = {} + } + + spark.listenerManager.register(listener) + + try { + // Test append + save("t1", SaveMode.Append, Some(catalogName)) + sparkContext.listenerBus.waitUntilEmpty() + assert(plan.isInstanceOf[AppendData]) + val appendRelation = plan.asInstanceOf[AppendData].table + checkV2Identifiers(appendRelation) + + // Test overwrite + save("t1", SaveMode.Overwrite, Some(catalogName)) + sparkContext.listenerBus.waitUntilEmpty() + assert(plan.isInstanceOf[OverwriteByExpression]) + val overwriteRelation = plan.asInstanceOf[OverwriteByExpression].table + checkV2Identifiers(overwriteRelation) + + // Test insert + spark.range(10).write.format(format).insertInto(s"$catalogName.t1") + sparkContext.listenerBus.waitUntilEmpty() + assert(plan.isInstanceOf[AppendData]) + val insertRelation = plan.asInstanceOf[AppendData].table + checkV2Identifiers(insertRelation) + + // Test saveAsTable append + spark.range(10).write.format(format).mode(SaveMode.Append).saveAsTable(s"$catalogName.t1") + sparkContext.listenerBus.waitUntilEmpty() + assert(plan.isInstanceOf[AppendData]) + val saveAsTableRelation = plan.asInstanceOf[AppendData].table + checkV2Identifiers(saveAsTableRelation) + } finally { + spark.listenerManager.unregister(listener) + } + } + + private def checkV2Identifiers( + plan: LogicalPlan, + identifier: String = "t1", + catalogPlugin: TableCatalog = catalog(catalogName)): Unit = { + assert(plan.isInstanceOf[DataSourceV2Relation]) + val v2 = plan.asInstanceOf[DataSourceV2Relation] + assert(v2.identifier.exists(_.name() == identifier)) + assert(v2.catalog.exists(_ == catalogPlugin)) + } + + private def load(name: String, catalogOpt: Option[String]): DataFrame = { + val dfr = spark.read.format(format).option("name", name) + catalogOpt.foreach(cName => dfr.option("catalog", cName)) + dfr.load() + } + + private def save(name: String, mode: SaveMode, catalogOpt: Option[String]): Unit = { + val df = 
spark.range(10).write.format(format).option("name", name) + catalogOpt.foreach(cName => df.option("catalog", cName)) + df.mode(mode).save() + } +} + +class CatalogSupportingInMemoryTableProvider + extends FakeV2Provider + with SupportsCatalogOptions { + + override def extractIdentifier(options: CaseInsensitiveStringMap): Identifier = { + val name = options.get("name") + assert(name != null, "The name should be provided for this table") + Identifier.of(Array.empty, name) + } + + override def extractCatalog(options: CaseInsensitiveStringMap): String = { + options.get("catalog") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/TableCapabilityCheckSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala similarity index 79% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/TableCapabilityCheckSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index ab47836001704..23e4c293cbc28 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/TableCapabilityCheckSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import java.util @@ -24,21 +24,23 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.{AnalysisException, DataFrame, SQLContext} import org.apache.spark.sql.catalyst.analysis.{AnalysisSuite, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LeafNode, OverwriteByExpression, OverwritePartitionsDynamic, Union} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, TableCapabilityCheck} import org.apache.spark.sql.execution.streaming.{Offset, Source, StreamingRelation, StreamingRelationV2} import org.apache.spark.sql.sources.StreamSourceProvider -import org.apache.spark.sql.sources.v2.TableCapability.{BATCH_WRITE, CONTINUOUS_READ, MICRO_BATCH_READ, OVERWRITE_BY_FILTER, OVERWRITE_DYNAMIC, TRUNCATE} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{LongType, StringType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { + private val emptyMap = CaseInsensitiveStringMap.empty private def createStreamingRelation(table: Table, v1Relation: Option[StreamingRelation]) = { StreamingRelationV2( - TestTableProvider, + new FakeV2Provider, "fake", table, CaseInsensitiveStringMap.empty(), @@ -52,9 +54,9 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { test("batch scan: check missing capabilities") { val e = intercept[AnalysisException] { - 
TableCapabilityCheck.apply(DataSourceV2Relation.create( - CapabilityTable(), - CaseInsensitiveStringMap.empty)) + TableCapabilityCheck.apply( + DataSourceV2Relation.create(CapabilityTable(), None, None, emptyMap) + ) } assert(e.message.contains("does not support batch scan")) } @@ -87,7 +89,8 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { test("AppendData: check missing capabilities") { val plan = AppendData.byName( - DataSourceV2Relation.create(CapabilityTable(), CaseInsensitiveStringMap.empty), TestRelation) + DataSourceV2Relation.create(CapabilityTable(), None, None, emptyMap), + TestRelation) val exc = intercept[AnalysisException]{ TableCapabilityCheck.apply(plan) @@ -97,21 +100,25 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { } test("AppendData: check correct capabilities") { - val plan = AppendData.byName( - DataSourceV2Relation.create(CapabilityTable(BATCH_WRITE), CaseInsensitiveStringMap.empty), - TestRelation) + Seq(BATCH_WRITE, V1_BATCH_WRITE).foreach { write => + val plan = AppendData.byName( + DataSourceV2Relation.create(CapabilityTable(write), None, None, emptyMap), + TestRelation) - TableCapabilityCheck.apply(plan) + TableCapabilityCheck.apply(plan) + } } test("Truncate: check missing capabilities") { Seq(CapabilityTable(), CapabilityTable(BATCH_WRITE), + CapabilityTable(V1_BATCH_WRITE), CapabilityTable(TRUNCATE), CapabilityTable(OVERWRITE_BY_FILTER)).foreach { table => val plan = OverwriteByExpression.byName( - DataSourceV2Relation.create(table, CaseInsensitiveStringMap.empty), TestRelation, + DataSourceV2Relation.create(table, None, None, emptyMap), + TestRelation, Literal(true)) val exc = intercept[AnalysisException]{ @@ -124,10 +131,13 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { test("Truncate: check correct capabilities") { Seq(CapabilityTable(BATCH_WRITE, TRUNCATE), - CapabilityTable(BATCH_WRITE, OVERWRITE_BY_FILTER)).foreach { table 
=> + CapabilityTable(V1_BATCH_WRITE, TRUNCATE), + CapabilityTable(BATCH_WRITE, OVERWRITE_BY_FILTER), + CapabilityTable(V1_BATCH_WRITE, OVERWRITE_BY_FILTER)).foreach { table => val plan = OverwriteByExpression.byName( - DataSourceV2Relation.create(table, CaseInsensitiveStringMap.empty), TestRelation, + DataSourceV2Relation.create(table, None, None, emptyMap), + TestRelation, Literal(true)) TableCapabilityCheck.apply(plan) @@ -136,11 +146,13 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { test("OverwriteByExpression: check missing capabilities") { Seq(CapabilityTable(), + CapabilityTable(V1_BATCH_WRITE), CapabilityTable(BATCH_WRITE), CapabilityTable(OVERWRITE_BY_FILTER)).foreach { table => val plan = OverwriteByExpression.byName( - DataSourceV2Relation.create(table, CaseInsensitiveStringMap.empty), TestRelation, + DataSourceV2Relation.create(table, None, None, emptyMap), + TestRelation, EqualTo(AttributeReference("x", LongType)(), Literal(5))) val exc = intercept[AnalysisException]{ @@ -152,12 +164,15 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { } test("OverwriteByExpression: check correct capabilities") { - val table = CapabilityTable(BATCH_WRITE, OVERWRITE_BY_FILTER) - val plan = OverwriteByExpression.byName( - DataSourceV2Relation.create(table, CaseInsensitiveStringMap.empty), TestRelation, - EqualTo(AttributeReference("x", LongType)(), Literal(5))) + Seq(BATCH_WRITE, V1_BATCH_WRITE).foreach { write => + val table = CapabilityTable(write, OVERWRITE_BY_FILTER) + val plan = OverwriteByExpression.byName( + DataSourceV2Relation.create(table, None, None, emptyMap), + TestRelation, + EqualTo(AttributeReference("x", LongType)(), Literal(5))) - TableCapabilityCheck.apply(plan) + TableCapabilityCheck.apply(plan) + } } test("OverwritePartitionsDynamic: check missing capabilities") { @@ -166,7 +181,8 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { 
CapabilityTable(OVERWRITE_DYNAMIC)).foreach { table => val plan = OverwritePartitionsDynamic.byName( - DataSourceV2Relation.create(table, CaseInsensitiveStringMap.empty), TestRelation) + DataSourceV2Relation.create(table, None, None, emptyMap), + TestRelation) val exc = intercept[AnalysisException] { TableCapabilityCheck.apply(plan) @@ -179,7 +195,8 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { test("OverwritePartitionsDynamic: check correct capabilities") { val table = CapabilityTable(BATCH_WRITE, OVERWRITE_DYNAMIC) val plan = OverwritePartitionsDynamic.byName( - DataSourceV2Relation.create(table, CaseInsensitiveStringMap.empty), TestRelation) + DataSourceV2Relation.create(table, None, None, emptyMap), + TestRelation) TableCapabilityCheck.apply(plan) } @@ -194,12 +211,6 @@ private case object TestRelation extends LeafNode with NamedRelation { override def output: Seq[AttributeReference] = TableCapabilityCheckSuite.schema.toAttributes } -private object TestTableProvider extends TableProvider { - override def getTable(options: CaseInsensitiveStringMap): Table = { - throw new UnsupportedOperationException - } -} - private case class CapabilityTable(_capabilities: TableCapability*) extends Table { override def name(): String = "capability_test_table" override def schema(): StructType = TableCapabilityCheckSuite.schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/utils/TestV2SessionCatalogBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala similarity index 86% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/utils/TestV2SessionCatalogBase.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala index 28ce6a94b253a..3f6ac0b7f8d3c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/utils/TestV2SessionCatalogBase.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala @@ -15,17 +15,15 @@ * limitations under the License. */ -package org.apache.spark.sql.sources.v2.utils +package org.apache.spark.sql.connector import java.util import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConverters._ -import org.apache.spark.sql.catalog.v2.Identifier -import org.apache.spark.sql.catalog.v2.expressions.Transform -import org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog -import org.apache.spark.sql.sources.v2.Table +import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, Table} +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType /** @@ -33,7 +31,7 @@ import org.apache.spark.sql.types.StructType * for testing DDL as well as write operations (through df.write.saveAsTable, df.write.insertInto * and SQL). */ -private[v2] trait TestV2SessionCatalogBase[T <: Table] extends V2SessionCatalog { +private[connector] trait TestV2SessionCatalogBase[T <: Table] extends DelegatingCatalogExtension { protected val tables: util.Map[Identifier, T] = new ConcurrentHashMap[Identifier, T]() @@ -76,6 +74,11 @@ private[v2] trait TestV2SessionCatalogBase[T <: Table] extends V2SessionCatalog t } + override def dropTable(ident: Identifier): Boolean = { + tables.remove(fullIdentifier(ident)) + super.dropTable(ident) + } + def clearTables(): Unit = { assert(!tables.isEmpty, "Tables were empty, maybe didn't use the session catalog code path?") tables.keySet().asScala.foreach(super.dropTable) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala new file mode 100644 index 0000000000000..74f2ca14234d2 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util + +import scala.collection.JavaConverters._ + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession, SQLContext} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns, V1Scan} +import org.apache.spark.sql.execution.RowDataSourceScanExec +import org.apache.spark.sql.internal.connector.SimpleTableProvider +import org.apache.spark.sql.sources.{BaseRelation, Filter, GreaterThan, TableScan} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +abstract class V1ReadFallbackSuite extends QueryTest with SharedSparkSession { + protected def baseTableScan(): DataFrame + + test("full scan") { + val df = baseTableScan() + val v1Scan = df.queryExecution.executedPlan.collect { + case s: RowDataSourceScanExec => s + } + assert(v1Scan.length == 1) + checkAnswer(df, 
Seq(Row(1, 10), Row(2, 20), Row(3, 30))) + } + + test("column pruning") { + val df = baseTableScan().select("i") + val v1Scan = df.queryExecution.executedPlan.collect { + case s: RowDataSourceScanExec => s + } + assert(v1Scan.length == 1) + assert(v1Scan.head.output.map(_.name) == Seq("i")) + checkAnswer(df, Seq(Row(1), Row(2), Row(3))) + } + + test("filter push down") { + val df = baseTableScan().filter("i > 1 and j < 30") + val v1Scan = df.queryExecution.executedPlan.collect { + case s: RowDataSourceScanExec => s + } + assert(v1Scan.length == 1) + // `j < 30` can't be pushed. + assert(v1Scan.head.handledFilters.size == 1) + checkAnswer(df, Seq(Row(2, 20))) + } + + test("filter push down + column pruning") { + val df = baseTableScan().filter("i > 1").select("i") + val v1Scan = df.queryExecution.executedPlan.collect { + case s: RowDataSourceScanExec => s + } + assert(v1Scan.length == 1) + assert(v1Scan.head.output.map(_.name) == Seq("i")) + assert(v1Scan.head.handledFilters.size == 1) + checkAnswer(df, Seq(Row(2), Row(3))) + } +} + +class V1ReadFallbackWithDataFrameReaderSuite extends V1ReadFallbackSuite { + override protected def baseTableScan(): DataFrame = { + spark.read.format(classOf[V1ReadFallbackTableProvider].getName).load() + } +} + +class V1ReadFallbackWithCatalogSuite extends V1ReadFallbackSuite { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set("spark.sql.catalog.read_fallback", classOf[V1ReadFallbackCatalog].getName) + sql("CREATE TABLE read_fallback.tbl(i int, j int) USING foo") + } + + override def afterAll(): Unit = { + spark.conf.unset("spark.sql.catalog.read_fallback") + super.afterAll() + } + + override protected def baseTableScan(): DataFrame = { + spark.table("read_fallback.tbl") + } +} + +class V1ReadFallbackCatalog extends BasicInMemoryTableCatalog { + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + // To simplify 
the test implementation, only support fixed schema. + if (schema != V1ReadFallbackCatalog.schema || partitions.nonEmpty) { + throw new UnsupportedOperationException + } + val table = new TableWithV1ReadFallback(ident.toString) + tables.put(ident, table) + table + } +} + +object V1ReadFallbackCatalog { + val schema = new StructType().add("i", "int").add("j", "int") +} + +class V1ReadFallbackTableProvider extends SimpleTableProvider { + override def getTable(options: CaseInsensitiveStringMap): Table = { + new TableWithV1ReadFallback("v1-read-fallback") + } +} + +class TableWithV1ReadFallback(override val name: String) extends Table with SupportsRead { + + override def schema(): StructType = V1ReadFallbackCatalog.schema + + override def capabilities(): util.Set[TableCapability] = { + Set(TableCapability.BATCH_READ).asJava + } + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + new V1ReadFallbackScanBuilder + } + + private class V1ReadFallbackScanBuilder extends ScanBuilder + with SupportsPushDownRequiredColumns with SupportsPushDownFilters { + + private var requiredSchema: StructType = schema() + override def pruneColumns(requiredSchema: StructType): Unit = { + this.requiredSchema = requiredSchema + } + + private var filters: Array[Filter] = Array.empty + override def pushFilters(filters: Array[Filter]): Array[Filter] = { + val (supported, unsupported) = filters.partition { + case GreaterThan("i", _: Int) => true + case _ => false + } + this.filters = supported + unsupported + } + override def pushedFilters(): Array[Filter] = filters + + override def build(): Scan = new V1ReadFallbackScan(requiredSchema, filters) + } + + private class V1ReadFallbackScan( + requiredSchema: StructType, + filters: Array[Filter]) extends V1Scan { + override def readSchema(): StructType = requiredSchema + + override def toV1TableScan[T <: BaseRelation with TableScan](context: SQLContext): T = { + new V1TableScan(context, requiredSchema, 
filters).asInstanceOf[T] + } + } +} + +class V1TableScan( + context: SQLContext, + requiredSchema: StructType, + filters: Array[Filter]) extends BaseRelation with TableScan { + override def sqlContext: SQLContext = context + override def schema: StructType = requiredSchema + override def buildScan(): RDD[Row] = { + val lowerBound = if (filters.isEmpty) { + 0 + } else { + filters.collect { case GreaterThan("i", v: Int) => v }.max + } + val data = Seq(Row(1, 10), Row(2, 20), Row(3, 30)).filter(_.getInt(0) > lowerBound) + val result = if (requiredSchema.length == 2) { + data + } else if (requiredSchema.map(_.name) == Seq("i")) { + data.map(row => Row(row.getInt(0))) + } else if (requiredSchema.map(_.name) == Seq("j")) { + data.map(row => Row(row.getInt(1))) + } else { + throw new UnsupportedOperationException + } + + SparkSession.active.sparkContext.makeRDD(result) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala similarity index 54% rename from sql/core/src/test/scala/org/apache/spark/sql/sources/v2/V1WriteFallbackSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index 9002775bce211..10ed2048dbf61 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.sources.v2 +package org.apache.spark.sql.connector import java.util @@ -24,15 +24,15 @@ import scala.collection.mutable import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode, SparkSession} -import org.apache.spark.sql.catalog.v2.expressions.{FieldReference, IdentityTransform, Transform} -import org.apache.spark.sql.connector.InMemoryTable -import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode} -import org.apache.spark.sql.sources.{DataSourceRegister, Filter, InsertableRelation} -import org.apache.spark.sql.sources.v2.utils.TestV2SessionCatalogBase -import org.apache.spark.sql.sources.v2.writer.{SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode, SparkSession, SQLContext} +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} +import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.internal.connector.SimpleTableProvider +import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{IntegerType, StringType, StructType} +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with BeforeAndAfter { @@ -54,7 +54,11 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before test("append fallback") { val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b") df.write.mode("append").option("name", 
"t1").format(v2Format).save() + checkAnswer(InMemoryV1Provider.getTableData(spark, "t1"), df) + assert(InMemoryV1Provider.tables("t1").schema === df.schema.asNullable) + assert(InMemoryV1Provider.tables("t1").partitioning.isEmpty) + df.write.mode("append").option("name", "t1").format(v2Format).save() checkAnswer(InMemoryV1Provider.getTableData(spark, "t1"), df.union(df)) } @@ -67,6 +71,59 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before df2.write.mode("overwrite").option("name", "t1").format(v2Format).save() checkAnswer(InMemoryV1Provider.getTableData(spark, "t1"), df2) } + + SaveMode.values().foreach { mode => + test(s"save: new table creations with partitioning for table - mode: $mode") { + val format = classOf[InMemoryV1Provider].getName + val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b") + df.write.mode(mode).option("name", "t1").format(format).partitionBy("a").save() + + checkAnswer(InMemoryV1Provider.getTableData(spark, "t1"), df) + assert(InMemoryV1Provider.tables("t1").schema === df.schema.asNullable) + assert(InMemoryV1Provider.tables("t1").partitioning.sameElements( + Array(IdentityTransform(FieldReference(Seq("a")))))) + } + } + + test("save: default mode is ErrorIfExists") { + val format = classOf[InMemoryV1Provider].getName + val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b") + + df.write.option("name", "t1").format(format).partitionBy("a").save() + // default is ErrorIfExists, and since a table already exists we throw an exception + val e = intercept[AnalysisException] { + df.write.option("name", "t1").format(format).partitionBy("a").save() + } + assert(e.getMessage.contains("already exists")) + } + + test("save: Ignore mode") { + val format = classOf[InMemoryV1Provider].getName + val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b") + + df.write.option("name", "t1").format(format).partitionBy("a").save() + // no-op + df.write.option("name", 
"t1").format(format).mode("ignore").partitionBy("a").save() + + checkAnswer(InMemoryV1Provider.getTableData(spark, "t1"), df) + } + + test("save: tables can perform schema and partitioning checks if they already exist") { + val format = classOf[InMemoryV1Provider].getName + val df = Seq((1, "x"), (2, "y"), (3, "z")).toDF("a", "b") + + df.write.option("name", "t1").format(format).partitionBy("a").save() + val e2 = intercept[IllegalArgumentException] { + df.write.mode("append").option("name", "t1").format(format).partitionBy("b").save() + } + assert(e2.getMessage.contains("partitioning")) + + val e3 = intercept[IllegalArgumentException] { + Seq((1, "x")).toDF("c", "d").write.mode("append").option("name", "t1").format(format) + .save() + } + assert(e3.getMessage.contains("schema")) + } } class V1WriteFallbackSessionCatalogSuite @@ -116,26 +173,85 @@ private object InMemoryV1Provider { } } -class InMemoryV1Provider extends TableProvider with DataSourceRegister { +class InMemoryV1Provider + extends SimpleTableProvider + with DataSourceRegister + with CreatableRelationProvider { override def getTable(options: CaseInsensitiveStringMap): Table = { - InMemoryV1Provider.tables.getOrElseUpdate(options.get("name"), { + + InMemoryV1Provider.tables.getOrElse(options.get("name"), { new InMemoryTableWithV1Fallback( "InMemoryTableWithV1Fallback", - new StructType().add("a", IntegerType).add("b", StringType), - Array(IdentityTransform(FieldReference(Seq("a")))), + new StructType(), + Array.empty, options.asCaseSensitiveMap() ) }) } override def shortName(): String = "in-memory" + + override def createRelation( + sqlContext: SQLContext, + mode: SaveMode, + parameters: Map[String, String], + data: DataFrame): BaseRelation = { + val _sqlContext = sqlContext + + val partitioning = parameters.get(DataSourceUtils.PARTITIONING_COLUMNS_KEY).map { value => + DataSourceUtils.decodePartitioningColumns(value).map { partitioningColumn => + IdentityTransform(FieldReference(partitioningColumn)) + 
} + }.getOrElse(Nil) + + val tableName = parameters("name") + val tableOpt = InMemoryV1Provider.tables.get(tableName) + val table = tableOpt.getOrElse(new InMemoryTableWithV1Fallback( + "InMemoryTableWithV1Fallback", + data.schema.asNullable, + partitioning.toArray, + Map.empty[String, String].asJava + )) + if (tableOpt.isEmpty) { + InMemoryV1Provider.tables.put(tableName, table) + } else { + if (data.schema.asNullable != table.schema) { + throw new IllegalArgumentException("Wrong schema provided") + } + if (!partitioning.sameElements(table.partitioning)) { + throw new IllegalArgumentException("Wrong partitioning provided") + } + } + + def getRelation: BaseRelation = new BaseRelation { + override def sqlContext: SQLContext = _sqlContext + override def schema: StructType = table.schema + } + + if (mode == SaveMode.ErrorIfExists && tableOpt.isDefined) { + throw new AnalysisException("Table already exists") + } else if (mode == SaveMode.Ignore && tableOpt.isDefined) { + // do nothing + return getRelation + } + val writer = table.newWriteBuilder( + LogicalWriteInfoImpl( + "", StructType(Seq.empty), new CaseInsensitiveStringMap(parameters.asJava))) + if (mode == SaveMode.Overwrite) { + writer.asInstanceOf[SupportsTruncate].truncate() + } + writer.asInstanceOf[V1WriteBuilder].buildForV1Write().insert(data, overwrite = false) + getRelation + } } class InMemoryTableWithV1Fallback( override val name: String, override val schema: StructType, override val partitioning: Array[Transform], - override val properties: util.Map[String, String]) extends Table with SupportsWrite { + override val properties: util.Map[String, String]) + extends Table + with SupportsWrite { partitioning.foreach { t => if (!t.isInstanceOf[IdentityTransform]) { @@ -144,7 +260,6 @@ class InMemoryTableWithV1Fallback( } override def capabilities: util.Set[TableCapability] = Set( - TableCapability.BATCH_WRITE, TableCapability.V1_BATCH_WRITE, TableCapability.OVERWRITE_BY_FILTER, 
TableCapability.TRUNCATE).asJava @@ -155,8 +270,8 @@ class InMemoryTableWithV1Fallback( def getData: Seq[Row] = dataMap.values.flatten.toSeq - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = { - new FallbackWriteBuilder(options) + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + new FallbackWriteBuilder(info.options) } private class FallbackWriteBuilder(options: CaseInsensitiveStringMap) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala new file mode 100644 index 0000000000000..289f9dc427795 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, TestRelation2} +import org.apache.spark.sql.catalyst.analysis.CreateTablePartitioningValidationSuite +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, CreateTableAsSelect, LogicalPlan, ReplaceTableAsSelect} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.{Identifier, TableChange} +import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition +import org.apache.spark.sql.connector.expressions.Expressions +import org.apache.spark.sql.execution.datasources.PreprocessTableCreation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StringType} + +class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTest { + import CreateTablePartitioningValidationSuite._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + override protected def extendedAnalysisRules: Seq[Rule[LogicalPlan]] = { + Seq(PreprocessTableCreation(spark)) + } + + test("CreateTableAsSelect: using top level field for partitioning") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + Seq("ID", "iD").foreach { ref => + val plan = CreateTableAsSelect( + catalog, + Identifier.of(Array(), "table_name"), + Expressions.identity(ref) :: Nil, + TestRelation2, + Map.empty, + Map.empty, + ignoreIfExists = false) + + if (caseSensitive) { + assertAnalysisError(plan, Seq("Couldn't find column", ref), caseSensitive) + } else { + assertAnalysisSuccess(plan, caseSensitive) + } + } + } + } + } + + test("CreateTableAsSelect: using nested column for partitioning") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + Seq("POINT.X", "point.X", "poInt.x", "poInt.X").foreach { ref => + 
val plan = CreateTableAsSelect( + catalog, + Identifier.of(Array(), "table_name"), + Expressions.bucket(4, ref) :: Nil, + TestRelation2, + Map.empty, + Map.empty, + ignoreIfExists = false) + + if (caseSensitive) { + val field = ref.split("\\.") + assertAnalysisError(plan, Seq("Couldn't find column", field.head), caseSensitive) + } else { + assertAnalysisSuccess(plan, caseSensitive) + } + } + } + } + } + + test("ReplaceTableAsSelect: using top level field for partitioning") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + Seq("ID", "iD").foreach { ref => + val plan = ReplaceTableAsSelect( + catalog, + Identifier.of(Array(), "table_name"), + Expressions.identity(ref) :: Nil, + TestRelation2, + Map.empty, + Map.empty, + orCreate = true) + + if (caseSensitive) { + assertAnalysisError(plan, Seq("Couldn't find column", ref), caseSensitive) + } else { + assertAnalysisSuccess(plan, caseSensitive) + } + } + } + } + } + + test("ReplaceTableAsSelect: using nested column for partitioning") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + Seq("POINT.X", "point.X", "poInt.x", "poInt.X").foreach { ref => + val plan = ReplaceTableAsSelect( + catalog, + Identifier.of(Array(), "table_name"), + Expressions.bucket(4, ref) :: Nil, + TestRelation2, + Map.empty, + Map.empty, + orCreate = true) + + if (caseSensitive) { + val field = ref.split("\\.") + assertAnalysisError(plan, Seq("Couldn't find column", field.head), caseSensitive) + } else { + assertAnalysisSuccess(plan, caseSensitive) + } + } + } + } + } + + test("AlterTable: add column - nested") { + Seq("POINT.Z", "poInt.z", "poInt.Z").foreach { ref => + val field = ref.split("\\.") + alterTableTest( + TableChange.addColumn(field, LongType), + Seq("add", field.head) + ) + } + } + + test("AlterTable: add column resolution - positional") { + Seq("ID", "iD").foreach { ref => + alterTableTest( + 
TableChange.addColumn( + Array("f"), LongType, true, null, ColumnPosition.after(ref)), + Seq("reference column", ref) + ) + } + } + + test("AlterTable: add column resolution - nested positional") { + Seq("X", "Y").foreach { ref => + alterTableTest( + TableChange.addColumn( + Array("point", "z"), LongType, true, null, ColumnPosition.after(ref)), + Seq("reference column", ref) + ) + } + } + + test("AlterTable: drop column resolution") { + Seq(Array("ID"), Array("point", "X"), Array("POINT", "X"), Array("POINT", "x")).foreach { ref => + alterTableTest( + TableChange.deleteColumn(ref), + Seq("Cannot delete missing field", ref.quoted) + ) + } + } + + test("AlterTable: rename column resolution") { + Seq(Array("ID"), Array("point", "X"), Array("POINT", "X"), Array("POINT", "x")).foreach { ref => + alterTableTest( + TableChange.renameColumn(ref, "newName"), + Seq("Cannot rename missing field", ref.quoted) + ) + } + } + + test("AlterTable: drop column nullability resolution") { + Seq(Array("ID"), Array("point", "X"), Array("POINT", "X"), Array("POINT", "x")).foreach { ref => + alterTableTest( + TableChange.updateColumnNullability(ref, true), + Seq("Cannot update missing field", ref.quoted) + ) + } + } + + test("AlterTable: change column type resolution") { + Seq(Array("ID"), Array("point", "X"), Array("POINT", "X"), Array("POINT", "x")).foreach { ref => + alterTableTest( + TableChange.updateColumnType(ref, StringType), + Seq("Cannot update missing field", ref.quoted) + ) + } + } + + test("AlterTable: change column comment resolution") { + Seq(Array("ID"), Array("point", "X"), Array("POINT", "X"), Array("POINT", "x")).foreach { ref => + alterTableTest( + TableChange.updateColumnComment(ref, "Here's a comment for ya"), + Seq("Cannot update missing field", ref.quoted) + ) + } + } + + private def alterTableTest(change: TableChange, error: Seq[String]): Unit = { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + 
val plan = AlterTable( + catalog, + Identifier.of(Array(), "table_name"), + TestRelation2, + Seq(change) + ) + + if (caseSensitive) { + assertAnalysisError(plan, error, caseSensitive) + } else { + assertAnalysisSuccess(plan, caseSensitive) + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala new file mode 100644 index 0000000000000..a33b9fad7ff4f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/AggregatingAccumulatorSuite.scala @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.execution + +import java.util.Properties + +import org.apache.spark.{SparkFunSuite, TaskContext, TaskContextImpl} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.{ExpressionEvalHelper, If, SortArray, SparkPartitionID, SpecificInternalRow} +import org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet +import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StringType, StructType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * Test suite for [[AggregatingAccumulator]]. + */ +class AggregatingAccumulatorSuite + extends SparkFunSuite + with SharedSparkSession + with ExpressionEvalHelper { + private val a = 'a.long + private val b = 'b.string + private val c = 'c.double + private val inputAttributes = Seq(a, b, c) + private def str(s: String): UTF8String = UTF8String.fromString(s) + + test("empty aggregation") { + val acc1 = AggregatingAccumulator( + Seq(sum(a) + 1L as "sum_a", max(b) as "max_b", approxCountDistinct(c) as "acntd_c"), + inputAttributes) + val expectedSchema = new StructType() + .add("sum_a", "long") + .add("max_b", "string") + .add("acntd_c", "long", nullable = false) + assert(acc1.schema === expectedSchema) + + val accEmpty = acc1.copy() + val acc2 = acc1.copy() + + // Merge empty + acc1.merge(accEmpty) + assert(acc1.isZero) + + // No updates + assert(acc1.isZero) + checkResult(acc1.value, InternalRow(null, null, 0), expectedSchema, false) + assert(acc1.isZero) + + // A few updates + acc1.add(InternalRow(4L, str("foo"), 4.9d)) + acc1.add(InternalRow(98L, str("bar"), -323.9d)) + acc1.add(InternalRow(-30L, str("baz"), 4129.8d)) + assert(!acc1.isZero) + checkResult(acc1.value, InternalRow(73L, str("baz"), 3L), expectedSchema, false) + + // Idempotency of result + checkResult(acc1.value, 
InternalRow(73L, str("baz"), 3L), expectedSchema, false) + + // A few updates to the copied accumulator using an updater + val updater = acc2.copyAndReset() + updater.add(InternalRow(-2L, str("qwerty"), -6773.9d)) + updater.add(InternalRow(-35L, str("zzz-top"), -323.9d)) + assert(acc2.isZero) + acc2.setState(updater) + checkResult(acc2.value, InternalRow(-36L, str("zzz-top"), 2L), expectedSchema, false) + + // Merge accumulators + acc1.merge(acc2) + acc1.merge(acc2) + acc1.merge(accEmpty) + acc1.merge(accEmpty) + checkResult(acc1.value, InternalRow(1L, str("zzz-top"), 5L), expectedSchema, false) + + // Reset + acc1.reset() + assert(acc1.isZero) + } + + test("non-deterministic expressions") { + val acc_driver = AggregatingAccumulator( + Seq( + min(SparkPartitionID()) as "min_pid", + max(SparkPartitionID()) as "max_pid", + SparkPartitionID()), + Nil) + checkResult(acc_driver.value, InternalRow(null, null, 0), acc_driver.schema, false) + + def inPartition(id: Int)(f: => Unit): Unit = { + val ctx = new TaskContextImpl(0, 0, 1, 0, 0, null, new Properties, null) + TaskContext.setTaskContext(ctx) + try { + f + } finally { + TaskContext.unset() + } + } + + val acc1 = acc_driver.copy() + inPartition(3) { + acc1.add(InternalRow.empty) + } + val acc2 = acc_driver.copy() + inPartition(42) { + acc2.add(InternalRow.empty) + } + val acc3 = acc_driver.copy() + inPartition(96) { + acc3.add(InternalRow.empty) + } + + acc_driver.merge(acc1) + acc_driver.merge(acc2) + acc_driver.merge(acc3) + assert(!acc_driver.isZero) + checkResult(acc_driver.value, InternalRow(3, 96, 0), acc_driver.schema, false) + } + + test("collect agg metrics on job") { + val acc = AggregatingAccumulator( + Seq( + avg(a) + 1.0d as "avg_a", + sum(a + 10L) as "sum_a", + min(b) as "min_b", + max(b) as "max_b", + approxCountDistinct(b) as "acntd_b", + SortArray(CollectSet(If(a < 1000L, a % 3L, a % 6L)).toAggregateExpression(), true) + as "item_set", + min(SparkPartitionID()) as "min_pid", + max(SparkPartitionID()) 
as "max_pid", + SparkPartitionID()), + Seq(a, b)) + sparkContext.register(acc) + def consume(ids: Iterator[Long]): Unit = { + val row = new SpecificInternalRow(Seq(LongType, StringType)) + ids.foreach { id => + // Create the new row values. + row.setLong(0, id) + row.update(1, UTF8String.fromString(f"val_$id%06d")) + + // Update the accumulator + acc.add(row) + } + } + + // Run job 1 + spark.sparkContext + .range(0, 1000, 1, 8) + .foreachPartition(consume) + assert(checkResult( + acc.value, + InternalRow( + 500.5d, + 509500L, + str("val_000000"), + str("val_000999"), + 1057L, + new GenericArrayData(Seq(0L, 1L, 2L)), + 0, + 7, + 0), + acc.schema, + false)) + + // Run job 2 + spark.sparkContext + .range(1000, 1200, 1, 8) + .foreachPartition(consume) + assert(checkResult( + acc.value, + InternalRow( + 600.5d, + 731400L, + str("val_000000"), + str("val_001199"), + 1280L, + new GenericArrayData(Seq(0L, 1L, 2L, 3L, 4L, 5L)), + 0, + 7, + 0), + acc.schema, + false)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala index 43e29c2d50786..7d6306b65ff47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BroadcastExchangeSuite.scala @@ -21,13 +21,16 @@ import java.util.concurrent.{CountDownLatch, TimeUnit} import org.apache.spark.SparkException import org.apache.spark.scheduler._ +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec import org.apache.spark.sql.execution.joins.HashedRelation import org.apache.spark.sql.functions.broadcast import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -class BroadcastExchangeSuite extends SparkPlanTest with SharedSparkSession { +class BroadcastExchangeSuite extends 
SparkPlanTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ @@ -53,8 +56,8 @@ class BroadcastExchangeSuite extends SparkPlanTest with SharedSparkSession { }).where("id = value") // get the exchange physical plan - val hashExchange = df.queryExecution.executedPlan - .collect { case p: BroadcastExchangeExec => p }.head + val hashExchange = collect( + df.queryExecution.executedPlan) { case p: BroadcastExchangeExec => p }.head // materialize the future and wait for the job being scheduled hashExchange.prepare() @@ -84,8 +87,8 @@ class BroadcastExchangeSuite extends SparkPlanTest with SharedSparkSession { withSQLConf(SQLConf.BROADCAST_TIMEOUT.key -> "-1") { val df = spark.range(1).toDF() val joinDF = df.join(broadcast(df), "id") - val broadcastExchangeExec = joinDF.queryExecution.executedPlan - .collect { case p: BroadcastExchangeExec => p } + val broadcastExchangeExec = collect( + joinDF.queryExecution.executedPlan) { case p: BroadcastExchangeExec => p } assert(broadcastExchangeExec.size == 1, "one and only BroadcastExchangeExec") assert(joinDF.collect().length == 1) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala index f1a3092a193f4..f1411b263c77b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala @@ -16,9 +16,12 @@ */ package org.apache.spark.sql.execution +import scala.collection.mutable + import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan @@ -150,15 
+153,50 @@ class DataSourceV2ScanExecRedactionSuite extends DataSourceScanRedactionTest { } test("FileScan description") { - withTempPath { path => - val dir = path.getCanonicalPath - spark.range(0, 10).write.orc(dir) - val df = spark.read.orc(dir) + Seq("json", "orc", "parquet").foreach { format => + withTempPath { path => + val dir = path.getCanonicalPath + spark.range(0, 10).write.format(format).save(dir) + val df = spark.read.format(format).load(dir) + + withClue(s"Source '$format':") { + assert(isIncluded(df.queryExecution, "ReadSchema")) + assert(isIncluded(df.queryExecution, "BatchScan")) + if (Seq("orc", "parquet").contains(format)) { + assert(isIncluded(df.queryExecution, "PushedFilters")) + } + assert(isIncluded(df.queryExecution, "Location")) + } + } + } + } - assert(isIncluded(df.queryExecution, "ReadSchema")) - assert(isIncluded(df.queryExecution, "BatchScan")) - assert(isIncluded(df.queryExecution, "PushedFilters")) - assert(isIncluded(df.queryExecution, "Location")) + test("SPARK-30362: test input metrics for DSV2") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + Seq("json", "orc", "parquet").foreach { format => + withTempPath { path => + val dir = path.getCanonicalPath + spark.range(0, 10).write.format(format).save(dir) + val df = spark.read.format(format).load(dir) + val bytesReads = new mutable.ArrayBuffer[Long]() + val recordsRead = new mutable.ArrayBuffer[Long]() + val bytesReadListener = new SparkListener() { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + bytesReads += taskEnd.taskMetrics.inputMetrics.bytesRead + recordsRead += taskEnd.taskMetrics.inputMetrics.recordsRead + } + } + sparkContext.addSparkListener(bytesReadListener) + try { + df.collect() + sparkContext.listenerBus.waitUntilEmpty() + assert(bytesReads.sum > 0) + assert(recordsRead.sum == 10) + } finally { + sparkContext.removeSparkListener(bytesReadListener) + } + } + } } } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala new file mode 100644 index 0000000000000..1e90754ad7721 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DeprecatedWholeStageCodegenSuite.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.expressions.scalalang.typed +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +@deprecated("This test suite will be removed.", "3.0.0") +class DeprecatedWholeStageCodegenSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { + + test("simple typed UDAF should be included in WholeStageCodegen") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + // With enable AQE, the WholeStageCodegenExec rule is applied when running QueryStageExec. 
+ import testImplicits._ + + val ds = Seq(("a", 10), ("b", 1), ("b", 2), ("c", 1)).toDS() + .groupByKey(_._1).agg(typed.sum(_._2)) + + val plan = ds.queryExecution.executedPlan + assert(find(plan)(p => + p.isInstanceOf[WholeStageCodegenExec] && + p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined) + assert(ds.collect() === Array(("a", 10.0), ("b", 3.0), ("c", 1.0))) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index 104cf4c58d617..bb59b12e6f350 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -17,25 +17,35 @@ package org.apache.spark.sql.execution -import java.sql.{Date, Timestamp} - import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} class HiveResultSuite extends SharedSparkSession { import testImplicits._ test("date formatting in hive result") { - val date = "2018-12-28" - val executedPlan = Seq(Date.valueOf(date)).toDS().queryExecution.executedPlan - val result = HiveResult.hiveResultString(executedPlan) - assert(result.head == date) + val dates = Seq("2018-12-28", "1582-10-13", "1582-10-14", "1582-10-15") + val df = dates.toDF("a").selectExpr("cast(a as date) as b") + val executedPlan1 = df.queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan1) + assert(result == dates) + val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan + val result2 = HiveResult.hiveResultString(executedPlan2) + assert(result2 == dates.map(x => s"[$x]")) } test("timestamp formatting in hive result") { - val timestamp = "2018-12-28 01:02:03" - val executedPlan = Seq(Timestamp.valueOf(timestamp)).toDS().queryExecution.executedPlan - val result = HiveResult.hiveResultString(executedPlan) - assert(result.head == 
timestamp) + val timestamps = Seq( + "2018-12-28 01:02:03", + "1582-10-13 01:02:03", + "1582-10-14 01:02:03", + "1582-10-15 01:02:03") + val df = timestamps.toDF("a").selectExpr("cast(a as timestamp) as b") + val executedPlan1 = df.queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan1) + assert(result == timestamps) + val executedPlan2 = df.selectExpr("array(b)").queryExecution.executedPlan + val result2 = HiveResult.hiveResultString(executedPlan2) + assert(result2 == timestamps.map(x => s"[$x]")) } test("toHiveString correctly handles UDTs") { @@ -43,4 +53,19 @@ class HiveResultSuite extends SharedSparkSession { val tpe = new ExamplePointUDT() assert(HiveResult.toHiveString((point, tpe)) === "(50.0, 50.0)") } + + test("decimal formatting in hive result") { + val df = Seq(new java.math.BigDecimal("1")).toDS() + Seq(2, 6, 18).foreach { scala => + val executedPlan = + df.selectExpr(s"CAST(value AS decimal(38, $scala))").queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan) + assert(result.head.split("\\.").last.length === scala) + } + + val executedPlan = Seq(java.math.BigDecimal.ZERO).toDS() + .selectExpr(s"CAST(value AS decimal(38, 8))").queryExecution.executedPlan + val result = HiveResult.hiveResultString(executedPlan) + assert(result.head === "0.00000000") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala index aa83b9b11dcfc..311f84c07a955 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala @@ -29,11 +29,26 @@ import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec} 
import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.execution.window.WindowExec +import org.apache.spark.sql.internal.SQLConf class LogicalPlanTagInSparkPlanSuite extends TPCDSQuerySuite { - override protected def checkGeneratedCode(plan: SparkPlan): Unit = { - super.checkGeneratedCode(plan) + var originalValue: String = _ + // when enable AQE, the 'AdaptiveSparkPlanExec' node does not have a logical plan link + override def beforeAll(): Unit = { + super.beforeAll() + originalValue = spark.conf.get(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key) + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + } + + override def afterAll(): Unit = { + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, originalValue) + super.afterAll() + } + + override protected def checkGeneratedCode( + plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = { + super.checkGeneratedCode(plan, checkMethodCodeSize) checkLogicalPlanTag(plan) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 90ce6765013b4..0c5e2e3c7d1d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -24,6 +24,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range, Repartition, Sort, Union} import org.apache.spark.sql.catalyst.plans.physical._ +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReusedExchangeExec, ReuseExchange, 
ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} @@ -32,7 +34,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ -class PlannerSuite extends SharedSparkSession { +class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { import testImplicits._ setupTestData() @@ -254,29 +256,31 @@ class PlannerSuite extends SharedSparkSession { // Disable broadcast join withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { { - val numExchanges = sql( + val plan = sql( """ |SELECT * |FROM | normal JOIN small ON (normal.key = small.key) | JOIN tiny ON (small.key = tiny.key) """.stripMargin - ).queryExecution.executedPlan.collect { + ).queryExecution.executedPlan + val numExchanges = collect(plan) { case exchange: ShuffleExchangeExec => exchange }.length assert(numExchanges === 5) } { - // This second query joins on different keys: - val numExchanges = sql( + val plan = sql( """ |SELECT * |FROM | normal JOIN small ON (normal.key = small.key) | JOIN tiny ON (normal.key = tiny.key) """.stripMargin - ).queryExecution.executedPlan.collect { + ).queryExecution.executedPlan + // This second query joins on different keys: + val numExchanges = collect(plan) { case exchange: ShuffleExchangeExec => exchange }.length assert(numExchanges === 5) @@ -689,7 +693,7 @@ class PlannerSuite extends SharedSparkSession { val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(smjExec) outputPlan match { - case SortMergeJoinExec(leftKeys, rightKeys, _, _, _, _) => + case SortMergeJoinExec(leftKeys, rightKeys, _, _, _, _, _) => assert(leftKeys == Seq(exprA, exprA)) assert(rightKeys == Seq(exprB, exprC)) case _ => fail() @@ -713,7 +717,8 @@ class PlannerSuite extends SharedSparkSession { SortExec(_, _, ShuffleExchangeExec(HashPartitioning(leftPartitioningExpressions, _), _, _), _), SortExec(_, _, - 
ShuffleExchangeExec(HashPartitioning(rightPartitioningExpressions, _), _, _), _)) => + ShuffleExchangeExec(HashPartitioning(rightPartitioningExpressions, _), + _, _), _), _) => assert(leftKeys === smjExec.leftKeys) assert(rightKeys === smjExec.rightKeys) assert(leftKeys === leftPartitioningExpressions) @@ -762,7 +767,7 @@ class PlannerSuite extends SharedSparkSession { def checkReusedExchangeOutputPartitioningRewrite( df: DataFrame, expectedPartitioningClass: Class[_]): Unit = { - val reusedExchange = df.queryExecution.executedPlan.collect { + val reusedExchange = collect(df.queryExecution.executedPlan) { case r: ReusedExchangeExec => r } checkOutputPartitioningRewrite(reusedExchange, expectedPartitioningClass) @@ -771,31 +776,34 @@ class PlannerSuite extends SharedSparkSession { def checkInMemoryTableScanOutputPartitioningRewrite( df: DataFrame, expectedPartitioningClass: Class[_]): Unit = { - val inMemoryScan = df.queryExecution.executedPlan.collect { + val inMemoryScan = collect(df.queryExecution.executedPlan) { case m: InMemoryTableScanExec => m } checkOutputPartitioningRewrite(inMemoryScan, expectedPartitioningClass) } + // when enable AQE, the reusedExchange is inserted when executed. 
+ withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + // ReusedExchange is HashPartitioning + val df1 = Seq(1 -> "a").toDF("i", "j").repartition($"i") + val df2 = Seq(1 -> "a").toDF("i", "j").repartition($"i") + checkReusedExchangeOutputPartitioningRewrite(df1.union(df2), classOf[HashPartitioning]) + + // ReusedExchange is RangePartitioning + val df3 = Seq(1 -> "a").toDF("i", "j").orderBy($"i") + val df4 = Seq(1 -> "a").toDF("i", "j").orderBy($"i") + checkReusedExchangeOutputPartitioningRewrite(df3.union(df4), classOf[RangePartitioning]) + + // InMemoryTableScan is HashPartitioning + Seq(1 -> "a").toDF("i", "j").repartition($"i").persist() + checkInMemoryTableScanOutputPartitioningRewrite( + Seq(1 -> "a").toDF("i", "j").repartition($"i"), classOf[HashPartitioning]) - // ReusedExchange is HashPartitioning - val df1 = Seq(1 -> "a").toDF("i", "j").repartition($"i") - val df2 = Seq(1 -> "a").toDF("i", "j").repartition($"i") - checkReusedExchangeOutputPartitioningRewrite(df1.union(df2), classOf[HashPartitioning]) - - // ReusedExchange is RangePartitioning - val df3 = Seq(1 -> "a").toDF("i", "j").orderBy($"i") - val df4 = Seq(1 -> "a").toDF("i", "j").orderBy($"i") - checkReusedExchangeOutputPartitioningRewrite(df3.union(df4), classOf[RangePartitioning]) - - // InMemoryTableScan is HashPartitioning - Seq(1 -> "a").toDF("i", "j").repartition($"i").persist() - checkInMemoryTableScanOutputPartitioningRewrite( - Seq(1 -> "a").toDF("i", "j").repartition($"i"), classOf[HashPartitioning]) - - // InMemoryTableScan is RangePartitioning - spark.range(1, 100, 1, 10).toDF().persist() - checkInMemoryTableScanOutputPartitioningRewrite( - spark.range(1, 100, 1, 10).toDF(), classOf[RangePartitioning]) + // InMemoryTableScan is RangePartitioning + spark.range(1, 100, 1, 10).toDF().persist() + checkInMemoryTableScanOutputPartitioningRewrite( + spark.range(1, 100, 1, 10).toDF(), classOf[RangePartitioning]) + } // 
InMemoryTableScan is PartitioningCollection withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { @@ -856,6 +864,117 @@ class PlannerSuite extends SharedSparkSession { StructField("f2", StringType, nullable = true), StructField("f3", StringType, nullable = false)))) } + + test("Do not analyze subqueries twice") { + // Analyzing the subquery twice will result in stacked + // CheckOverflow & PromotePrecision expressions. + val df = sql( + """ + |SELECT id, + | (SELECT 1.3000000 * AVG(CAST(id AS DECIMAL(10, 3))) FROM range(13)) AS ref + |FROM range(5) + |""".stripMargin) + + val Seq(subquery) = stripAQEPlan(df.queryExecution.executedPlan).subqueriesAll + subquery.foreach { node => + node.expressions.foreach { expression => + expression.foreach { + case PromotePrecision(_: PromotePrecision) => + fail(s"$expression contains stacked PromotePrecision expressions.") + case CheckOverflow(_: CheckOverflow, _, _) => + fail(s"$expression contains stacked CheckOverflow expressions.") + case _ => // Ok + } + } + } + } + + test("aliases in the project should not introduce extra shuffle") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("df1", "df2") { + spark.range(10).selectExpr("id AS key", "0").repartition($"key").createTempView("df1") + spark.range(20).selectExpr("id AS key", "0").repartition($"key").createTempView("df2") + val planned = sql( + """ + |SELECT * FROM + | (SELECT key AS k from df1) t1 + |INNER JOIN + | (SELECT key AS k from df2) t2 + |ON t1.k = t2.k + """.stripMargin).queryExecution.executedPlan + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.size == 2) + } + } + } + + test("aliases to expressions should not be replaced") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("df1", "df2") { + spark.range(10).selectExpr("id AS key", "0").repartition($"key").createTempView("df1") + spark.range(20).selectExpr("id AS key", 
"0").repartition($"key").createTempView("df2") + val planned = sql( + """ + |SELECT * FROM + | (SELECT key + 1 AS k1 from df1) t1 + |INNER JOIN + | (SELECT key + 1 AS k2 from df2) t2 + |ON t1.k1 = t2.k2 + |""".stripMargin).queryExecution.executedPlan + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + + // Make sure aliases to an expression (key + 1) are not replaced. + Seq("k1", "k2").foreach { alias => + assert(exchanges.exists(_.outputPartitioning match { + case HashPartitioning(Seq(a: AttributeReference), _) => a.name == alias + case _ => false + })) + } + } + } + } + + test("aliases in the aggregate expressions should not introduce extra shuffle") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val t1 = spark.range(10).selectExpr("floor(id/4) as k1") + val t2 = spark.range(20).selectExpr("floor(id/4) as k2") + + val agg1 = t1.groupBy("k1").agg(count(lit("1")).as("cnt1")) + val agg2 = t2.groupBy("k2").agg(count(lit("1")).as("cnt2")).withColumnRenamed("k2", "k3") + + val planned = agg1.join(agg2, $"k1" === $"k3").queryExecution.executedPlan + + assert(planned.collect { case h: HashAggregateExec => h }.nonEmpty) + + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.size == 2) + } + } + + test("aliases in the object hash/sort aggregate expressions should not introduce extra shuffle") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + Seq(true, false).foreach { useObjectHashAgg => + withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> useObjectHashAgg.toString) { + val t1 = spark.range(10).selectExpr("floor(id/4) as k1") + val t2 = spark.range(20).selectExpr("floor(id/4) as k2") + + val agg1 = t1.groupBy("k1").agg(collect_list("k1")) + val agg2 = t2.groupBy("k2").agg(collect_list("k2")).withColumnRenamed("k2", "k3") + + val planned = agg1.join(agg2, $"k1" === $"k3").queryExecution.executedPlan + + if (useObjectHashAgg) { + assert(planned.collect { case o: 
ObjectHashAggregateExec => o }.nonEmpty) + } else { + assert(planned.collect { case s: SortAggregateExec => s }.nonEmpty) + } + + val exchanges = planned.collect { case s: ShuffleExchangeExec => s } + assert(exchanges.size == 2) + } + } + } + } } // Used for unit-testing EnsureRequirements diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala index 76006efda992f..987338cf6cbbf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryPlanningTrackerEndToEndSuite.scala @@ -17,9 +17,11 @@ package org.apache.spark.sql.execution -import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamExecution} +import org.apache.spark.sql.streaming.StreamTest -class QueryPlanningTrackerEndToEndSuite extends SharedSparkSession { +class QueryPlanningTrackerEndToEndSuite extends StreamTest { + import testImplicits._ test("programmatic API") { val df = spark.range(1000).selectExpr("count(*)") @@ -38,4 +40,22 @@ class QueryPlanningTrackerEndToEndSuite extends SharedSparkSession { assert(tracker.rules.nonEmpty) } + test("SPARK-29227: Track rule info in optimization phase in streaming") { + val inputData = MemoryStream[Int] + val df = inputData.toDF() + + def assertStatus(stream: StreamExecution): Unit = { + stream.processAllAvailable() + val tracker = stream.lastExecution.tracker + assert(tracker.phases.keys == Set("analysis", "optimization", "planning")) + assert(tracker.rules.nonEmpty) + } + + testStream(df)( + StartStream(), + AddData(inputData, 1, 2, 3), + Execute(assertStatus), + StopStream) + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ReduceNumShufflePartitionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/ReduceNumShufflePartitionsSuite.scala index b5dbdd0b18b49..5565a0dd01840 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ReduceNumShufflePartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ReduceNumShufflePartitionsSuite.scala @@ -19,11 +19,12 @@ package org.apache.spark.sql.execution import org.scalatest.BeforeAndAfterAll -import org.apache.spark.{MapOutputStatistics, SparkConf, SparkFunSuite} +import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql._ import org.apache.spark.sql.execution.adaptive._ -import org.apache.spark.sql.execution.adaptive.rule.{CoalescedShuffleReaderExec, ReduceNumShufflePartitions} +import org.apache.spark.sql.execution.adaptive.CoalescedShuffleReaderExec +import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -51,220 +52,8 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA } } - private def checkEstimation( - rule: ReduceNumShufflePartitions, - bytesByPartitionIdArray: Array[Array[Long]], - expectedPartitionStartIndices: Array[Int]): Unit = { - val mapOutputStatistics = bytesByPartitionIdArray.zipWithIndex.map { - case (bytesByPartitionId, index) => - new MapOutputStatistics(index, bytesByPartitionId) - } - val estimatedPartitionStartIndices = - rule.estimatePartitionStartIndices(mapOutputStatistics) - assert(estimatedPartitionStartIndices === expectedPartitionStartIndices) - } - - private def createReduceNumShufflePartitionsRule( - advisoryTargetPostShuffleInputSize: Long, - minNumPostShufflePartitions: Int = 1): ReduceNumShufflePartitions = { - val conf = new SQLConf().copy( - SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE -> advisoryTargetPostShuffleInputSize, - SQLConf.SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS -> 
minNumPostShufflePartitions) - ReduceNumShufflePartitions(conf) - } - - test("test estimatePartitionStartIndices - 1 Exchange") { - val rule = createReduceNumShufflePartitionsRule(100L) - - { - // All bytes per partition are 0. - val bytesByPartitionId = Array[Long](0, 0, 0, 0, 0) - val expectedPartitionStartIndices = Array[Int](0) - checkEstimation(rule, Array(bytesByPartitionId), expectedPartitionStartIndices) - } - - { - // Some bytes per partition are 0 and total size is less than the target size. - // 1 post-shuffle partition is needed. - val bytesByPartitionId = Array[Long](10, 0, 20, 0, 0) - val expectedPartitionStartIndices = Array[Int](0) - checkEstimation(rule, Array(bytesByPartitionId), expectedPartitionStartIndices) - } - - { - // 2 post-shuffle partitions are needed. - val bytesByPartitionId = Array[Long](10, 0, 90, 20, 0) - val expectedPartitionStartIndices = Array[Int](0, 3) - checkEstimation(rule, Array(bytesByPartitionId), expectedPartitionStartIndices) - } - - { - // There are a few large pre-shuffle partitions. - val bytesByPartitionId = Array[Long](110, 10, 100, 110, 0) - val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) - checkEstimation(rule, Array(bytesByPartitionId), expectedPartitionStartIndices) - } - - { - // All pre-shuffle partitions are larger than the targeted size. - val bytesByPartitionId = Array[Long](100, 110, 100, 110, 110) - val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) - checkEstimation(rule, Array(bytesByPartitionId), expectedPartitionStartIndices) - } - - { - // The last pre-shuffle partition is in a single post-shuffle partition. 
- val bytesByPartitionId = Array[Long](30, 30, 0, 40, 110) - val expectedPartitionStartIndices = Array[Int](0, 4) - checkEstimation(rule, Array(bytesByPartitionId), expectedPartitionStartIndices) - } - } - - test("test estimatePartitionStartIndices - 2 Exchanges") { - val rule = createReduceNumShufflePartitionsRule(100L) - - { - // If there are multiple values of the number of pre-shuffle partitions, - // we should see an assertion error. - val bytesByPartitionId1 = Array[Long](0, 0, 0, 0, 0) - val bytesByPartitionId2 = Array[Long](0, 0, 0, 0, 0, 0) - val mapOutputStatistics = - Array( - new MapOutputStatistics(0, bytesByPartitionId1), - new MapOutputStatistics(1, bytesByPartitionId2)) - intercept[AssertionError](rule.estimatePartitionStartIndices(mapOutputStatistics)) - } - - { - // All bytes per partition are 0. - val bytesByPartitionId1 = Array[Long](0, 0, 0, 0, 0) - val bytesByPartitionId2 = Array[Long](0, 0, 0, 0, 0) - val expectedPartitionStartIndices = Array[Int](0) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // Some bytes per partition are 0. - // 1 post-shuffle partition is needed. - val bytesByPartitionId1 = Array[Long](0, 10, 0, 20, 0) - val bytesByPartitionId2 = Array[Long](30, 0, 20, 0, 20) - val expectedPartitionStartIndices = Array[Int](0) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // 2 post-shuffle partition are needed. - val bytesByPartitionId1 = Array[Long](0, 10, 0, 20, 0) - val bytesByPartitionId2 = Array[Long](30, 0, 70, 0, 30) - val expectedPartitionStartIndices = Array[Int](0, 2, 4) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // 4 post-shuffle partition are needed. 
- val bytesByPartitionId1 = Array[Long](0, 99, 0, 20, 0) - val bytesByPartitionId2 = Array[Long](30, 0, 70, 0, 30) - val expectedPartitionStartIndices = Array[Int](0, 1, 2, 4) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // 2 post-shuffle partition are needed. - val bytesByPartitionId1 = Array[Long](0, 100, 0, 30, 0) - val bytesByPartitionId2 = Array[Long](30, 0, 70, 0, 30) - val expectedPartitionStartIndices = Array[Int](0, 1, 2, 4) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // There are a few large pre-shuffle partitions. - val bytesByPartitionId1 = Array[Long](0, 100, 40, 30, 0) - val bytesByPartitionId2 = Array[Long](30, 0, 60, 0, 110) - val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // All pairs of pre-shuffle partitions are larger than the targeted size. - val bytesByPartitionId1 = Array[Long](100, 100, 40, 30, 0) - val bytesByPartitionId2 = Array[Long](30, 0, 60, 70, 110) - val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - } - - test("test estimatePartitionStartIndices and enforce minimal number of reducers") { - val rule = createReduceNumShufflePartitionsRule(100L, 2) - - { - // The minimal number of post-shuffle partitions is not enforced because - // the size of data is 0. - val bytesByPartitionId1 = Array[Long](0, 0, 0, 0, 0) - val bytesByPartitionId2 = Array[Long](0, 0, 0, 0, 0) - val expectedPartitionStartIndices = Array[Int](0) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // The minimal number of post-shuffle partitions is enforced. 
- val bytesByPartitionId1 = Array[Long](10, 5, 5, 0, 20) - val bytesByPartitionId2 = Array[Long](5, 10, 0, 10, 5) - val expectedPartitionStartIndices = Array[Int](0, 3) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - - { - // The number of post-shuffle partitions is determined by the coordinator. - val bytesByPartitionId1 = Array[Long](10, 50, 20, 80, 20) - val bytesByPartitionId2 = Array[Long](40, 10, 0, 10, 30) - val expectedPartitionStartIndices = Array[Int](0, 1, 3, 4) - checkEstimation( - rule, - Array(bytesByPartitionId1, bytesByPartitionId2), - expectedPartitionStartIndices) - } - } - - /////////////////////////////////////////////////////////////////////////// - // Query tests - /////////////////////////////////////////////////////////////////////////// - val numInputPartitions: Int = 10 - def checkAnswer(actual: => DataFrame, expectedAnswer: Seq[Row]): Unit = { - QueryTest.checkAnswer(actual, expectedAnswer) match { - case Some(errorMessage) => fail(errorMessage) - case None => - } - } - def withSparkSession( f: SparkSession => Unit, targetPostShuffleInputSize: Int, @@ -274,6 +63,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA .setMaster("local[*]") .setAppName("test") .set(UI_ENABLED, false) + .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") .set(SQLConf.SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS.key, "5") .set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "true") .set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") @@ -308,7 +98,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA val agg = df.groupBy("key").count() // Check the answer first. 
- checkAnswer( + QueryTest.checkAnswer( agg, spark.range(0, 20).selectExpr("id", "50 as cnt").collect()) @@ -355,7 +145,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA .range(0, 1000) .selectExpr("id % 500 as key", "id as value") .union(spark.range(0, 1000).selectExpr("id % 500 as key", "id as value")) - checkAnswer( + QueryTest.checkAnswer( join, expectedAnswer.collect()) @@ -407,7 +197,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA spark .range(0, 500) .selectExpr("id", "2 as cnt") - checkAnswer( + QueryTest.checkAnswer( join, expectedAnswer.collect()) @@ -459,7 +249,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA spark .range(0, 1000) .selectExpr("id % 500 as key", "2 as cnt", "id as value") - checkAnswer( + QueryTest.checkAnswer( join, expectedAnswer.collect()) @@ -503,11 +293,11 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA // Check the answer first. val expectedAnswer = spark.range(0, 500).selectExpr("id % 500", "id as value") .union(spark.range(500, 1000).selectExpr("id % 500", "id as value")) - checkAnswer( + QueryTest.checkAnswer( join, expectedAnswer.collect()) - // Then, let's make sure we do not reduce number of ppst shuffle partitions. + // Then, let's make sure we do not reduce number of post shuffle partitions. 
val finalPlan = join.queryExecution.executedPlan .asInstanceOf[AdaptiveSparkPlanExec].executedPlan val shuffleReaders = finalPlan.collect { @@ -533,10 +323,12 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA // ReusedQueryStage 0 // ReusedQueryStage 0 val resultDf = df.join(df, "key").join(df, "key") - checkAnswer(resultDf, Row(0, 0, 0, 0) :: Nil) + QueryTest.checkAnswer(resultDf, Row(0, 0, 0, 0) :: Nil) val finalPlan = resultDf.queryExecution.executedPlan .asInstanceOf[AdaptiveSparkPlanExec].executedPlan - assert(finalPlan.collect { case p: ReusedQueryStageExec => p }.length == 2) + assert(finalPlan.collect { + case ShuffleQueryStageExec(_, r: ReusedExchangeExec) => r + }.length == 2) assert(finalPlan.collect { case p: CoalescedShuffleReaderExec => p }.length == 3) @@ -549,7 +341,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA val grouped = df.groupBy("key").agg(max("value").as("value")) val resultDf2 = grouped.groupBy(col("key") + 1).max("value") .union(grouped.groupBy(col("key") + 2).max("value")) - checkAnswer(resultDf2, Row(1, 0) :: Row(2, 0) :: Nil) + QueryTest.checkAnswer(resultDf2, Row(1, 0) :: Row(2, 0) :: Nil) val finalPlan2 = resultDf2.queryExecution.executedPlan .asInstanceOf[AdaptiveSparkPlanExec].executedPlan @@ -567,7 +359,9 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA assert(leafStages.length == 2) val reusedStages = level1Stages.flatMap { stage => - stage.plan.collect { case r: ReusedQueryStageExec => r } + stage.plan.collect { + case ShuffleQueryStageExec(_, r: ReusedExchangeExec) => r + } } assert(reusedStages.length == 1) } @@ -579,7 +373,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA val ds = spark.range(3) val resultDf = ds.repartition(2, ds.col("id")).toDF() - checkAnswer(resultDf, + QueryTest.checkAnswer(resultDf, Seq(0, 1, 2).map(i => Row(i))) val finalPlan = 
resultDf.queryExecution.executedPlan .asInstanceOf[AdaptiveSparkPlanExec].executedPlan @@ -595,7 +389,7 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterA val resultDf = df1.union(df2) - checkAnswer(resultDf, Seq((0), (1), (2), (3)).map(i => Row(i))) + QueryTest.checkAnswer(resultDf, Seq((0), (1), (2), (3)).map(i => Row(i))) val finalPlan = resultDf.queryExecution.executedPlan .asInstanceOf[AdaptiveSparkPlanExec].executedPlan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ReferenceSort.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ReferenceSort.scala index 6abcb1f067968..25b4464823e5f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ReferenceSort.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ReferenceSort.scala @@ -21,7 +21,7 @@ import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.errors._ -import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} +import org.apache.spark.sql.catalyst.expressions.{Attribute, RowOrdering, SortOrder} import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.util.CompletionIterator import org.apache.spark.util.collection.ExternalSorter @@ -41,7 +41,7 @@ case class ReferenceSort( protected override def doExecute(): RDD[InternalRow] = attachTree(this, "sort") { child.execute().mapPartitions( { iterator => - val ordering = newOrdering(sortOrder, child.output) + val ordering = RowOrdering.create(sortOrder, child.output) val sorter = new ExternalSorter[InternalRow, Null, InternalRow]( TaskContext.get(), ordering = Some(ordering)) sorter.insertAll(iterator.map(r => (r.copy(), null))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala index f6b006b98edd1..8bf7fe62cd49b 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution -import java.util.Properties +import scala.collection.parallel.immutable.ParRange import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} @@ -46,7 +46,7 @@ class SQLExecutionSuite extends SparkFunSuite { import spark.implicits._ try { // Should not throw IllegalArgumentException - (1 to 100).par.foreach { _ => + new ParRange(1 to 100).foreach { _ => spark.sparkContext.parallelize(1 to 5).map { i => (i, i) }.toDF("a", "b").count() } } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index ff84b05713676..9a393f19ce9bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -80,7 +80,8 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql("CREATE VIEW jtv1 AS SELECT * FROM temp_jtv1 WHERE id < 6") }.getMessage assert(e.contains("Not allowed to create a permanent view `jtv1` by " + - "referencing a temporary view `temp_jtv1`")) + "referencing a temporary view temp_jtv1. 
" + + "Please create a temp view instead by CREATE TEMP VIEW")) val globalTempDB = spark.sharedState.globalTempViewManager.database sql("CREATE GLOBAL TEMP VIEW global_temp_jtv1 AS SELECT * FROM jt WHERE id > 0") @@ -88,7 +89,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql(s"CREATE VIEW jtv1 AS SELECT * FROM $globalTempDB.global_temp_jtv1 WHERE id < 6") }.getMessage assert(e.contains(s"Not allowed to create a permanent view `jtv1` by referencing " + - s"a temporary view `global_temp`.`global_temp_jtv1`")) + s"a temporary view global_temp.global_temp_jtv1")) } } } @@ -136,12 +137,21 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertNoSuchTable(s"ALTER TABLE $viewName SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')") - assertNoSuchTable(s"ALTER TABLE $viewName SET LOCATION '/path/to/your/lovely/heart'") - assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') SET LOCATION '/path/to/home'") assertNoSuchTable(s"ALTER TABLE $viewName ADD IF NOT EXISTS PARTITION (a='4', b='8')") assertNoSuchTable(s"ALTER TABLE $viewName DROP PARTITION (a='4', b='8')") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") assertNoSuchTable(s"ALTER TABLE $viewName RECOVER PARTITIONS") + + // For v2 ALTER TABLE statements, we have better error message saying view is not supported. + assertAnalysisError( + s"ALTER TABLE $viewName SET LOCATION '/path/to/your/lovely/heart'", + s"'$viewName' is a view not a table") + + // For the following v2 ALERT TABLE statements, unsupported operations are checked first + // before resolving the relations. 
+ assertAnalysisError( + s"ALTER TABLE $viewName PARTITION (a='4') SET LOCATION '/path/to/home'", + "ALTER TABLE SET LOCATION does not support partition for v2 tables") } } @@ -175,6 +185,11 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } + private def assertAnalysisError(query: String, message: String): Unit = { + val e = intercept[AnalysisException](sql(query)) + assert(e.message.contains(message)) + } + test("error handling: insert/load/truncate table commands against a view") { val viewName = "testView" withView(viewName) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsCoalescerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsCoalescerSuite.scala new file mode 100644 index 0000000000000..fcfde83b2ffd5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsCoalescerSuite.scala @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import org.apache.spark.{MapOutputStatistics, SparkFunSuite} +import org.apache.spark.sql.execution.adaptive.ShufflePartitionsCoalescer + +class ShufflePartitionsCoalescerSuite extends SparkFunSuite { + + private def checkEstimation( + bytesByPartitionIdArray: Array[Array[Long]], + expectedPartitionStartIndices: Array[Int], + targetSize: Long, + minNumPartitions: Int = 1): Unit = { + val mapOutputStatistics = bytesByPartitionIdArray.zipWithIndex.map { + case (bytesByPartitionId, index) => + new MapOutputStatistics(index, bytesByPartitionId) + } + val estimatedPartitionStartIndices = ShufflePartitionsCoalescer.coalescePartitions( + mapOutputStatistics, + 0, + bytesByPartitionIdArray.head.length, + targetSize, + minNumPartitions) + assert(estimatedPartitionStartIndices === expectedPartitionStartIndices) + } + + test("1 shuffle") { + val targetSize = 100 + + { + // All bytes per partition are 0. + val bytesByPartitionId = Array[Long](0, 0, 0, 0, 0) + val expectedPartitionStartIndices = Array[Int](0) + checkEstimation(Array(bytesByPartitionId), expectedPartitionStartIndices, targetSize) + } + + { + // Some bytes per partition are 0 and total size is less than the target size. + // 1 coalesced partition is expected. + val bytesByPartitionId = Array[Long](10, 0, 20, 0, 0) + val expectedPartitionStartIndices = Array[Int](0) + checkEstimation(Array(bytesByPartitionId), expectedPartitionStartIndices, targetSize) + } + + { + // 2 coalesced partitions are expected. + val bytesByPartitionId = Array[Long](10, 0, 90, 20, 0) + val expectedPartitionStartIndices = Array[Int](0, 3) + checkEstimation(Array(bytesByPartitionId), expectedPartitionStartIndices, targetSize) + } + + { + // There are a few large shuffle partitions. 
+ val bytesByPartitionId = Array[Long](110, 10, 100, 110, 0) + val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) + checkEstimation(Array(bytesByPartitionId), expectedPartitionStartIndices, targetSize) + } + + { + // All shuffle partitions are larger than the targeted size. + val bytesByPartitionId = Array[Long](100, 110, 100, 110, 110) + val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) + checkEstimation(Array(bytesByPartitionId), expectedPartitionStartIndices, targetSize) + } + + { + // The last shuffle partition is in a single coalesced partition. + val bytesByPartitionId = Array[Long](30, 30, 0, 40, 110) + val expectedPartitionStartIndices = Array[Int](0, 4) + checkEstimation(Array(bytesByPartitionId), expectedPartitionStartIndices, targetSize) + } + } + + test("2 shuffles") { + val targetSize = 100 + + { + // If there are multiple values of the number of shuffle partitions, + // we should see an assertion error. + val bytesByPartitionId1 = Array[Long](0, 0, 0, 0, 0) + val bytesByPartitionId2 = Array[Long](0, 0, 0, 0, 0, 0) + intercept[AssertionError] { + checkEstimation(Array(bytesByPartitionId1, bytesByPartitionId2), Array.empty, targetSize) + } + } + + { + // All bytes per partition are 0. + val bytesByPartitionId1 = Array[Long](0, 0, 0, 0, 0) + val bytesByPartitionId2 = Array[Long](0, 0, 0, 0, 0) + val expectedPartitionStartIndices = Array[Int](0) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize) + } + + { + // Some bytes per partition are 0. + // 1 coalesced partition is expected. + val bytesByPartitionId1 = Array[Long](0, 10, 0, 20, 0) + val bytesByPartitionId2 = Array[Long](30, 0, 20, 0, 20) + val expectedPartitionStartIndices = Array[Int](0) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize) + } + + { + // 2 coalesced partition are expected. 
+ val bytesByPartitionId1 = Array[Long](0, 10, 0, 20, 0) + val bytesByPartitionId2 = Array[Long](30, 0, 70, 0, 30) + val expectedPartitionStartIndices = Array[Int](0, 2, 4) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize) + } + + { + // 4 coalesced partition are expected. + val bytesByPartitionId1 = Array[Long](0, 99, 0, 20, 0) + val bytesByPartitionId2 = Array[Long](30, 0, 70, 0, 30) + val expectedPartitionStartIndices = Array[Int](0, 1, 2, 4) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize) + } + + { + // 2 coalesced partition are needed. + val bytesByPartitionId1 = Array[Long](0, 100, 0, 30, 0) + val bytesByPartitionId2 = Array[Long](30, 0, 70, 0, 30) + val expectedPartitionStartIndices = Array[Int](0, 1, 2, 4) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize) + } + + { + // There are a few large shuffle partitions. + val bytesByPartitionId1 = Array[Long](0, 100, 40, 30, 0) + val bytesByPartitionId2 = Array[Long](30, 0, 60, 0, 110) + val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize) + } + + { + // All pairs of shuffle partitions are larger than the targeted size. + val bytesByPartitionId1 = Array[Long](100, 100, 40, 30, 0) + val bytesByPartitionId2 = Array[Long](30, 0, 60, 70, 110) + val expectedPartitionStartIndices = Array[Int](0, 1, 2, 3, 4) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize) + } + } + + test("enforce minimal number of coalesced partitions") { + val targetSize = 100 + val minNumPartitions = 2 + + { + // The minimal number of coalesced partitions is not enforced because + // the size of data is 0. 
+ val bytesByPartitionId1 = Array[Long](0, 0, 0, 0, 0) + val bytesByPartitionId2 = Array[Long](0, 0, 0, 0, 0) + val expectedPartitionStartIndices = Array[Int](0) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize, minNumPartitions) + } + + { + // The minimal number of coalesced partitions is enforced. + val bytesByPartitionId1 = Array[Long](10, 5, 5, 0, 20) + val bytesByPartitionId2 = Array[Long](5, 10, 0, 10, 5) + val expectedPartitionStartIndices = Array[Int](0, 3) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize, minNumPartitions) + } + + { + // The number of coalesced partitions is determined by the algorithm. + val bytesByPartitionId1 = Array[Long](10, 50, 20, 80, 20) + val bytesByPartitionId2 = Array[Long](40, 10, 0, 10, 30) + val expectedPartitionStartIndices = Array[Int](0, 1, 3, 4) + checkEstimation( + Array(bytesByPartitionId1, bytesByPartitionId2), + expectedPartitionStartIndices, + targetSize, minNumPartitions) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala index 79000be05a8c7..56fff1107ae39 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala @@ -34,6 +34,7 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { intercept[IllegalStateException] { plan.executeToIterator() } intercept[IllegalStateException] { plan.executeBroadcast() } intercept[IllegalStateException] { plan.executeTake(1) } + intercept[IllegalStateException] { plan.executeTail(1) } } test("SPARK-23731 plans should be canonicalizable after being (de)serialized") { @@ -83,4 +84,8 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { } } } + + test("SPARK-30780 empty LocalTableScan should 
use RDD without partitions") { + assert(LocalTableScanExec(Nil, Nil).execute().getNumPartitions == 0) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index b751fb7c50438..06574a9f8fd2c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.expressions.{Ascending, Concat, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, RepartitionByExpression, Sort} -import org.apache.spark.sql.catalyst.plans.logical.sql.{DescribeColumnStatement, DescribeTableStatement} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshResource} import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -81,33 +80,6 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements") } - test("show functions") { - assertEqual("show functions", ShowFunctionsCommand(None, None, true, true)) - assertEqual("show all functions", ShowFunctionsCommand(None, None, true, true)) - assertEqual("show user functions", ShowFunctionsCommand(None, None, true, false)) - assertEqual("show system functions", ShowFunctionsCommand(None, None, false, true)) - intercept("show special functions", "SHOW 
special FUNCTIONS") - assertEqual("show functions foo", - ShowFunctionsCommand(None, Some("foo"), true, true)) - assertEqual("show functions foo.bar", - ShowFunctionsCommand(Some("foo"), Some("bar"), true, true)) - assertEqual("show functions 'foo\\\\.*'", - ShowFunctionsCommand(None, Some("foo\\.*"), true, true)) - intercept("show functions foo.bar.baz", "Unsupported function name") - } - - test("describe function") { - assertEqual("describe function bar", - DescribeFunctionCommand(FunctionIdentifier("bar", database = None), isExtended = false)) - assertEqual("describe function extended bar", - DescribeFunctionCommand(FunctionIdentifier("bar", database = None), isExtended = true)) - assertEqual("describe function foo.bar", - DescribeFunctionCommand( - FunctionIdentifier("bar", database = Some("foo")), isExtended = false)) - assertEqual("describe function extended f.bar", - DescribeFunctionCommand(FunctionIdentifier("bar", database = Some("f")), isExtended = true)) - } - private def createTableUsing( table: String, database: Option[String] = None, @@ -160,7 +132,7 @@ class SparkSqlParserSuite extends AnalysisTest { } test("create table - schema") { - assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING)", + assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile", createTable( table = "my_tab", schema = (new StructType) @@ -180,7 +152,8 @@ class SparkSqlParserSuite extends AnalysisTest { partitionColumnNames = Seq("c", "d") ) ) - assertEqual("CREATE TABLE my_tab(id BIGINT, nested STRUCT)", + assertEqual("CREATE TABLE my_tab(id BIGINT, nested STRUCT) " + + "STORED AS textfile", createTable( table = "my_tab", schema = (new StructType) @@ -217,68 +190,6 @@ class SparkSqlParserSuite extends AnalysisTest { assertEqual("DESCRIBE " + query, DescribeQueryCommand(query, parser.parsePlan(query))) } - test("analyze table statistics") { - assertEqual("analyze table t compute statistics", - AnalyzeTableCommand(TableIdentifier("t"), noscan 
= false)) - assertEqual("analyze table t compute statistics noscan", - AnalyzeTableCommand(TableIdentifier("t"), noscan = true)) - assertEqual("analyze table t partition (a) compute statistics nOscAn", - AnalyzePartitionCommand(TableIdentifier("t"), Map("a" -> None), noscan = true)) - - // Partitions specified - assertEqual("ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = false, - partitionSpec = Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")))) - assertEqual("ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = true, - partitionSpec = Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")))) - assertEqual("ANALYZE TABLE t PARTITION(ds='2008-04-09') COMPUTE STATISTICS noscan", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = true, - partitionSpec = Map("ds" -> Some("2008-04-09")))) - assertEqual("ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = false, - partitionSpec = Map("ds" -> Some("2008-04-09"), "hr" -> None))) - assertEqual("ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = true, - partitionSpec = Map("ds" -> Some("2008-04-09"), "hr" -> None))) - assertEqual("ANALYZE TABLE t PARTITION(ds, hr=11) COMPUTE STATISTICS noscan", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = true, - partitionSpec = Map("ds" -> None, "hr" -> Some("11")))) - assertEqual("ANALYZE TABLE t PARTITION(ds, hr) COMPUTE STATISTICS", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = false, - partitionSpec = Map("ds" -> None, "hr" -> None))) - assertEqual("ANALYZE TABLE t PARTITION(ds, hr) COMPUTE STATISTICS noscan", - AnalyzePartitionCommand(TableIdentifier("t"), noscan = true, - partitionSpec = Map("ds" -> None, "hr" -> None))) - - intercept("analyze 
table t compute statistics xxxx", - "Expected `NOSCAN` instead of `xxxx`") - intercept("analyze table t partition (a) compute statistics xxxx", - "Expected `NOSCAN` instead of `xxxx`") - } - - test("analyze table column statistics") { - intercept("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS", "") - - assertEqual("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS key, value", - AnalyzeColumnCommand(TableIdentifier("t"), Option(Seq("key", "value")), allColumns = false)) - - // Partition specified - should be ignored - assertEqual("ANALYZE TABLE t PARTITION(ds='2017-06-10') " + - "COMPUTE STATISTICS FOR COLUMNS key, value", - AnalyzeColumnCommand(TableIdentifier("t"), Option(Seq("key", "value")), allColumns = false)) - - // Partition specified should be ignored in case of COMPUTE STATISTICS FOR ALL COLUMNS - assertEqual("ANALYZE TABLE t PARTITION(ds='2017-06-10') " + - "COMPUTE STATISTICS FOR ALL COLUMNS", - AnalyzeColumnCommand(TableIdentifier("t"), None, allColumns = true)) - - intercept("ANALYZE TABLE t COMPUTE STATISTICS FOR ALL COLUMNS key, value", - "mismatched input 'key' expecting ") - intercept("ANALYZE TABLE t COMPUTE STATISTICS FOR ALL", - "missing 'COLUMNS' at ''") - } - test("query organization") { // Test all valid combinations of order by/sort by/distribute by/cluster by/limit/windows val baseSql = "select * from t" @@ -322,4 +233,22 @@ class SparkSqlParserSuite extends AnalysisTest { parser.parsePlan("ALTER SCHEMA foo SET DBPROPERTIES ('x' = 'y')")) assertEqual("DESC DATABASE foo", parser.parsePlan("DESC SCHEMA foo")) } + + test("manage resources") { + assertEqual("ADD FILE abc.txt", AddFileCommand("abc.txt")) + assertEqual("ADD FILE 'abc.txt'", AddFileCommand("abc.txt")) + assertEqual("ADD FILE \"/path/to/abc.txt\"", AddFileCommand("/path/to/abc.txt")) + assertEqual("LIST FILE abc.txt", ListFilesCommand(Array("abc.txt"))) + assertEqual("LIST FILE '/path//abc.txt'", ListFilesCommand(Array("/path//abc.txt"))) + assertEqual("LIST FILE 
\"/path2/abc.txt\"", ListFilesCommand(Array("/path2/abc.txt"))) + assertEqual("ADD JAR /path2/_2/abc.jar", AddJarCommand("/path2/_2/abc.jar")) + assertEqual("ADD JAR '/test/path_2/jar/abc.jar'", AddJarCommand("/test/path_2/jar/abc.jar")) + assertEqual("ADD JAR \"abc.jar\"", AddJarCommand("abc.jar")) + assertEqual("LIST JAR /path-with-dash/abc.jar", + ListJarsCommand(Array("/path-with-dash/abc.jar"))) + assertEqual("LIST JAR 'abc.jar'", ListJarsCommand(Array("abc.jar"))) + assertEqual("LIST JAR \"abc.jar\"", ListJarsCommand(Array("abc.jar"))) + assertEqual("ADD FILE /path with space/abc.txt", AddFileCommand("/path with space/abc.txt")) + assertEqual("ADD JAR /path with space/abc.jar", AddJarCommand("/path with space/abc.jar")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala index 392cce54ebede..ef81f1b788496 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMapSuite.scala @@ -57,7 +57,7 @@ class UnsafeFixedWidthAggregationMapSuite private var taskContext: TaskContext = null - def testWithMemoryLeakDetection(name: String)(f: => Unit) { + def testWithMemoryLeakDetection(name: String)(f: => Unit): Unit = { def cleanup(): Unit = { if (taskMemoryManager != null) { assert(taskMemoryManager.cleanUpAllAllocatedMemory() === 0) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala index f985386eee292..f6814d8ff8a3d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala @@ -48,6 +48,7 @@ class WholeStageCodegenSparkSubmitSuite extends SparkFunSuite "--conf", "spark.master.rest.enabled=false", "--conf", "spark.driver.extraJavaOptions=-XX:-UseCompressedOops", "--conf", "spark.executor.extraJavaOptions=-XX:+UseCompressedOops", + "--conf", "spark.sql.adaptive.enabled=false", unusedJar.toString) SparkSubmitSuite.runSparkSubmit(argsForSparkSubmit, "../..") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index 0ea16a1a15d66..06a016fac5300 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{Dataset, QueryTest, Row, SaveMode} -import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, CodeGenerator} +import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeAndComment, CodeGenerator} import org.apache.spark.sql.execution.aggregate.HashAggregateExec import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.execution.joins.SortMergeJoinExec -import org.apache.spark.sql.expressions.scalalang.typed import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -33,6 +32,19 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession { import testImplicits._ + var originalValue: String = _ + // With on AQE, the WholeStageCodegenExec is added when running QueryStageExec. 
+ override def beforeAll(): Unit = { + super.beforeAll() + originalValue = spark.conf.get(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key) + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + } + + override def afterAll(): Unit = { + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, originalValue) + super.afterAll() + } + test("range/filter should be combined") { val df = spark.range(10).filter("id = 1").selectExpr("id + 1") val plan = df.queryExecution.executedPlan @@ -107,19 +119,6 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession { assert(ds.collect() === Array(0, 6)) } - test("simple typed UDAF should be included in WholeStageCodegen") { - import testImplicits._ - - val ds = Seq(("a", 10), ("b", 1), ("b", 2), ("c", 1)).toDS() - .groupByKey(_._1).agg(typed.sum(_._2)) - - val plan = ds.queryExecution.executedPlan - assert(plan.find(p => - p.isInstanceOf[WholeStageCodegenExec] && - p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined) - assert(ds.collect() === Array(("a", 10.0), ("b", 3.0), ("c", 1.0))) - } - test("cache for primitive type should be in WholeStageCodegen with InMemoryTableScanExec") { import testImplicits._ @@ -213,10 +212,10 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession { ignore("SPARK-21871 check if we can get large code size when compiling too long functions") { val codeWithShortFunctions = genGroupByCode(3) - val (_, maxCodeSize1) = CodeGenerator.compile(codeWithShortFunctions) + val (_, ByteCodeStats(maxCodeSize1, _, _)) = CodeGenerator.compile(codeWithShortFunctions) assert(maxCodeSize1 < SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.defaultValue.get) val codeWithLongFunctions = genGroupByCode(50) - val (_, maxCodeSize2) = CodeGenerator.compile(codeWithLongFunctions) + val (_, ByteCodeStats(maxCodeSize2, _, _)) = CodeGenerator.compile(codeWithLongFunctions) assert(maxCodeSize2 > SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.defaultValue.get) } @@ -398,4 +397,48 @@ 
class WholeStageCodegenSuite extends QueryTest with SharedSparkSession { }.isDefined, "LocalTableScanExec should be within a WholeStageCodegen domain.") } + + test("Give up splitting aggregate code if a parameter length goes over the limit") { + withSQLConf( + SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "true", + SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1", + "spark.sql.CodeGenerator.validParamLength" -> "0") { + withTable("t") { + val expectedErrMsg = "Failed to split aggregate code into small functions" + Seq( + // Test case without keys + "SELECT AVG(v) FROM VALUES(1) t(v)", + // Tet case with keys + "SELECT k, AVG(v) FROM VALUES((1, 1)) t(k, v) GROUP BY k").foreach { query => + val errMsg = intercept[IllegalStateException] { + sql(query).collect + }.getMessage + assert(errMsg.contains(expectedErrMsg)) + } + } + } + } + + test("Give up splitting subexpression code if a parameter length goes over the limit") { + withSQLConf( + SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "false", + SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1", + "spark.sql.CodeGenerator.validParamLength" -> "0") { + withTable("t") { + val expectedErrMsg = "Failed to split subexpression code into small functions" + Seq( + // Test case without keys + "SELECT AVG(a + b), SUM(a + b + c) FROM VALUES((1, 1, 1)) t(a, b, c)", + // Tet case with keys + "SELECT k, AVG(a + b), SUM(a + b + c) FROM VALUES((1, 1, 1, 1)) t(k, a, b, c) " + + "GROUP BY k").foreach { query => + val e = intercept[Exception] { + sql(query).collect + }.getCause + assert(e.isInstanceOf[IllegalStateException]) + assert(e.getMessage.contains(expectedErrMsg)) + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 55e57a244c030..4edb35ea30fde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -17,20 +17,44 @@ package org.apache.spark.sql.execution.adaptive +import java.io.File +import java.net.URI + +import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} import org.apache.spark.sql.QueryTest import org.apache.spark.sql.execution.{ReusedSubqueryExec, SparkPlan} -import org.apache.spark.sql.execution.adaptive.rule.CoalescedShuffleReaderExec -import org.apache.spark.sql.execution.exchange.Exchange -import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, Exchange, ReusedExchangeExec, ShuffleExchangeExec} +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BuildRight, SortMergeJoinExec} +import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils + +class AdaptiveQueryExecSuite + extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { -class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { import testImplicits._ setupTestData() private def runAdaptiveAndVerifyResult(query: String): (SparkPlan, SparkPlan) = { + var finalPlanCnt = 0 + val listener = new SparkListener { + override def onOtherEvent(event: SparkListenerEvent): Unit = { + event match { + case SparkListenerSQLAdaptiveExecutionUpdate(_, _, sparkPlanInfo) => + if (sparkPlanInfo.simpleString.startsWith( + "AdaptiveSparkPlan(isFinalPlan=true)")) { + finalPlanCnt += 1 + } + case _ => // ignore other events + } + } + } + spark.sparkContext.addSparkListener(listener) + val dfAdaptive = sql(query) val planBefore = dfAdaptive.queryExecution.executedPlan assert(planBefore.toString.startsWith("AdaptiveSparkPlan(isFinalPlan=false)")) @@ -41,6 +65,11 @@ class 
AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { } val planAfter = dfAdaptive.queryExecution.executedPlan assert(planAfter.toString.startsWith("AdaptiveSparkPlan(isFinalPlan=true)")) + + spark.sparkContext.listenerBus.waitUntilEmpty() + assert(finalPlanCnt == 1) + spark.sparkContext.removeSparkListener(listener) + val adaptivePlan = planAfter.asInstanceOf[AdaptiveSparkPlanExec].executedPlan val exchanges = adaptivePlan.collect { case e: Exchange => e @@ -50,34 +79,41 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { } private def findTopLevelBroadcastHashJoin(plan: SparkPlan): Seq[BroadcastHashJoinExec] = { - plan.collect { - case j: BroadcastHashJoinExec => Seq(j) - case s: QueryStageExec => findTopLevelBroadcastHashJoin(s.plan) - }.flatten + collect(plan) { + case j: BroadcastHashJoinExec => j + } } private def findTopLevelSortMergeJoin(plan: SparkPlan): Seq[SortMergeJoinExec] = { - plan.collect { - case j: SortMergeJoinExec => Seq(j) - case s: QueryStageExec => findTopLevelSortMergeJoin(s.plan) - }.flatten + collect(plan) { + case j: SortMergeJoinExec => j + } } - private def findReusedExchange(plan: SparkPlan): Seq[ReusedQueryStageExec] = { - plan.collect { - case e: ReusedQueryStageExec => Seq(e) - case a: AdaptiveSparkPlanExec => findReusedExchange(a.executedPlan) - case s: QueryStageExec => findReusedExchange(s.plan) - case p: SparkPlan => p.subqueries.flatMap(findReusedExchange) - }.flatten + private def findReusedExchange(plan: SparkPlan): Seq[ReusedExchangeExec] = { + collectInPlanAndSubqueries(plan) { + case ShuffleQueryStageExec(_, e: ReusedExchangeExec) => e + case BroadcastQueryStageExec(_, e: ReusedExchangeExec) => e + } } private def findReusedSubquery(plan: SparkPlan): Seq[ReusedSubqueryExec] = { - plan.collect { - case e: ReusedSubqueryExec => Seq(e) - case s: QueryStageExec => findReusedSubquery(s.plan) - case p: SparkPlan => p.subqueries.flatMap(findReusedSubquery) - }.flatten + 
collectInPlanAndSubqueries(plan) { + case e: ReusedSubqueryExec => e + } + } + + private def checkNumLocalShuffleReaders( + plan: SparkPlan, numShufflesWithoutLocalReader: Int = 0): Unit = { + val numShuffles = collect(plan) { + case s: ShuffleQueryStageExec => s + }.length + + val numLocalReaders = collect(plan) { + case reader: LocalShuffleReaderExec => reader + }.length + + assert(numShuffles === (numLocalReaders + numShufflesWithoutLocalReader)) } test("Change merge join to broadcast join") { @@ -90,30 +126,65 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) + checkNumLocalShuffleReaders(adaptivePlan) } } - test("Change merge join to broadcast join and reduce number of shuffle partitions") { + test("Reuse the parallelism of CoalescedShuffleReaderExec in LocalShuffleReaderExec") { withSQLConf( SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", - SQLConf.REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED.key -> "true", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80", - SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key -> "150") { + SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key -> "10") { val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( "SELECT * FROM testData join testData2 ON key = a where value = '1'") val smj = findTopLevelSortMergeJoin(plan) assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) - - val shuffleReaders = adaptivePlan.collect { - case reader: CoalescedShuffleReaderExec => reader + val localReaders = collect(adaptivePlan) { + case reader: LocalShuffleReaderExec => reader } - assert(shuffleReaders.length === 1) + assert(localReaders.length == 2) + val localShuffleRDD0 = localReaders(0).execute().asInstanceOf[LocalShuffledRowRDD] + val localShuffleRDD1 = localReaders(1).execute().asInstanceOf[LocalShuffledRowRDD] + // The pre-shuffle partition size is [0, 0, 0, 72, 0] + // And the 
partitionStartIndices is [0, 3, 4], so advisoryParallelism = 3. + // the final parallelism is + // math.max(1, advisoryParallelism / numMappers): math.max(1, 3/2) = 1 + // and the partitions length is 1 * numMappers = 2 + assert(localShuffleRDD0.getPartitions.length == 2) // The pre-shuffle partition size is [0, 72, 0, 72, 126] - shuffleReaders.foreach { reader => - assert(reader.outputPartitioning.numPartitions === 2) + // And the partitionStartIndices is [0, 1, 2, 3, 4], so advisoryParallelism = 5. + // the final parallelism is + // math.max(1, advisoryParallelism / numMappers): math.max(1, 5/2) = 2 + // and the partitions length is 2 * numMappers = 4 + assert(localShuffleRDD1.getPartitions.length == 4) + } + } + + test("Reuse the default parallelism in LocalShuffleReaderExec") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80", + SQLConf.REDUCE_POST_SHUFFLE_PARTITIONS_ENABLED.key -> "false") { + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM testData join testData2 ON key = a where value = '1'") + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.size == 1) + val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) + assert(bhj.size == 1) + val localReaders = collect(adaptivePlan) { + case reader: LocalShuffleReaderExec => reader } + assert(localReaders.length == 2) + val localShuffleRDD0 = localReaders(0).execute().asInstanceOf[LocalShuffledRowRDD] + val localShuffleRDD1 = localReaders(1).execute().asInstanceOf[LocalShuffledRowRDD] + // the final parallelism is math.max(1, numReduces / numMappers): math.max(1, 5/2) = 2 + // and the partitions length is 2 * numMappers = 4 + assert(localShuffleRDD0.getPartitions.length == 4) + // the final parallelism is math.max(1, numReduces / numMappers): math.max(1, 5/2) = 2 + // and the partitions length is 2 * numMappers = 4 + assert(localShuffleRDD1.getPartitions.length == 4) } } @@ -128,6 +199,7 @@ class AdaptiveQueryExecSuite 
extends QueryTest with SharedSparkSession { assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) + checkNumLocalShuffleReaders(adaptivePlan) } } @@ -142,6 +214,8 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) + + checkNumLocalShuffleReaders(adaptivePlan) } } @@ -163,6 +237,30 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 3) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 3) + + // A possible resulting query plan: + // BroadcastHashJoin + // +- BroadcastExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastHashJoin + // +- BroadcastExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastHashJoin + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastExchange + // +-LocalShuffleReader* + // +- ShuffleExchange + + // After applied the 'OptimizeLocalShuffleReader' rule, we can convert all the four + // shuffle reader to local shuffle reader in the bottom two 'BroadcastHashJoin'. + // For the top level 'BroadcastHashJoin', the probe side is not shuffle query stage + // and the build side shuffle query stage is also converted to local shuffle reader. 
+ checkNumLocalShuffleReaders(adaptivePlan) } } @@ -186,6 +284,28 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 3) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 3) + + // A possible resulting query plan: + // BroadcastHashJoin + // +- BroadcastExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastHashJoin + // +- BroadcastExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastHashJoin + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastExchange + // +-HashAggregate + // +- CoalescedShuffleReader + // +- ShuffleExchange + + // The shuffle added by Aggregate can't apply local reader. + checkNumLocalShuffleReaders(adaptivePlan, 1) } } @@ -209,6 +329,29 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 3) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 3) + + // A possible resulting query plan: + // BroadcastHashJoin + // +- BroadcastExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastHashJoin + // +- BroadcastExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- LocalShuffleReader* + // +- ShuffleExchange + // +- BroadcastHashJoin + // +- Filter + // +- HashAggregate + // +- CoalescedShuffleReader + // +- ShuffleExchange + // +- BroadcastExchange + // +-LocalShuffleReader* + // +- ShuffleExchange + + // The shuffle added by Aggregate can't apply local reader. + checkNumLocalShuffleReaders(adaptivePlan, 1) } } @@ -223,6 +366,9 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 3) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 2) + // There is still a SMJ, and its two shuffles can't apply local reader. 
+ checkNumLocalShuffleReaders(adaptivePlan, 2) + // Even with local shuffle reader, the query stage reuse can also work. val ex = findReusedExchange(adaptivePlan) assert(ex.size == 1) } @@ -239,6 +385,8 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) + checkNumLocalShuffleReaders(adaptivePlan) + // Even with local shuffle reader, the query stage reuse can also work. val ex = findReusedExchange(adaptivePlan) assert(ex.size == 1) } @@ -257,6 +405,8 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) + checkNumLocalShuffleReaders(adaptivePlan) + // Even with local shuffle reader, the query stage reuse can also work. val ex = findReusedExchange(adaptivePlan) assert(ex.nonEmpty) val sub = findReusedSubquery(adaptivePlan) @@ -276,6 +426,8 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) + checkNumLocalShuffleReaders(adaptivePlan) + // Even with local shuffle reader, the query stage reuse can also work. val ex = findReusedExchange(adaptivePlan) assert(ex.isEmpty) val sub = findReusedSubquery(adaptivePlan) @@ -298,9 +450,11 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj.size == 1) val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) assert(bhj.size == 1) + checkNumLocalShuffleReaders(adaptivePlan) + // Even with local shuffle reader, the query stage reuse can also work. 
val ex = findReusedExchange(adaptivePlan) assert(ex.nonEmpty) - assert(ex.head.plan.isInstanceOf[BroadcastQueryStageExec]) + assert(ex.head.child.isInstanceOf[BroadcastExchangeExec]) val sub = findReusedSubquery(adaptivePlan) assert(sub.isEmpty) } @@ -357,4 +511,222 @@ class AdaptiveQueryExecSuite extends QueryTest with SharedSparkSession { assert(smj2.size == 2, origPlan.toString) } } + + test("Change merge join to broadcast join without local shuffle reader") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.LOCAL_SHUFFLE_READER_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "40") { + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + """ + |SELECT * FROM testData t1 join testData2 t2 + |ON t1.key = t2.a join testData3 t3 on t2.a = t3.a + |where t1.value = 1 + """.stripMargin + ) + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.size == 2) + val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) + assert(bhj.size == 1) + // There is still a SMJ, and its two shuffles can't apply local reader. + checkNumLocalShuffleReaders(adaptivePlan, 2) + } + } + + test("Avoid changing merge join to broadcast join if too many empty partitions on build plan") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN.key -> "0.5") { + // `testData` is small enough to be broadcast but has empty partition ratio over the config. + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM testData join testData2 ON key = a where value = '1'") + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.size == 1) + val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) + assert(bhj.isEmpty) + } + // It is still possible to broadcast `testData2`. 
+ withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM testData join testData2 ON key = a where value = '1'") + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.size == 1) + val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) + assert(bhj.size == 1) + assert(bhj.head.buildSide == BuildRight) + } + } + } + + test("SPARK-29906: AQE should not introduce extra shuffle for outermost limit") { + var numStages = 0 + val listener = new SparkListener { + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + numStages = jobStart.stageInfos.length + } + } + try { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + spark.sparkContext.addSparkListener(listener) + spark.range(0, 100, 1, numPartitions = 10).take(1) + spark.sparkContext.listenerBus.waitUntilEmpty() + // Should be only one stage since there is no shuffle. + assert(numStages == 1) + } + } finally { + spark.sparkContext.removeSparkListener(listener) + } + } + + test("SPARK-30524: Do not optimize skew join if introduce additional shuffle") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_SIZE_THRESHOLD.key -> "100", + SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key -> "700") { + withTempView("skewData1", "skewData2") { + spark + .range(0, 1000, 1, 10) + .selectExpr("id % 2 as key1", "id as value1") + .createOrReplaceTempView("skewData1") + spark + .range(0, 1000, 1, 10) + .selectExpr("id % 1 as key2", "id as value2") + .createOrReplaceTempView("skewData2") + val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT key1 FROM skewData1 join skewData2 ON key1 = key2 group by key1") + // Additional shuffle introduced, so disable the "OptimizeSkewedJoin" optimization + val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan) + assert(innerSmj.size == 1 && 
!innerSmj.head.isSkewJoin) + } + } + } + + // TODO: we need a way to customize data distribution after shuffle, to improve test coverage + // of this case. + test("SPARK-29544: adaptive skew join with different join types") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_SKEWED_PARTITION_SIZE_THRESHOLD.key -> "100", + SQLConf.SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key -> "700") { + withTempView("skewData1", "skewData2") { + spark + .range(0, 1000, 1, 10) + .selectExpr("id % 2 as key1", "id as value1") + .createOrReplaceTempView("skewData1") + spark + .range(0, 1000, 1, 10) + .selectExpr("id % 1 as key2", "id as value2") + .createOrReplaceTempView("skewData2") + + def checkSkewJoin(joins: Seq[SortMergeJoinExec], expectedNumPartitions: Int): Unit = { + assert(joins.size == 1 && joins.head.isSkewJoin) + assert(joins.head.left.collect { + case r: SkewJoinShuffleReaderExec => r + }.head.partitionSpecs.length == expectedNumPartitions) + assert(joins.head.right.collect { + case r: SkewJoinShuffleReaderExec => r + }.head.partitionSpecs.length == expectedNumPartitions) + } + + // skewed inner join optimization + val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM skewData1 join skewData2 ON key1 = key2") + // left stats: [3496, 0, 0, 0, 4014] + // right stats:[6292, 0, 0, 0, 0] + // Partition 0: both left and right sides are skewed, and divide into 5 splits, so + // 5 x 5 sub-partitions. + // Partition 1, 2, 3: not skewed, and coalesced into 1 partition. + // Partition 4: only left side is skewed, and divide into 5 splits, so + // 5 sub-partitions. + // So total (25 + 1 + 5) partitions. 
+ val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan) + checkSkewJoin(innerSmj, 25 + 1 + 5) + + // skewed left outer join optimization + val (_, leftAdaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM skewData1 left outer join skewData2 ON key1 = key2") + // left stats: [3496, 0, 0, 0, 4014] + // right stats:[6292, 0, 0, 0, 0] + // Partition 0: both left and right sides are skewed, but left join can't split right side, + // so only left side is divided into 5 splits, and thus 5 sub-partitions. + // Partition 1, 2, 3: not skewed, and coalesced into 1 partition. + // Partition 4: only left side is skewed, and divide into 5 splits, so + // 5 sub-partitions. + // So total (5 + 1 + 5) partitions. + val leftSmj = findTopLevelSortMergeJoin(leftAdaptivePlan) + checkSkewJoin(leftSmj, 5 + 1 + 5) + + // skewed right outer join optimization + val (_, rightAdaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM skewData1 right outer join skewData2 ON key1 = key2") + // left stats: [3496, 0, 0, 0, 4014] + // right stats:[6292, 0, 0, 0, 0] + // Partition 0: both left and right sides are skewed, but right join can't split left side, + // so only right side is divided into 5 splits, and thus 5 sub-partitions. + // Partition 1, 2, 3: not skewed, and coalesced into 1 partition. + // Partition 4: only left side is skewed, but right join can't split left side, so just + // 1 partition. + // So total (5 + 1 + 1) partitions. 
+ val rightSmj = findTopLevelSortMergeJoin(rightAdaptivePlan) + checkSkewJoin(rightSmj, 5 + 1 + 1) + } + } + } + + test("SPARK-30291: AQE should catch the exceptions when doing materialize") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + withTable("bucketed_table") { + val df1 = + (0 until 50).map(i => (i % 5, i % 13, i.toString)).toDF("i", "j", "k").as("df1") + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table") + val warehouseFilePath = new URI(spark.sessionState.conf.warehousePath).getPath + val tableDir = new File(warehouseFilePath, "bucketed_table") + Utils.deleteRecursively(tableDir) + df1.write.parquet(tableDir.getAbsolutePath) + + val agged = spark.table("bucketed_table").groupBy("i").count() + val error = intercept[Exception] { + agged.count() + } + assert(error.getCause().toString contains "Failed to materialize query stage") + } + } + } + + test("SPARK-30403: AQE should handle InSubquery") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + runAdaptiveAndVerifyResult("SELECT * FROM testData LEFT OUTER join testData2" + + " ON key = a AND key NOT IN (select a from testData3) where value = '1'" + ) + } + } + + test("force apply AQE") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.ADAPTIVE_EXECUTION_FORCE_APPLY.key -> "true") { + val plan = sql("SELECT * FROM testData").queryExecution.executedPlan + assert(plan.isInstanceOf[AdaptiveSparkPlanExec]) + } + } + + test("SPARK-30719: do not log warning if intentionally skip AQE") { + val testAppender = new LogAppender("aqe logging warning test when skip") + withLogAppender(testAppender) { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val plan = sql("SELECT * FROM testData").queryExecution.executedPlan + assert(!plan.isInstanceOf[AdaptiveSparkPlanExec]) + } + } + assert(!testAppender.loggingEvents + .exists(msg => msg.getRenderedMessage.contains( + s"${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key} 
is" + + s" enabled but is not supported for"))) + } } + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala index dc67446460877..3e47fd4289bef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationStoreSuite.scala @@ -140,7 +140,7 @@ class SortBasedAggregationStoreSuite extends SparkFunSuite with LocalSparkConte } override def getKey(): UnsafeRow = key override def getValue(): UnsafeRow = value - override def close(): Unit = Unit + override def close(): Unit = () } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index 2eb4ac52aca90..fdb23d5be78a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -1210,15 +1210,13 @@ class ArrowConvertersSuite extends SharedSparkSession { testQuietly("unsupported types") { def runUnsupported(block: => Unit): Unit = { - val msg = intercept[SparkException] { + val msg = intercept[UnsupportedOperationException] { block } - assert(msg.getMessage.contains("Unsupported data type")) - assert(msg.getCause.getClass === classOf[UnsupportedOperationException]) + assert(msg.getMessage.contains("is not supported")) } - runUnsupported { mapData.toDF().toArrowBatchRdd.collect() } - runUnsupported { complexData.toArrowBatchRdd.collect() } + runUnsupported { calenderIntervalData.toDF().toArrowBatchRdd.collect() } } test("test Arrow Validator") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala index 92506032ab2e5..bdc3b5eed7d8d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowWriterSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution.arrow import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.types._ -import org.apache.spark.sql.vectorized.ArrowColumnVector +import org.apache.spark.sql.vectorized._ import org.apache.spark.unsafe.types.UTF8String class ArrowWriterSuite extends SparkFunSuite { @@ -267,4 +267,120 @@ class ArrowWriterSuite extends SparkFunSuite { writer.root.close() } + + test("map") { + val schema = new StructType() + .add("map", MapType(IntegerType, StringType), nullable = true) + val writer = ArrowWriter.create(schema, null) + assert(writer.schema == schema) + + writer.write(InternalRow(ArrayBasedMapData( + keys = Array(1, 2, 3), + values = Array( + UTF8String.fromString("v2"), + UTF8String.fromString("v3"), + UTF8String.fromString("v4") + ) + ))) + writer.write(InternalRow(ArrayBasedMapData(Array(43), + Array(UTF8String.fromString("v5")) + ))) + writer.write(InternalRow(ArrayBasedMapData(Array(43), Array(null)))) + writer.write(InternalRow(null)) + + writer.finish() + + val reader = new ArrowColumnVector(writer.root.getFieldVectors.get(0)) + val map0 = reader.getMap(0) + assert(map0.numElements() == 3) + assert(map0.keyArray().array().mkString(",") == Array(1, 2, 3).mkString(",")) + assert(map0.valueArray().array().mkString(",") == Array("v2", "v3", "v4").mkString(",")) + + val map1 = reader.getMap(1) + assert(map1.numElements() == 1) + assert(map1.keyArray().array().mkString(",") == Array(43).mkString(",")) + assert(map1.valueArray().array().mkString(",") == 
Array("v5").mkString(",")) + + val map2 = reader.getMap(2) + assert(map2.numElements() == 1) + assert(map2.keyArray().array().mkString(",") == Array(43).mkString(",")) + assert(map2.valueArray().array().mkString(",") == Array(null).mkString(",")) + + val map3 = reader.getMap(3) + assert(map3 == null) + writer.root.close() + } + + test("empty map") { + val schema = new StructType() + .add("map", MapType(IntegerType, StringType), nullable = true) + val writer = ArrowWriter.create(schema, null) + assert(writer.schema == schema) + writer.write(InternalRow(ArrayBasedMapData(Array(), Array()))) + writer.finish() + + val reader = new ArrowColumnVector(writer.root.getFieldVectors.get(0)) + + val map0 = reader.getMap(0) + assert(map0.numElements() == 0) + writer.root.close() + } + + test("nested map") { + val valueSchema = new StructType() + .add("name", StringType) + .add("age", IntegerType) + + val schema = new StructType() + .add("map", + MapType( + keyType = IntegerType, + valueType = valueSchema + ), + nullable = true) + val writer = ArrowWriter.create(schema, null) + assert(writer.schema == schema) + + writer.write(InternalRow( + ArrayBasedMapData( + keys = Array(1), + values = Array(InternalRow(UTF8String.fromString("jon"), 20)) + ))) + + writer.write(InternalRow( + ArrayBasedMapData( + keys = Array(1), + values = Array(InternalRow(UTF8String.fromString("alice"), 30)) + ))) + + writer.write(InternalRow( + ArrayBasedMapData( + keys = Array(1), + values = Array(InternalRow(UTF8String.fromString("bob"), 40)) + ))) + + + writer.finish() + + val reader = new ArrowColumnVector(writer.root.getFieldVectors.get(0)) + + def stringRepr(map: ColumnarMap): String = { + map.valueArray().getStruct(0, 2).toSeq(valueSchema).mkString(",") + } + + val map0 = reader.getMap(0) + assert(map0.numElements() == 1) + assert(map0.keyArray().array().mkString(",") == Array(1).mkString(",")) + assert(stringRepr(map0) == Array("jon", "20").mkString(",")) + + val map1 = reader.getMap(1) + 
assert(map1.numElements() == 1) + assert(map1.keyArray().array().mkString(",") == Array(1).mkString(",")) + assert(stringRepr(map1) == Array("alice", "30").mkString(",")) + + val map2 = reader.getMap(2) + assert(map2.numElements() == 1) + assert(map2.keyArray().array().mkString(",") == Array(1).mkString(",")) + assert(stringRepr(map2) == Array("bob", "40").mkString(",")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala index 2776bc310fefe..965d78227c335 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala @@ -48,7 +48,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { runBenchmark("aggregate without grouping") { val N = 500L << 22 codegenBenchmark("agg w/o group", N) { - spark.range(N).selectExpr("sum(id)").collect() + spark.range(N).selectExpr("sum(id)").noop() } } @@ -56,11 +56,11 @@ object AggregateBenchmark extends SqlBasedBenchmark { val N = 100L << 20 codegenBenchmark("stddev", N) { - spark.range(N).groupBy().agg("id" -> "stddev").collect() + spark.range(N).groupBy().agg("id" -> "stddev").noop() } codegenBenchmark("kurtosis", N) { - spark.range(N).groupBy().agg("id" -> "kurtosis").collect() + spark.range(N).groupBy().agg("id" -> "kurtosis").noop() } } @@ -70,7 +70,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("Aggregate w keys", N, output = output) def f(): Unit = { - spark.range(N).selectExpr("(id & 65535) as k").groupBy("k").sum().collect() + spark.range(N).selectExpr("(id & 65535) as k").groupBy("k").sum().noop() } benchmark.addCase("codegen = F", numIters = 2) { _ => @@ -107,7 +107,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { spark.range(N).selectExpr("id", "floor(rand() * 10000) as k") 
.createOrReplaceTempView("test") - def f(): Unit = spark.sql("select k, k, sum(id) from test group by k, k").collect() + def f(): Unit = spark.sql("select k, k, sum(id) from test group by k, k").noop() benchmark.addCase("codegen = F", numIters = 2) { _ => withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { @@ -142,7 +142,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("Aggregate w string key", N, output = output) def f(): Unit = spark.range(N).selectExpr("id", "cast(id & 1023 as string) as k") - .groupBy("k").count().collect() + .groupBy("k").count().noop() benchmark.addCase("codegen = F", numIters = 2) { _ => withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { @@ -177,7 +177,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("Aggregate w decimal key", N, output = output) def f(): Unit = spark.range(N).selectExpr("id", "cast(id & 65535 as decimal) as k") - .groupBy("k").count().collect() + .groupBy("k").count().noop() benchmark.addCase("codegen = F") { _ => withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { @@ -222,7 +222,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { "id > 1023 as k6") .groupBy("k1", "k2", "k3", "k4", "k5", "k6") .sum() - .collect() + .noop() benchmark.addCase("codegen = F") { _ => withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { @@ -282,7 +282,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { "case when id > 1800 and id <= 1900 then 1 else 0 end as v18") .groupBy("k1", "k2", "k3") .sum() - .collect() + .noop() benchmark.addCase("codegen = F") { _ => withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { @@ -315,7 +315,7 @@ object AggregateBenchmark extends SqlBasedBenchmark { codegenBenchmark("cube", N) { spark.range(N).selectExpr("id", "id % 1000 as k1", "id & 256 as k2") - .cube("k1", "k2").sum("id").collect() + .cube("k1", "k2").sum("id").noop() } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala index f727ebcf3fd1e..ae241b3625d02 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BloomFilterBenchmark.scala @@ -70,10 +70,10 @@ object BloomFilterBenchmark extends SqlBasedBenchmark { runBenchmark(s"ORC Read") { val benchmark = new Benchmark(s"Read a row from ${scaleFactor}M rows", N, output = output) benchmark.addCase("Without bloom filter") { _ => - spark.read.orc(path + "/withoutBF").where("value = 0").count + spark.read.orc(path + "/withoutBF").where("value = 0").noop() } benchmark.addCase("With bloom filter") { _ => - spark.read.orc(path + "/withBF").where("value = 0").count + spark.read.orc(path + "/withBF").where("value = 0").noop() } benchmark.run() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala index bd2470ee20660..a084bec985510 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala @@ -22,11 +22,10 @@ import scala.collection.JavaConverters._ import scala.util.Random import org.apache.spark.SparkConf -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.internal.config.UI._ import org.apache.spark.sql.{DataFrame, DataFrameWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.datasources.parquet.{SpecificParquetRecordReaderBase, 
VectorizedParquetRecordReader} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -44,21 +43,26 @@ import org.apache.spark.sql.vectorized.ColumnVector * Results will be written to "benchmarks/DataSourceReadBenchmark-results.txt". * }}} */ -object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { - val conf = new SparkConf() - .setAppName("DataSourceReadBenchmark") - // Since `spark.master` always exists, overrides this value - .set("spark.master", "local[1]") - .setIfMissing("spark.driver.memory", "3g") - .setIfMissing("spark.executor.memory", "3g") - .setIfMissing(UI_ENABLED, false) - - val spark = SparkSession.builder.config(conf).getOrCreate() - - // Set default configs. Individual cases will change them if necessary. - spark.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") - spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true") - spark.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") +object DataSourceReadBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + val conf = new SparkConf() + .setAppName("DataSourceReadBenchmark") + // Since `spark.master` always exists, overrides this value + .set("spark.master", "local[1]") + .setIfMissing("spark.driver.memory", "3g") + .setIfMissing("spark.executor.memory", "3g") + .setIfMissing(UI_ENABLED, false) + + val sparkSession = SparkSession.builder.config(conf).getOrCreate() + + // Set default configs. Individual cases will change them if necessary. 
+ sparkSession.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") + sparkSession.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true") + sparkSession.conf.set(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, "true") + + sparkSession + } def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView) @@ -118,30 +122,30 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { prepareTable(dir, spark.sql(s"SELECT CAST(value as ${dataType.sql}) id FROM t1")) sqlBenchmark.addCase("SQL CSV") { _ => - spark.sql("select sum(id) from csvTable").collect() + spark.sql("select sum(id) from csvTable").noop() } sqlBenchmark.addCase("SQL Json") { _ => - spark.sql("select sum(id) from jsonTable").collect() + spark.sql("select sum(id) from jsonTable").noop() } sqlBenchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql("select sum(id) from parquetTable").collect() + spark.sql("select sum(id) from parquetTable").noop() } sqlBenchmark.addCase("SQL Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(id) from parquetTable").collect() + spark.sql("select sum(id) from parquetTable").noop() } } sqlBenchmark.addCase("SQL ORC Vectorized") { _ => - spark.sql("SELECT sum(id) FROM orcTable").collect() + spark.sql("SELECT sum(id) FROM orcTable").noop() } sqlBenchmark.addCase("SQL ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(id) FROM orcTable").collect() + spark.sql("SELECT sum(id) FROM orcTable").noop() } } @@ -234,30 +238,30 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { spark.sql("SELECT CAST(value AS INT) AS c1, CAST(value as STRING) AS c2 FROM t1")) benchmark.addCase("SQL CSV") { _ => - spark.sql("select sum(c1), sum(length(c2)) from csvTable").collect() + spark.sql("select sum(c1), sum(length(c2)) from csvTable").noop() } benchmark.addCase("SQL Json") { 
_ => - spark.sql("select sum(c1), sum(length(c2)) from jsonTable").collect() + spark.sql("select sum(c1), sum(length(c2)) from jsonTable").noop() } benchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql("select sum(c1), sum(length(c2)) from parquetTable").collect() + spark.sql("select sum(c1), sum(length(c2)) from parquetTable").noop() } benchmark.addCase("SQL Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(c1), sum(length(c2)) from parquetTable").collect() + spark.sql("select sum(c1), sum(length(c2)) from parquetTable").noop() } } benchmark.addCase("SQL ORC Vectorized") { _ => - spark.sql("SELECT sum(c1), sum(length(c2)) FROM orcTable").collect() + spark.sql("SELECT sum(c1), sum(length(c2)) FROM orcTable").noop() } benchmark.addCase("SQL ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(c1), sum(length(c2)) FROM orcTable").collect() + spark.sql("SELECT sum(c1), sum(length(c2)) FROM orcTable").noop() } } @@ -279,30 +283,30 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { spark.sql("select cast((value % 200) + 10000 as STRING) as c1 from t1")) benchmark.addCase("SQL CSV") { _ => - spark.sql("select sum(length(c1)) from csvTable").collect() + spark.sql("select sum(length(c1)) from csvTable").noop() } benchmark.addCase("SQL Json") { _ => - spark.sql("select sum(length(c1)) from jsonTable").collect() + spark.sql("select sum(length(c1)) from jsonTable").noop() } benchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql("select sum(length(c1)) from parquetTable").collect() + spark.sql("select sum(length(c1)) from parquetTable").noop() } benchmark.addCase("SQL Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(length(c1)) from parquetTable").collect() + spark.sql("select sum(length(c1)) from parquetTable").noop() } } benchmark.addCase("SQL ORC 
Vectorized") { _ => - spark.sql("select sum(length(c1)) from orcTable").collect() + spark.sql("select sum(length(c1)) from orcTable").noop() } benchmark.addCase("SQL ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(length(c1)) from orcTable").collect() + spark.sql("select sum(length(c1)) from orcTable").noop() } } @@ -322,86 +326,86 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { prepareTable(dir, spark.sql("SELECT value % 2 AS p, value AS id FROM t1"), Some("p")) benchmark.addCase("Data column - CSV") { _ => - spark.sql("select sum(id) from csvTable").collect() + spark.sql("select sum(id) from csvTable").noop() } benchmark.addCase("Data column - Json") { _ => - spark.sql("select sum(id) from jsonTable").collect() + spark.sql("select sum(id) from jsonTable").noop() } benchmark.addCase("Data column - Parquet Vectorized") { _ => - spark.sql("select sum(id) from parquetTable").collect() + spark.sql("select sum(id) from parquetTable").noop() } benchmark.addCase("Data column - Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(id) from parquetTable").collect() + spark.sql("select sum(id) from parquetTable").noop() } } benchmark.addCase("Data column - ORC Vectorized") { _ => - spark.sql("SELECT sum(id) FROM orcTable").collect() + spark.sql("SELECT sum(id) FROM orcTable").noop() } benchmark.addCase("Data column - ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(id) FROM orcTable").collect() + spark.sql("SELECT sum(id) FROM orcTable").noop() } } benchmark.addCase("Partition column - CSV") { _ => - spark.sql("select sum(p) from csvTable").collect() + spark.sql("select sum(p) from csvTable").noop() } benchmark.addCase("Partition column - Json") { _ => - spark.sql("select sum(p) from jsonTable").collect() + spark.sql("select sum(p) from jsonTable").noop() } 
benchmark.addCase("Partition column - Parquet Vectorized") { _ => - spark.sql("select sum(p) from parquetTable").collect() + spark.sql("select sum(p) from parquetTable").noop() } benchmark.addCase("Partition column - Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(p) from parquetTable").collect() + spark.sql("select sum(p) from parquetTable").noop() } } benchmark.addCase("Partition column - ORC Vectorized") { _ => - spark.sql("SELECT sum(p) FROM orcTable").collect() + spark.sql("SELECT sum(p) FROM orcTable").noop() } benchmark.addCase("Partition column - ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(p) FROM orcTable").collect() + spark.sql("SELECT sum(p) FROM orcTable").noop() } } benchmark.addCase("Both columns - CSV") { _ => - spark.sql("select sum(p), sum(id) from csvTable").collect() + spark.sql("select sum(p), sum(id) from csvTable").noop() } benchmark.addCase("Both columns - Json") { _ => - spark.sql("select sum(p), sum(id) from jsonTable").collect() + spark.sql("select sum(p), sum(id) from jsonTable").noop() } benchmark.addCase("Both columns - Parquet Vectorized") { _ => - spark.sql("select sum(p), sum(id) from parquetTable").collect() + spark.sql("select sum(p), sum(id) from parquetTable").noop() } benchmark.addCase("Both columns - Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("select sum(p), sum(id) from parquetTable").collect + spark.sql("select sum(p), sum(id) from parquetTable").noop() } } benchmark.addCase("Both columns - ORC Vectorized") { _ => - spark.sql("SELECT sum(p), sum(id) FROM orcTable").collect() + spark.sql("SELECT sum(p), sum(id) FROM orcTable").noop() } benchmark.addCase("Both columns - ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(p), sum(id) FROM orcTable").collect() + spark.sql("SELECT 
sum(p), sum(id) FROM orcTable").noop() } } @@ -427,23 +431,23 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("SQL CSV") { _ => spark.sql("select sum(length(c2)) from csvTable where c1 is " + - "not NULL and c2 is not NULL").collect() + "not NULL and c2 is not NULL").noop() } benchmark.addCase("SQL Json") { _ => spark.sql("select sum(length(c2)) from jsonTable where c1 is " + - "not NULL and c2 is not NULL").collect() + "not NULL and c2 is not NULL").noop() } benchmark.addCase("SQL Parquet Vectorized") { _ => spark.sql("select sum(length(c2)) from parquetTable where c1 is " + - "not NULL and c2 is not NULL").collect() + "not NULL and c2 is not NULL").noop() } benchmark.addCase("SQL Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("select sum(length(c2)) from parquetTable where c1 is " + - "not NULL and c2 is not NULL").collect() + "not NULL and c2 is not NULL").noop() } } @@ -474,13 +478,13 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("SQL ORC Vectorized") { _ => spark.sql("SELECT SUM(LENGTH(c2)) FROM orcTable " + - "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() } benchmark.addCase("SQL ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT SUM(LENGTH(c2)) FROM orcTable " + - "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() } } @@ -506,30 +510,30 @@ object DataSourceReadBenchmark extends BenchmarkBase with SQLHelper { prepareTable(dir, spark.sql("SELECT * FROM t1")) benchmark.addCase("SQL CSV") { _ => - spark.sql(s"SELECT sum(c$middle) FROM csvTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM csvTable").noop() } benchmark.addCase("SQL Json") { _ => - spark.sql(s"SELECT sum(c$middle) FROM jsonTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM 
jsonTable").noop() } benchmark.addCase("SQL Parquet Vectorized") { _ => - spark.sql(s"SELECT sum(c$middle) FROM parquetTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM parquetTable").noop() } benchmark.addCase("SQL Parquet MR") { _ => withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql(s"SELECT sum(c$middle) FROM parquetTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM parquetTable").noop() } } benchmark.addCase("SQL ORC Vectorized") { _ => - spark.sql(s"SELECT sum(c$middle) FROM orcTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM orcTable").noop() } benchmark.addCase("SQL ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql(s"SELECT sum(c$middle) FROM orcTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM orcTable").noop() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala index df0f87e483cdc..086583fdafe6d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.benchmark import java.sql.Timestamp import org.apache.spark.benchmark.Benchmark -import org.apache.spark.sql.internal.SQLConf /** * Synthetic benchmark for date and timestamp functions. 
@@ -36,7 +35,9 @@ import org.apache.spark.sql.internal.SQLConf */ object DateTimeBenchmark extends SqlBasedBenchmark { private def doBenchmark(cardinality: Int, exprs: String*): Unit = { - spark.range(cardinality).selectExpr(exprs: _*).write.format("noop").save() + spark.range(cardinality) + .selectExpr(exprs: _*) + .noop() } private def run(cardinality: Int, name: String, exprs: String*): Unit = { @@ -89,11 +90,9 @@ object DateTimeBenchmark extends SqlBasedBenchmark { run(N, "from_unixtime", "from_unixtime(id, 'yyyy-MM-dd HH:mm:ss.SSSSSS')") } runBenchmark("Convert timestamps") { - withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") { - val timestampExpr = "cast(id as timestamp)" - run(N, "from_utc_timestamp", s"from_utc_timestamp($timestampExpr, 'CET')") - run(N, "to_utc_timestamp", s"to_utc_timestamp($timestampExpr, 'CET')") - } + val timestampExpr = "cast(id as timestamp)" + run(N, "from_utc_timestamp", s"from_utc_timestamp($timestampExpr, 'CET')") + run(N, "to_utc_timestamp", s"to_utc_timestamp($timestampExpr, 'CET')") } runBenchmark("Intervals") { val (start, end) = ("cast(id as timestamp)", "cast((id+8640000) as timestamp)") @@ -132,7 +131,7 @@ object DateTimeBenchmark extends SqlBasedBenchmark { benchmark.addCase("From java.sql.Timestamp", numIters) { _ => spark.range(rowsNum) .map(millis => new Timestamp(millis)) - .write.format("noop").save() + .noop() } benchmark.addCase("Collect longs", numIters) { _ => spark.range(0, rowsNum, 1, 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala index dbbad43efa08c..de23132284dc8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ExtractBenchmark.scala @@ -19,6 +19,9 @@ package org.apache.spark.sql.execution.benchmark import java.time.Instant +import 
org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.internal.SQLConf + /** * Synthetic benchmark for the extract function. * To run this benchmark: @@ -32,51 +35,83 @@ import java.time.Instant * }}} */ object ExtractBenchmark extends SqlBasedBenchmark { + private def doBenchmark(cardinality: Long, exprs: String*): Unit = { val sinceSecond = Instant.parse("2010-01-01T00:00:00Z").getEpochSecond - spark - .range(sinceSecond, sinceSecond + cardinality, 1, 1) - .selectExpr(exprs: _*) - .write - .format("noop") - .save() + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + spark + .range(sinceSecond, sinceSecond + cardinality, 1, 1) + .selectExpr(exprs: _*) + .noop() + } } - private def run(cardinality: Long, name: String, exprs: String*): Unit = { - codegenBenchmark(name, cardinality) { + private def run( + benchmark: Benchmark, + cardinality: Long, + name: String, + exprs: String*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => doBenchmark(cardinality, exprs: _*) } } - private def run(cardinality: Long, field: String): Unit = { - codegenBenchmark(s"$field of timestamp", cardinality) { - doBenchmark(cardinality, s"EXTRACT($field FROM (cast(id as timestamp)))") + private def castExpr(from: String): String = from match { + case "timestamp" => "cast(id as timestamp)" + case "date" => "cast(cast(id as timestamp) as date)" + case "interval" => "(cast(cast(id as timestamp) as date) - date'0001-01-01') + " + + "(cast(id as timestamp) - timestamp'1000-01-01 01:02:03.123456')" + case other => throw new IllegalArgumentException( + s"Unsupported column type $other. 
Valid column types are 'timestamp' and 'date'") + } + + private def run( + benchmark: Benchmark, + func: String, + cardinality: Long, + field: String, + from: String): Unit = { + val expr = func match { + case "extract" => s"EXTRACT($field FROM ${castExpr(from)}) AS $field" + case "date_part" => s"DATE_PART('$field', ${castExpr(from)}) AS $field" + case other => throw new IllegalArgumentException( + s"Unsupported function '$other'. Valid functions are 'extract' and 'date_part'.") + } + benchmark.addCase(s"$field of $from", numIters = 3) { _ => + doBenchmark(cardinality, expr) } } + private case class Settings(fields: Seq[String], func: Seq[String], iterNum: Long) + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val N = 10000000L - runBenchmark("Extract") { - run(N, "cast to timestamp", "cast(id as timestamp)") - run(N, "MILLENNIUM") - run(N, "CENTURY") - run(N, "DECADE") - run(N, "YEAR") - run(N, "ISOYEAR") - run(N, "QUARTER") - run(N, "MONTH") - run(N, "WEEK") - run(N, "DAY") - run(N, "DAYOFWEEK") - run(N, "DOW") - run(N, "ISODOW") - run(N, "DOY") - run(N, "HOUR") - run(N, "MINUTE") - run(N, "SECOND") - run(N, "MILLISECONDS") - run(N, "MICROSECONDS") - run(N, "EPOCH") + val datetimeFields = Seq( + "MILLENNIUM", "CENTURY", "DECADE", "YEAR", + "ISOYEAR", "QUARTER", "MONTH", "WEEK", + "DAY", "DAYOFWEEK", "DOW", "ISODOW", + "DOY", "HOUR", "MINUTE", "SECOND", + "MILLISECONDS", "MICROSECONDS", "EPOCH") + val intervalFields = Seq( + "MILLENNIUM", "CENTURY", "DECADE", "YEAR", + "QUARTER", "MONTH", "DAY", + "HOUR", "MINUTE", "SECOND", + "MILLISECONDS", "MICROSECONDS", "EPOCH") + val settings = Map( + "timestamp" -> Settings(datetimeFields, Seq("extract", "date_part"), N), + "date" -> Settings(datetimeFields, Seq("extract", "date_part"), N), + "interval" -> Settings(intervalFields, Seq("date_part"), N)) + + for { + (dataType, Settings(fields, funcs, iterNum)) <- settings + func <- funcs} { + + val benchmark = new Benchmark(s"Invoke $func for $dataType", 
N, output = output) + + run(benchmark, iterNum, s"cast to $dataType", castExpr(dataType)) + fields.foreach(run(benchmark, func, iterNum, _, dataType)) + + benchmark.run() } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index b040243717137..444ffa4f99697 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -22,10 +22,9 @@ import java.io.File import scala.util.Random import org.apache.spark.SparkConf -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.internal.config.UI._ import org.apache.spark.sql.{DataFrame, SparkSession} -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.functions.monotonically_increasing_id import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType @@ -41,17 +40,21 @@ import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, TimestampType * Results will be written to "benchmarks/FilterPushdownBenchmark-results.txt". 
* }}} */ -object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { - - private val conf = new SparkConf() - .setAppName(this.getClass.getSimpleName) - // Since `spark.master` always exists, overrides this value - .set("spark.master", "local[1]") - .setIfMissing("spark.driver.memory", "3g") - .setIfMissing("spark.executor.memory", "3g") - .setIfMissing(UI_ENABLED, false) - .setIfMissing("orc.compression", "snappy") - .setIfMissing("spark.sql.parquet.compression.codec", "snappy") +object FilterPushdownBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + val conf = new SparkConf() + .setAppName(this.getClass.getSimpleName) + // Since `spark.master` always exists, overrides this value + .set("spark.master", "local[1]") + .setIfMissing("spark.driver.memory", "3g") + .setIfMissing("spark.executor.memory", "3g") + .setIfMissing(UI_ENABLED, false) + .setIfMissing("orc.compression", "snappy") + .setIfMissing("spark.sql.parquet.compression.codec", "snappy") + + SparkSession.builder().config(conf).getOrCreate() + } private val numRows = 1024 * 1024 * 15 private val width = 5 @@ -59,8 +62,6 @@ object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { // For Parquet/ORC, we will use the same value for block size and compression size private val blockSize = org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE - private val spark = SparkSession.builder().config(conf).getOrCreate() - def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView) } @@ -118,7 +119,7 @@ object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { val name = s"Parquet Vectorized ${if (pushDownEnabled) s"(Pushdown)" else ""}" benchmark.addCase(name) { _ => withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> s"$pushDownEnabled") { - spark.sql(s"SELECT $selectExpr FROM parquetTable WHERE $whereExpr").collect() + spark.sql(s"SELECT $selectExpr FROM parquetTable 
WHERE $whereExpr").noop() } } } @@ -127,7 +128,7 @@ object FilterPushdownBenchmark extends BenchmarkBase with SQLHelper { val name = s"Native ORC Vectorized ${if (pushDownEnabled) s"(Pushdown)" else ""}" benchmark.addCase(name) { _ => withSQLConf(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key -> s"$pushDownEnabled") { - spark.sql(s"SELECT $selectExpr FROM orcTable WHERE $whereExpr").collect() + spark.sql(s"SELECT $selectExpr FROM orcTable WHERE $whereExpr").noop() } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala index ebe278bff7d86..f3647b3bb2631 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.benchmark +import org.scalatest.Assertions._ + import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark import org.apache.spark.internal.config.MEMORY_OFFHEAP_ENABLED @@ -71,7 +73,7 @@ object HashedRelationMetricsBenchmark extends SqlBasedBenchmark { thread.start() thread } - threads.map(_.join()) + threads.foreach(_.join()) map.free() } benchmark.run() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala index 611f582b66605..caf3387875813 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala @@ -167,7 +167,7 @@ object InExpressionBenchmark extends SqlBasedBenchmark { def testClosure(): Unit = { val df = spark.sql(s"SELECT * FROM t WHERE id IN 
(${values.mkString(",")})") - df.queryExecution.toRdd.foreach(_ => Unit) + df.noop() } benchmark.addCase("In expression") { _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala new file mode 100644 index 0000000000000..94e763459a111 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/IntervalBenchmark.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import scala.collection.mutable.ListBuffer + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.Column +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf + +/** + * Synthetic benchmark for interval functions. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "sql/test:runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/IntervalBenchmark-results.txt". 
+ * }}} + */ +object IntervalBenchmark extends SqlBasedBenchmark { + import spark.implicits._ + + private def doBenchmark(cardinality: Long, exprs: Column*): Unit = { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + spark + .range(0, cardinality, 1, 1) + .select(exprs: _*) + .noop() + } + } + + private def addCase( + benchmark: Benchmark, + cardinality: Long, + name: String, + exprs: Column*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => + doBenchmark(cardinality, exprs: _*) + } + } + + private def buildString(withPrefix: Boolean, units: Seq[String] = Seq.empty): Column = { + val init = lit(if (withPrefix) "interval" else "") :: + ($"id" % 10000).cast("string") :: + lit("years") :: Nil + + concat_ws(" ", (init ++ units.map(lit)): _*) + } + + private def addCase(benchmark: Benchmark, cardinality: Long, units: Seq[String]): Unit = { + Seq(true, false).foreach { withPrefix => + val expr = buildString(withPrefix, units).cast("interval") + val note = if (withPrefix) "w/ interval" else "w/o interval" + benchmark.addCase(s"${units.length + 1} units $note", numIters = 3) { _ => + doBenchmark(cardinality, expr) + } + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val N = 1000000 + val timeUnits = Seq( + "13 months", " 1 months", + "100 weeks", "9 days", "12 hours", "- 3 hours", + "5 minutes", "45 seconds", "123 milliseconds", "567 microseconds") + val intervalToTest = ListBuffer[String]() + + val benchmark = new Benchmark("cast strings to intervals", N, output = output) + // The first 2 cases are used to show the overhead of preparing the interval string. 
+ addCase(benchmark, N, "prepare string w/ interval", buildString(true, timeUnits)) + addCase(benchmark, N, "prepare string w/o interval", buildString(false, timeUnits)) + addCase(benchmark, N, intervalToTest) // Only years + + for (unit <- timeUnits) { + intervalToTest.append(unit) + addCase(benchmark, N, intervalToTest) + } + + benchmark.run() + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala index ad81711a13947..1cc92892fe122 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/JoinBenchmark.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.benchmark +import org.scalatest.Assertions._ + import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -44,7 +46,7 @@ object JoinBenchmark extends SqlBasedBenchmark { codegenBenchmark("Join w long", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) - df.count() + df.noop() } } @@ -55,7 +57,7 @@ object JoinBenchmark extends SqlBasedBenchmark { codegenBenchmark("Join w long duplicated", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) - df.count() + df.noop() } } @@ -70,7 +72,7 @@ object JoinBenchmark extends SqlBasedBenchmark { (col("id") % M).cast(IntegerType) === col("k1") && (col("id") % M).cast(IntegerType) === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) - df.count() + df.noop() } } @@ -84,7 +86,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df = spark.range(N).join(dim3, (col("id") % M) === 
col("k1") && (col("id") % M) === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) - df.count() + df.noop() } } @@ -98,7 +100,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df = spark.range(N).join(dim4, (col("id") bitwiseAND M) === col("k1") && (col("id") bitwiseAND M) === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) - df.count() + df.noop() } } @@ -109,7 +111,7 @@ object JoinBenchmark extends SqlBasedBenchmark { codegenBenchmark("outer join w long", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k"), "left") assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) - df.count() + df.noop() } } @@ -120,7 +122,7 @@ object JoinBenchmark extends SqlBasedBenchmark { codegenBenchmark("semi join w long", N) { val df = spark.range(N).join(dim, (col("id") % M) === col("k"), "leftsemi") assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[BroadcastHashJoinExec]).isDefined) - df.count() + df.noop() } } @@ -131,7 +133,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df2 = spark.range(N).selectExpr(s"id * 3 as k2") val df = df1.join(df2, col("k1") === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[SortMergeJoinExec]).isDefined) - df.count() + df.noop() } } @@ -144,7 +146,7 @@ object JoinBenchmark extends SqlBasedBenchmark { .selectExpr(s"(id * 15485867) % ${N*10} as k2") val df = df1.join(df2, col("k1") === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[SortMergeJoinExec]).isDefined) - df.count() + df.noop() } } @@ -159,7 +161,7 @@ object JoinBenchmark extends SqlBasedBenchmark { val df2 = spark.range(N / 3).selectExpr(s"id * 3 as k2") val df = df1.join(df2, col("k1") === col("k2")) assert(df.queryExecution.sparkPlan.find(_.isInstanceOf[ShuffledHashJoinExec]).isDefined) - df.count() + df.noop() } } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala new file mode 100644 index 0000000000000..c92098c93aa1e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MakeDateTimeBenchmark.scala @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.internal.SQLConf + +/** + * Synthetic benchmark for the make_date() and make_timestamp() functions. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "sql/test:runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/MakeDateTimeBenchmark-results.txt". 
+ * }}} + */ +object MakeDateTimeBenchmark extends SqlBasedBenchmark { + + private def doBenchmark(cardinality: Long, exprs: String*): Unit = { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + spark + .range(0, cardinality, 1, 1) + .selectExpr(exprs: _*) + .noop() + } + } + + private def run(benchmark: Benchmark, cardinality: Long, name: String, exprs: String*): Unit = { + benchmark.addCase(name, numIters = 3) { _ => doBenchmark(cardinality, exprs: _*) } + } + + private val ymdExprs = Seq("(2000 + (id % 30))", "((id % 12) + 1)", "((id % 27) + 1)") + + private def benchmarkMakeDate(cardinality: Long): Unit = { + val benchmark = new Benchmark("make_date()", cardinality, output = output) + val args = ymdExprs + + run(benchmark, cardinality, "prepare make_date()", args: _*) + val foldableExpr = "make_date(2019, 9, 16)" + run(benchmark, cardinality, foldableExpr, foldableExpr) + run( + benchmark, + cardinality, + "make_date(*, *, *)", + "make_date" + args.mkString("(", ",", ")")) + + benchmark.run() + } + + private def benchmarkMakeTimestamp(cardinality: Long): Unit = { + val benchmark = new Benchmark("make_timestamp()", cardinality, output = output) + val hmExprs = Seq("id % 24", "id % 60") + val hmsExprs = hmExprs ++ Seq("cast((id % 60000000) / 1000000.0 as decimal(8, 6))") + val args = ymdExprs ++ hmsExprs + + run( + benchmark, + cardinality, + "prepare make_timestamp()", + args: _*) + var foldableExpr = "make_timestamp(2019, 1, 2, 3, 4, 50.123456)" + run(benchmark, cardinality, foldableExpr, foldableExpr) + foldableExpr = "make_timestamp(2019, 1, 2, 3, 4, 60.000000)" + run(benchmark, cardinality, foldableExpr, foldableExpr) + foldableExpr = "make_timestamp(2019, 12, 31, 23, 59, 60.00)" + run(benchmark, cardinality, foldableExpr, foldableExpr) + run( + benchmark, + cardinality, + "make_timestamp(*, *, *, 3, 4, 50.123456)", + s"make_timestamp(${ymdExprs.mkString(",")}, 3, 4, 50.123456)") + run( + benchmark, + cardinality, + "make_timestamp(*, *, *, 
*, *, 0)", + s"make_timestamp(" + (ymdExprs ++ hmExprs).mkString(", ") + ", 0)") + run( + benchmark, + cardinality, + "make_timestamp(*, *, *, *, *, 60.0)", + s"make_timestamp(" + (ymdExprs ++ hmExprs).mkString(", ") + ", 60.0)") + run( + benchmark, + cardinality, + "make_timestamp(2019, 1, 2, *, *, *)", + s"make_timestamp(2019, 1, 2, ${hmsExprs.mkString(",")})") + run( + benchmark, + cardinality, + "make_timestamp(*, *, *, *, *, *)", + s"make_timestamp" + args.mkString("(", ", ", ")")) + + benchmark.run() + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + benchmarkMakeDate(100000000L) + benchmarkMakeTimestamp(1000000L) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala index bafc0337bdc0e..2aecf553d75a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MiscBenchmark.scala @@ -35,7 +35,7 @@ object MiscBenchmark extends SqlBasedBenchmark { def filterAndAggregateWithoutGroup(numRows: Long): Unit = { runBenchmark("filter & aggregate without group") { codegenBenchmark("range/filter/sum", numRows) { - spark.range(numRows).filter("(id & 1) = 1").groupBy().sum().collect() + spark.range(numRows).filter("(id & 1) = 1").groupBy().sum().noop() } } } @@ -43,7 +43,7 @@ object MiscBenchmark extends SqlBasedBenchmark { def limitAndAggregateWithoutGroup(numRows: Long): Unit = { runBenchmark("range/limit/sum") { codegenBenchmark("range/limit/sum", numRows) { - spark.range(numRows).limit(1000000).groupBy().sum().collect() + spark.range(numRows).limit(1000000).groupBy().sum().noop() } } } @@ -51,11 +51,11 @@ object MiscBenchmark extends SqlBasedBenchmark { def sample(numRows: Int): Unit = { runBenchmark("sample") { codegenBenchmark("sample with replacement", numRows) { - 
spark.range(numRows).sample(withReplacement = true, 0.01).groupBy().sum().collect() + spark.range(numRows).sample(withReplacement = true, 0.01).groupBy().sum().noop() } codegenBenchmark("sample without replacement", numRows) { - spark.range(numRows).sample(withReplacement = false, 0.01).groupBy().sum().collect() + spark.range(numRows).sample(withReplacement = false, 0.01).groupBy().sum().noop() } } } @@ -95,28 +95,28 @@ object MiscBenchmark extends SqlBasedBenchmark { val df = spark.range(numRows).selectExpr( "id as key", "array(rand(), rand(), rand(), rand(), rand()) as values") - df.selectExpr("key", "explode(values) value").count() + df.selectExpr("key", "explode(values) value").noop() } codegenBenchmark("generate explode map", numRows) { val df = spark.range(numRows).selectExpr( "id as key", "map('a', rand(), 'b', rand(), 'c', rand(), 'd', rand(), 'e', rand()) pairs") - df.selectExpr("key", "explode(pairs) as (k, v)").count() + df.selectExpr("key", "explode(pairs) as (k, v)").noop() } codegenBenchmark("generate posexplode array", numRows) { val df = spark.range(numRows).selectExpr( "id as key", "array(rand(), rand(), rand(), rand(), rand()) as values") - df.selectExpr("key", "posexplode(values) as (idx, value)").count() + df.selectExpr("key", "posexplode(values) as (idx, value)").noop() } codegenBenchmark("generate inline array", numRows) { val df = spark.range(numRows).selectExpr( "id as key", "array((rand(), rand()), (rand(), rand()), (rand(), 0.0d)) as values") - df.selectExpr("key", "inline(values) as (r1, r2)").count() + df.selectExpr("key", "inline(values) as (r1, r2)").noop() } val M = 60000 @@ -129,7 +129,7 @@ object MiscBenchmark extends SqlBasedBenchmark { })))).toDF("col", "arr") df.selectExpr("*", "explode(arr) as arr_col") - .select("col", "arr_col.*").count + .select("col", "arr_col.*").noop() } withSQLConf(SQLConf.NESTED_PRUNING_ON_EXPRESSIONS.key -> "true") { @@ -142,7 +142,7 @@ object MiscBenchmark extends SqlBasedBenchmark { })))).toDF("col", 
"arr") .selectExpr("col", "struct(col, arr) as st") .selectExpr("col", "st.col as col1", "explode(st.arr) as arr_col") - df.collect() + df.noop() } } } @@ -158,7 +158,7 @@ object MiscBenchmark extends SqlBasedBenchmark { "id % 5 as t3", "id % 7 as t4", "id % 13 as t5") - df.selectExpr("key", "stack(4, t1, t2, t3, t4, t5)").count() + df.selectExpr("key", "stack(4, t1, t2, t3, t4, t5)").noop() } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/NestedSchemaPruningBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/NestedSchemaPruningBenchmark.scala index 96f90f29707d2..90fad7f36b862 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/NestedSchemaPruningBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/NestedSchemaPruningBenchmark.scala @@ -35,7 +35,7 @@ abstract class NestedSchemaPruningBenchmark extends SqlBasedBenchmark { // We use `col1 BIGINT, col2 STRUCT<_1: BIGINT, _2: STRING>, // col3 ARRAY>` as a test schema. - // col1, col2._1 and col3._1 are used for comparision. col2._2 and col3._2 mimics the burden + // col1, col2._1 and col3._1 are used for comparison. 
col2._2 and col3._2 mimics the burden // for the other columns private val df = spark .range(N * 10) @@ -47,7 +47,7 @@ abstract class NestedSchemaPruningBenchmark extends SqlBasedBenchmark { private def addCase(benchmark: Benchmark, name: String, sql: String): Unit = { benchmark.addCase(name) { _ => - spark.sql(sql).write.format("noop").save() + spark.sql(sql).noop() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala index 8b1c422e63a3f..e07921bf3aa74 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.benchmark -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.SparkSession /** @@ -28,13 +28,16 @@ import org.apache.spark.sql.SparkSession * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " * Results will be written to "benchmarks/PrimitiveArrayBenchmark-results.txt". 
*/ -object PrimitiveArrayBenchmark extends BenchmarkBase { - lazy val sparkSession = SparkSession.builder - .master("local[1]") - .appName("microbenchmark") - .config("spark.sql.shuffle.partitions", 1) - .config("spark.sql.autoBroadcastJoinThreshold", 1) - .getOrCreate() +object PrimitiveArrayBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + SparkSession.builder + .master("local[1]") + .appName("microbenchmark") + .config("spark.sql.shuffle.partitions", 1) + .config("spark.sql.autoBroadcastJoinThreshold", 1) + .getOrCreate() + } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Write primitive arrays in dataset") { @@ -43,11 +46,11 @@ object PrimitiveArrayBenchmark extends BenchmarkBase { } def writeDatasetArray(iters: Int): Unit = { - import sparkSession.implicits._ + import spark.implicits._ val count = 1024 * 1024 * 2 - val sc = sparkSession.sparkContext + val sc = spark.sparkContext val primitiveIntArray = Array.fill[Int](count)(65535) val dsInt = sc.parallelize(Seq(primitiveIntArray), 1).toDS dsInt.count // force to build dataset diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala index a9f873f9094ba..e566f5d5adee6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/RangeBenchmark.scala @@ -40,15 +40,15 @@ object RangeBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("range", N, output = output) benchmark.addCase("full scan", numIters = 4) { _ => - spark.range(N).queryExecution.toRdd.foreach(_ => ()) + spark.range(N).noop() } benchmark.addCase("limit after range", numIters = 4) { _ => - spark.range(N).limit(100).queryExecution.toRdd.foreach(_ => ()) + spark.range(N).limit(100).noop() } benchmark.addCase("filter after range", 
numIters = 4) { _ => - spark.range(N).filter('id % 100 === 0).queryExecution.toRdd.foreach(_ => ()) + spark.range(N).filter('id % 100 === 0).noop() } benchmark.addCase("count after range", numIters = 4) { _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala index 3760539c16841..2c9e8a909633c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SortBenchmark.scala @@ -38,7 +38,8 @@ import org.apache.spark.util.random.XORShiftRandom */ object SortBenchmark extends BenchmarkBase { - private def referenceKeyPrefixSort(buf: LongArray, lo: Int, hi: Int, refCmp: PrefixComparator) { + private def referenceKeyPrefixSort(buf: LongArray, lo: Int, hi: Int, + refCmp: PrefixComparator): Unit = { val sortBuffer = new LongArray(MemoryBlock.fromLongArray(new Array[Long](buf.size().toInt))) new Sorter(new UnsafeSortDataFormat(sortBuffer)).sort(buf, lo, hi, (r1: RecordPointerAndKeyPrefix, r2: RecordPointerAndKeyPrefix) => @@ -47,7 +48,7 @@ object SortBenchmark extends BenchmarkBase { private def generateKeyPrefixTestData(size: Int, rand: => Long): (LongArray, LongArray) = { val ref = Array.tabulate[Long](size * 2) { i => rand } - val extended = ref ++ Array.fill[Long](size * 2)(0) + val extended = ref ++ Array.ofDim[Long](size * 2) (new LongArray(MemoryBlock.fromLongArray(ref)), new LongArray(MemoryBlock.fromLongArray(extended))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala index e95e5a960246b..ee7a03e5e0542 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.SaveMode.Overwrite import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.internal.SQLConf @@ -57,4 +58,10 @@ trait SqlBasedBenchmark extends BenchmarkBase with SQLHelper { benchmark.run() } + + implicit class DatasetToBenchmark(ds: Dataset[_]) { + def noop(): Unit = { + ds.write.format("noop").mode(Overwrite).save() + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 93006d05b75bc..ad3d79760adf0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -29,11 +29,19 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation /** * Benchmark to measure TPCDS query performance. * To run this: - * spark-submit --class --data-location + * {{{ + * 1. without sbt: + * bin/spark-submit --class --data-location + * 2. build/sbt "sql/test:runMain --data-location " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt + * "sql/test:runMain --data-location " + * Results will be written to "benchmarks/TPCDSQueryBenchmark-results.txt". 
+ * }}} */ -object TPCDSQueryBenchmark extends Logging { - val conf = - new SparkConf() +object TPCDSQueryBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = { + val conf = new SparkConf() .setMaster("local[1]") .setAppName("test-sql-context") .set("spark.sql.parquet.compression.codec", "snappy") @@ -43,7 +51,8 @@ object TPCDSQueryBenchmark extends Logging { .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString) .set("spark.sql.crossJoin.enabled", "true") - val spark = SparkSession.builder.config(conf).getOrCreate() + SparkSession.builder.config(conf).getOrCreate() + } val tables = Seq("catalog_page", "catalog_returns", "customer", "customer_address", "customer_demographics", "date_dim", "household_demographics", "inventory", "item", @@ -72,21 +81,19 @@ object TPCDSQueryBenchmark extends Logging { val queryRelations = scala.collection.mutable.HashSet[String]() spark.sql(queryString).queryExecution.analyzed.foreach { case SubqueryAlias(alias, _: LogicalRelation) => - queryRelations.add(alias.identifier) + queryRelations.add(alias.name) case LogicalRelation(_, _, Some(catalogTable), _) => queryRelations.add(catalogTable.identifier.table) - case HiveTableRelation(tableMeta, _, _, _) => + case HiveTableRelation(tableMeta, _, _, _, _) => queryRelations.add(tableMeta.identifier.table) case _ => } val numRows = queryRelations.map(tableSizes.getOrElse(_, 0L)).sum - val benchmark = new Benchmark(s"TPCDS Snappy", numRows, 5) + val benchmark = new Benchmark(s"TPCDS Snappy", numRows, 2, output = output) benchmark.addCase(s"$name$nameSuffix") { _ => - spark.sql(queryString).collect() + spark.sql(queryString).noop() } - logInfo(s"\n\n===== TPCDS QUERY BENCHMARK OUTPUT FOR $name =====\n") benchmark.run() - logInfo(s"\n\n===== FINISHED $name =====\n") } } @@ -100,8 +107,8 @@ object TPCDSQueryBenchmark extends Logging { } } - def main(args: Array[String]): Unit = { - val benchmarkArgs = new TPCDSQueryBenchmarkArguments(args) + 
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val benchmarkArgs = new TPCDSQueryBenchmarkArguments(mainArgs) // List of all TPC-DS v1.4 queries val tpcdsQueries = Seq( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UDFBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UDFBenchmark.scala index 9cbd6423f667f..ee8a6e787c36c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UDFBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/UDFBenchmark.scala @@ -42,8 +42,9 @@ object UDFBenchmark extends SqlBasedBenchmark { val nullableIntCol = when( idCol % 2 === 0, idCol.cast(IntegerType)).otherwise(Literal(null, IntegerType)) val stringCol = idCol.cast(StringType) - spark.range(cardinality).select( - udf(idCol, nullableIntCol, stringCol)).write.format("noop").save() + spark.range(cardinality) + .select(udf(idCol, nullableIntCol, stringCol)) + .noop() } private def doRunBenchmarkWithPrimitiveTypes( @@ -51,7 +52,9 @@ object UDFBenchmark extends SqlBasedBenchmark { val idCol = col("id") val nullableIntCol = when( idCol % 2 === 0, idCol.cast(IntegerType)).otherwise(Literal(null, IntegerType)) - spark.range(cardinality).select(udf(idCol, nullableIntCol)).write.format("noop").save() + spark.range(cardinality) + .select(udf(idCol, nullableIntCol)) + .noop() } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { @@ -104,16 +107,19 @@ object UDFBenchmark extends SqlBasedBenchmark { val benchmark = new Benchmark("UDF identity overhead", cardinality, output = output) benchmark.addCase(s"Baseline", numIters = 5) { _ => - spark.range(cardinality).select( - col("id"), col("id") * 2, col("id") * 3).write.format("noop").save() + spark.range(cardinality) + .select(col("id"), col("id") * 2, col("id") * 3) + .noop() } val identityUDF = udf { x: Long => x } benchmark.addCase(s"With identity UDF", numIters = 5) { _ => - 
spark.range(cardinality).select( - identityUDF(col("id")), - identityUDF(col("id") * 2), - identityUDF(col("id") * 3)).write.format("noop").save() + spark.range(cardinality) + .select( + identityUDF(col("id")), + identityUDF(col("id") * 2), + identityUDF(col("id") * 3)) + .noop() } benchmark.run() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala index f4642e7d353e6..77dc3a10f8033 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideSchemaBenchmark.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.benchmark import java.io.File +import org.scalatest.Assertions._ + import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.DataFrame import org.apache.spark.util.Utils @@ -68,14 +70,14 @@ object WideSchemaBenchmark extends SqlBasedBenchmark { desc: String, selector: String): Unit = { benchmark.addCase(desc + " (read in-mem)") { iter => - df.selectExpr(s"sum($selector)").collect() + df.selectExpr(s"sum($selector)").noop() } benchmark.addCase(desc + " (exec in-mem)") { iter => - df.selectExpr("*", s"hash($selector) as f").selectExpr(s"sum($selector)", "sum(f)").collect() + df.selectExpr("*", s"hash($selector) as f").selectExpr(s"sum($selector)", "sum(f)").noop() } val parquet = saveAsParquet(df) benchmark.addCase(desc + " (read parquet)") { iter => - parquet.selectExpr(s"sum($selector) as f").collect() + parquet.selectExpr(s"sum($selector) as f").noop() } benchmark.addCase(desc + " (write parquet)") { iter => saveAsParquet(df.selectExpr(s"sum($selector) as f")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala index 
52426d81bd1a7..ba79c12c461c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala @@ -42,7 +42,7 @@ object WideTableBenchmark extends SqlBasedBenchmark { Seq("10", "100", "1024", "2048", "4096", "8192", "65536").foreach { n => benchmark.addCase(s"split threshold $n", numIters = 5) { iter => withSQLConf(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> n) { - df.selectExpr(columns: _*).foreach(_ => ()) + df.selectExpr(columns: _*).noop() } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala index 3121b7e99c99d..847e0ec4f3195 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval class ColumnStatsSuite extends SparkFunSuite { testColumnStats(classOf[BooleanColumnStats], BOOLEAN, Array(true, false, 0)) @@ -30,6 +31,7 @@ class ColumnStatsSuite extends SparkFunSuite { testColumnStats(classOf[DoubleColumnStats], DOUBLE, Array(Double.MaxValue, Double.MinValue, 0)) testColumnStats(classOf[StringColumnStats], STRING, Array(null, null, 0)) testDecimalColumnStats(Array(null, null, 0)) + testIntervalColumnStats(Array(null, null, 0)) def testColumnStats[T <: AtomicType, U <: ColumnStats]( columnStatsClass: Class[U], @@ -103,4 +105,36 @@ class ColumnStatsSuite extends SparkFunSuite { } } } + + def testIntervalColumnStats[T <: AtomicType, U <: ColumnStats]( + initialStatistics: Array[Any]): Unit = { + + val columnStatsName = classOf[IntervalColumnStats].getSimpleName + val 
columnType = CALENDAR_INTERVAL + + test(s"$columnStatsName: empty") { + val columnStats = new IntervalColumnStats + columnStats.collectedStatistics.zip(initialStatistics).foreach { + case (actual, expected) => assert(actual === expected) + } + } + + test(s"$columnStatsName: non-empty") { + import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ + + val columnStats = new IntervalColumnStats + val rows = Seq.fill(10)(makeRandomRow(columnType)) ++ Seq.fill(10)(makeNullRow(1)) + rows.foreach(columnStats.gatherStats(_, 0)) + + val stats = columnStats.collectedStatistics + + assertResult(10, "Wrong null count")(stats(2)) + assertResult(20, "Wrong row count")(stats(3)) + assertResult(stats(4), "Wrong size in bytes") { + rows.map { row => + if (row.isNullAt(0)) 4 else columnType.actualSize(row, 0) + }.sum + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala index ff05049551dc8..b25aa6e308657 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection} import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval class ColumnTypeSuite extends SparkFunSuite with Logging { private val DEFAULT_BUFFER_SIZE = 512 @@ -38,7 +39,8 @@ class ColumnTypeSuite extends SparkFunSuite with Logging { val checks = Map( NULL -> 0, BOOLEAN -> 1, BYTE -> 1, SHORT -> 2, INT -> 4, LONG -> 8, FLOAT -> 4, DOUBLE -> 8, COMPACT_DECIMAL(15, 10) -> 8, LARGE_DECIMAL(20, 10) -> 12, - STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68) + 
STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68, + CALENDAR_INTERVAL -> 16) checks.foreach { case (columnType, expectedSize) => assertResult(expectedSize, s"Wrong defaultSize for $columnType") { @@ -76,6 +78,7 @@ class ColumnTypeSuite extends SparkFunSuite with Logging { checkActualSize(ARRAY_TYPE, Array[Any](1), 4 + 8 + 8 + 8) checkActualSize(MAP_TYPE, Map(1 -> "a"), 4 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8)) checkActualSize(STRUCT_TYPE, Row("hello"), 28) + checkActualSize(CALENDAR_INTERVAL, new CalendarInterval(0, 0, 0), 4 + 4 + 8) } testNativeColumnType(BOOLEAN) @@ -94,6 +97,7 @@ class ColumnTypeSuite extends SparkFunSuite with Logging { testColumnType(STRUCT_TYPE) testColumnType(ARRAY_TYPE) testColumnType(MAP_TYPE) + testColumnType(CALENDAR_INTERVAL) def testNativeColumnType[T <: AtomicType](columnType: NativeColumnType[T]): Unit = { testColumnType[T#InternalType](columnType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala index 686c8fa6f5fa9..fee3329030e66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { @@ -51,6 +51,8 @@ object ColumnarTestUtils { case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => 
randomBytes(Random.nextInt(32)) + case CALENDAR_INTERVAL => + new CalendarInterval(Random.nextInt(), Random.nextInt(), Random.nextLong()) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 0fac4dd3e5137..77047f329e105 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -38,7 +38,7 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession { setupTestData() - private def cachePrimitiveTest(data: DataFrame, dataType: String) { + private def cachePrimitiveTest(data: DataFrame, dataType: String): Unit = { data.createOrReplaceTempView(s"testData$dataType") val storageLevel = MEMORY_ONLY val plan = spark.sessionState.executePlan(data.logicalPlan).sparkPlan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala index 8f4ca3cea77a5..92d9d84d9fac6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala @@ -44,7 +44,8 @@ class NullableColumnAccessorSuite extends SparkFunSuite { NULL, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, COMPACT_DECIMAL(15, 10), LARGE_DECIMAL(20, 10), STRUCT(StructType(StructField("a", StringType) :: Nil)), - ARRAY(ArrayType(IntegerType)), MAP(MapType(IntegerType, StringType))) + 
ARRAY(ArrayType(IntegerType)), MAP(MapType(IntegerType, StringType)), + CALENDAR_INTERVAL) .foreach { testNullableColumnAccessor(_) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala index b2b6e92e9a056..7e295b4dc31c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala @@ -42,7 +42,8 @@ class NullableColumnBuilderSuite extends SparkFunSuite { BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, BINARY, COMPACT_DECIMAL(15, 10), LARGE_DECIMAL(20, 10), STRUCT(StructType(StructField("a", StringType) :: Nil)), - ARRAY(ArrayType(IntegerType)), MAP(MapType(IntegerType, StringType))) + ARRAY(ArrayType(IntegerType)), MAP(MapType(IntegerType, StringType)), + CALENDAR_INTERVAL) .foreach { testNullableColumnBuilder(_) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala index 2d71a42628dfb..192db0e910d03 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/BooleanBitSetSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types.BooleanType class BooleanBitSetSuite extends SparkFunSuite { import BooleanBitSet._ - def skeleton(count: Int) { + def skeleton(count: Int): Unit = { // ------------- // Tests encoder // ------------- @@ -87,7 +87,7 @@ class BooleanBitSetSuite extends SparkFunSuite { assert(!decoder.hasNext) } - def skeletonForDecompress(count: Int) { + def skeletonForDecompress(count: Int): Unit = { val builder = 
TestCompressibleColumnBuilder(new NoopColumnStats, BOOLEAN, BooleanBitSet) val rows = Seq.fill[InternalRow](count)(makeRandomRow(BOOLEAN)) val values = rows.map(_.getBoolean(0)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala index 8ea20f28a37b2..fcb18392235c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets import org.apache.commons.lang3.RandomStringUtils import org.apache.commons.math3.distribution.LogNormalDistribution +import org.scalatest.Assertions._ import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.catalyst.expressions.GenericInternalRow diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala index 28950b74cf1c8..61e4cc068fa80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala @@ -35,7 +35,7 @@ class DictionaryEncodingSuite extends SparkFunSuite { def testDictionaryEncoding[T <: AtomicType]( columnStats: ColumnStats, columnType: NativeColumnType[T], - testDecompress: Boolean = true) { + testDecompress: Boolean = true): Unit = { val typeName = columnType.getClass.getSimpleName.stripSuffix("$") @@ -49,7 +49,7 @@ class DictionaryEncodingSuite extends SparkFunSuite { seq.head +: seq.tail.filterNot(_ == seq.head) } - def skeleton(uniqueValueCount: Int, 
inputSeq: Seq[Int]) { + def skeleton(uniqueValueCount: Int, inputSeq: Seq[Int]): Unit = { // ------------- // Tests encoder // ------------- @@ -116,7 +116,7 @@ class DictionaryEncodingSuite extends SparkFunSuite { } } - def skeletonForDecompress(uniqueValueCount: Int, inputSeq: Seq[Int]) { + def skeletonForDecompress(uniqueValueCount: Int, inputSeq: Seq[Int]): Unit = { if (!testDecompress) return val builder = TestCompressibleColumnBuilder(columnStats, columnType, DictionaryEncoding) val (values, rows) = makeUniqueValuesAndSingleValueRows(columnType, uniqueValueCount) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala index fb3388452e4e5..b5630488b3667 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/IntegralDeltaSuite.scala @@ -32,9 +32,9 @@ class IntegralDeltaSuite extends SparkFunSuite { def testIntegralDelta[I <: IntegralType]( columnStats: ColumnStats, columnType: NativeColumnType[I], - scheme: CompressionScheme) { + scheme: CompressionScheme): Unit = { - def skeleton(input: Seq[I#InternalType]) { + def skeleton(input: Seq[Any]): Unit = { // ------------- // Tests encoder // ------------- @@ -52,7 +52,7 @@ class IntegralDeltaSuite extends SparkFunSuite { input.foreach { value => val row = new GenericInternalRow(1) - columnType.setField(row, 0, value) + columnType.setField(row, 0, value.asInstanceOf[I#InternalType]) builder.appendFrom(row, 0) } @@ -112,7 +112,7 @@ class IntegralDeltaSuite extends SparkFunSuite { assert(!decoder.hasNext) } - def skeletonForDecompress(input: Seq[I#InternalType]) { + def skeletonForDecompress(input: Seq[I#InternalType]): Unit = { val builder = TestCompressibleColumnBuilder(columnStats, columnType, scheme) val row = new 
GenericInternalRow(1) val nullRow = new GenericInternalRow(1) @@ -173,9 +173,7 @@ class IntegralDeltaSuite extends SparkFunSuite { } test(s"$scheme: long random series") { - // Have to workaround with `Any` since no `ClassTag[I#JvmType]` available here. - val input = Array.fill[Any](10000)(makeRandomValue(columnType)) - skeleton(input.map(_.asInstanceOf[I#InternalType])) + skeleton(Seq.fill[I#InternalType](10000)(makeRandomValue(columnType))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala index b6f0b5e6277b4..f946a6779ec95 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/PassThroughEncodingSuite.scala @@ -35,11 +35,11 @@ class PassThroughSuite extends SparkFunSuite { def testPassThrough[T <: AtomicType]( columnStats: ColumnStats, - columnType: NativeColumnType[T]) { + columnType: NativeColumnType[T]): Unit = { val typeName = columnType.getClass.getSimpleName.stripSuffix("$") - def skeleton(input: Seq[T#InternalType]) { + def skeleton(input: Seq[T#InternalType]): Unit = { // ------------- // Tests encoder // ------------- @@ -93,7 +93,7 @@ class PassThroughSuite extends SparkFunSuite { assert(!decoder.hasNext) } - def skeletonForDecompress(input: Seq[T#InternalType]) { + def skeletonForDecompress(input: Seq[T#InternalType]): Unit = { val builder = TestCompressibleColumnBuilder(columnStats, columnType, PassThrough) val row = new GenericInternalRow(1) val nullRow = new GenericInternalRow(1) @@ -160,8 +160,7 @@ class PassThroughSuite extends SparkFunSuite { } test(s"$PassThrough with $typeName: long random series") { - val input = Array.fill[Any](10000)(makeRandomValue(columnType)) - skeleton(input.map(_.asInstanceOf[T#InternalType])) + 
skeleton(Seq.fill[T#InternalType](10000)(makeRandomValue(columnType))) } test(s"$PassThrough with $typeName: empty column for decompress()") { @@ -169,8 +168,7 @@ class PassThroughSuite extends SparkFunSuite { } test(s"$PassThrough with $typeName: long random series for decompress()") { - val input = Array.fill[Any](10000)(makeRandomValue(columnType)) - skeletonForDecompress(input.map(_.asInstanceOf[T#InternalType])) + skeletonForDecompress(Seq.fill[T#InternalType](10000)(makeRandomValue(columnType))) } test(s"$PassThrough with $typeName: simple case with null for decompress()") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala index eb1cdd9bbceff..29dbc13b59c6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala @@ -36,11 +36,11 @@ class RunLengthEncodingSuite extends SparkFunSuite { def testRunLengthEncoding[T <: AtomicType]( columnStats: ColumnStats, columnType: NativeColumnType[T], - testDecompress: Boolean = true) { + testDecompress: Boolean = true): Unit = { val typeName = columnType.getClass.getSimpleName.stripSuffix("$") - def skeleton(uniqueValueCount: Int, inputRuns: Seq[(Int, Int)]) { + def skeleton(uniqueValueCount: Int, inputRuns: Seq[(Int, Int)]): Unit = { // ------------- // Tests encoder // ------------- @@ -98,7 +98,7 @@ class RunLengthEncodingSuite extends SparkFunSuite { assert(!decoder.hasNext) } - def skeletonForDecompress(uniqueValueCount: Int, inputRuns: Seq[(Int, Int)]) { + def skeletonForDecompress(uniqueValueCount: Int, inputRuns: Seq[(Int, Int)]): Unit = { if (!testDecompress) return val builder = TestCompressibleColumnBuilder(columnStats, columnType, RunLengthEncoding) val (values, rows) = 
makeUniqueValuesAndSingleValueRows(columnType, uniqueValueCount) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 74ef81f7181da..81965e4c6c353 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.catalyst.dsl.plans import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan import org.apache.spark.sql.catalyst.expressions.JsonTuple import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.catalyst.plans.logical.{Generate, InsertIntoDir, LogicalPlan, Project, ScriptTransformation} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform} import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -74,108 +75,10 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { }.head } - test("create database") { - val sql = - """ - |CREATE DATABASE IF NOT EXISTS database_name - |WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c') - |COMMENT 'database_comment' LOCATION '/home/user/db' - """.stripMargin - val parsed = parser.parsePlan(sql) - val expected = CreateDatabaseCommand( - "database_name", - ifNotExists = true, - Some("/home/user/db"), - Some("database_comment"), - Map("a" -> "a", "b" -> "b", "c" -> "c")) - comparePlans(parsed, expected) - } - - test("create database -- check duplicates") { - def createDatabase(duplicateClause: String): String = { - s""" - |CREATE DATABASE IF NOT EXISTS database_name - |$duplicateClause - |$duplicateClause - """.stripMargin - } - val sql1 = createDatabase("COMMENT 'database_comment'") - 
val sql2 = createDatabase("LOCATION '/home/user/db'") - val sql3 = createDatabase("WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')") - - intercept(sql1, "Found duplicate clauses: COMMENT") - intercept(sql2, "Found duplicate clauses: LOCATION") - intercept(sql3, "Found duplicate clauses: WITH DBPROPERTIES") - } - - test("create database - property values must be set") { - assertUnsupported( - sql = "CREATE DATABASE my_db WITH DBPROPERTIES('key_without_value', 'key_with_value'='x')", - containsThesePhrases = Seq("key_without_value")) - } - - test("drop database") { - val sql1 = "DROP DATABASE IF EXISTS database_name RESTRICT" - val sql2 = "DROP DATABASE IF EXISTS database_name CASCADE" - val sql3 = "DROP SCHEMA IF EXISTS database_name RESTRICT" - val sql4 = "DROP SCHEMA IF EXISTS database_name CASCADE" - // The default is restrict=true - val sql5 = "DROP DATABASE IF EXISTS database_name" - // The default is ifExists=false - val sql6 = "DROP DATABASE database_name" - val sql7 = "DROP DATABASE database_name CASCADE" - - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - val parsed3 = parser.parsePlan(sql3) - val parsed4 = parser.parsePlan(sql4) - val parsed5 = parser.parsePlan(sql5) - val parsed6 = parser.parsePlan(sql6) - val parsed7 = parser.parsePlan(sql7) - - val expected1 = DropDatabaseCommand( - "database_name", - ifExists = true, - cascade = false) - val expected2 = DropDatabaseCommand( - "database_name", - ifExists = true, - cascade = true) - val expected3 = DropDatabaseCommand( - "database_name", - ifExists = false, - cascade = false) - val expected4 = DropDatabaseCommand( - "database_name", - ifExists = false, - cascade = true) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - comparePlans(parsed3, expected1) - comparePlans(parsed4, expected2) - comparePlans(parsed5, expected1) - comparePlans(parsed6, expected3) - comparePlans(parsed7, expected4) - } - - test("alter database set dbproperties") { - // ALTER 
(DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...) - val sql1 = "ALTER DATABASE database_name SET DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')" - val sql2 = "ALTER SCHEMA database_name SET DBPROPERTIES ('a'='a')" - - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - - val expected1 = AlterDatabasePropertiesCommand( - "database_name", - Map("a" -> "a", "b" -> "b", "c" -> "c")) - val expected2 = AlterDatabasePropertiesCommand( - "database_name", - Map("a" -> "a")) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) + private def withCreateTableStatement(sql: String)(prediction: CreateTableStatement => Unit) + : Unit = { + val statement = parser.parsePlan(sql).asInstanceOf[CreateTableStatement] + prediction(statement) } test("alter database - property values must be set") { @@ -184,146 +87,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { containsThesePhrases = Seq("key_without_value")) } - test("describe database") { - // DESCRIBE DATABASE [EXTENDED] db_name; - val sql1 = "DESCRIBE DATABASE EXTENDED db_name" - val sql2 = "DESCRIBE DATABASE db_name" - - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - - val expected1 = DescribeDatabaseCommand( - "db_name", - extended = true) - val expected2 = DescribeDatabaseCommand( - "db_name", - extended = false) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - } - - test("create function") { - val sql1 = - """ - |CREATE TEMPORARY FUNCTION helloworld as - |'com.matthewrathbone.example.SimpleUDFExample' USING JAR '/path/to/jar1', - |JAR '/path/to/jar2' - """.stripMargin - val sql2 = - """ - |CREATE FUNCTION hello.world as - |'com.matthewrathbone.example.SimpleUDFExample' USING ARCHIVE '/path/to/archive', - |FILE '/path/to/file' - """.stripMargin - val sql3 = - """ - |CREATE OR REPLACE TEMPORARY FUNCTION helloworld3 as - |'com.matthewrathbone.example.SimpleUDFExample' USING JAR 
'/path/to/jar1', - |JAR '/path/to/jar2' - """.stripMargin - val sql4 = - """ - |CREATE OR REPLACE FUNCTION hello.world1 as - |'com.matthewrathbone.example.SimpleUDFExample' USING ARCHIVE '/path/to/archive', - |FILE '/path/to/file' - """.stripMargin - val sql5 = - """ - |CREATE FUNCTION IF NOT EXISTS hello.world2 as - |'com.matthewrathbone.example.SimpleUDFExample' USING ARCHIVE '/path/to/archive', - |FILE '/path/to/file' - """.stripMargin - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - val parsed3 = parser.parsePlan(sql3) - val parsed4 = parser.parsePlan(sql4) - val parsed5 = parser.parsePlan(sql5) - val expected1 = CreateFunctionCommand( - None, - "helloworld", - "com.matthewrathbone.example.SimpleUDFExample", - Seq( - FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"), - FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")), - isTemp = true, ignoreIfExists = false, replace = false) - val expected2 = CreateFunctionCommand( - Some("hello"), - "world", - "com.matthewrathbone.example.SimpleUDFExample", - Seq( - FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"), - FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")), - isTemp = false, ignoreIfExists = false, replace = false) - val expected3 = CreateFunctionCommand( - None, - "helloworld3", - "com.matthewrathbone.example.SimpleUDFExample", - Seq( - FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"), - FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")), - isTemp = true, ignoreIfExists = false, replace = true) - val expected4 = CreateFunctionCommand( - Some("hello"), - "world1", - "com.matthewrathbone.example.SimpleUDFExample", - Seq( - FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"), - FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")), - isTemp = false, ignoreIfExists = false, replace = 
true) - val expected5 = CreateFunctionCommand( - Some("hello"), - "world2", - "com.matthewrathbone.example.SimpleUDFExample", - Seq( - FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"), - FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")), - isTemp = false, ignoreIfExists = true, replace = false) - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - comparePlans(parsed3, expected3) - comparePlans(parsed4, expected4) - comparePlans(parsed5, expected5) - } - - test("drop function") { - val sql1 = "DROP TEMPORARY FUNCTION helloworld" - val sql2 = "DROP TEMPORARY FUNCTION IF EXISTS helloworld" - val sql3 = "DROP FUNCTION hello.world" - val sql4 = "DROP FUNCTION IF EXISTS hello.world" - - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - val parsed3 = parser.parsePlan(sql3) - val parsed4 = parser.parsePlan(sql4) - - val expected1 = DropFunctionCommand( - None, - "helloworld", - ifExists = false, - isTemp = true) - val expected2 = DropFunctionCommand( - None, - "helloworld", - ifExists = true, - isTemp = true) - val expected3 = DropFunctionCommand( - Some("hello"), - "world", - ifExists = false, - isTemp = false) - val expected4 = DropFunctionCommand( - Some("hello"), - "world", - ifExists = true, - isTemp = false) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - comparePlans(parsed3, expected3) - comparePlans(parsed4, expected4) - } - test("create hive table - table file format") { val allSources = Seq("parquet", "parquetfile", "orc", "orcfile", "avro", "avrofile", "sequencefile", "rcfile", "textfile") @@ -400,9 +163,9 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { test("create hive external table - location must be specified") { assertUnsupported( - sql = "CREATE EXTERNAL TABLE my_tab", + sql = "CREATE EXTERNAL TABLE my_tab STORED AS parquet", containsThesePhrases = Seq("create external table", "location")) - val query = 
"CREATE EXTERNAL TABLE my_tab LOCATION '/something/anything'" + val query = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" val ct = parseAs[CreateTable](query) assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) @@ -410,7 +173,8 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { test("create hive table - property values must be set") { assertUnsupported( - sql = "CREATE TABLE my_tab TBLPROPERTIES('key_without_value', 'key_with_value'='x')", + sql = "CREATE TABLE my_tab STORED AS parquet " + + "TBLPROPERTIES('key_without_value', 'key_with_value'='x')", containsThesePhrases = Seq("key_without_value")) assertUnsupported( sql = "CREATE TABLE my_tab ROW FORMAT SERDE 'serde' " + @@ -419,7 +183,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { } test("create hive table - location implies external") { - val query = "CREATE TABLE my_tab LOCATION '/something/anything'" + val query = "CREATE TABLE my_tab STORED AS parquet LOCATION '/something/anything'" val ct = parseAs[CreateTable](query) assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) @@ -496,32 +260,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { "Directory path and 'path' in OPTIONS should be specified one, but not both")) } - // ALTER TABLE table_name RENAME TO new_table_name; - // ALTER VIEW view_name RENAME TO new_view_name; - test("alter table/view: rename table/view") { - val sql_table = "ALTER TABLE table_name RENAME TO new_table_name" - val sql_view = sql_table.replace("TABLE", "VIEW") - val parsed_table = parser.parsePlan(sql_table) - val parsed_view = parser.parsePlan(sql_view) - val expected_table = AlterTableRenameCommand( - TableIdentifier("table_name"), - TableIdentifier("new_table_name"), - isView = false) - val expected_view = 
AlterTableRenameCommand( - TableIdentifier("table_name"), - TableIdentifier("new_table_name"), - isView = true) - comparePlans(parsed_table, expected_table) - comparePlans(parsed_view, expected_view) - } - - test("alter table: rename table with database") { - val query = "ALTER TABLE db1.tbl RENAME TO db1.tbl2" - val plan = parseAs[AlterTableRenameCommand](query) - assert(plan.oldName == TableIdentifier("tbl", Some("db1"))) - assert(plan.newName == TableIdentifier("tbl2", Some("db1"))) - } - test("alter table - property values must be set") { assertUnsupported( sql = "ALTER TABLE my_tab SET TBLPROPERTIES('key_without_value', 'key_with_value'='x')", @@ -534,61 +272,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { containsThesePhrases = Seq("key_with_value")) } - test("alter table: SerDe properties") { - val sql1 = "ALTER TABLE table_name SET SERDE 'org.apache.class'" - val sql2 = - """ - |ALTER TABLE table_name SET SERDE 'org.apache.class' - |WITH SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') - """.stripMargin - val sql3 = - """ - |ALTER TABLE table_name SET SERDEPROPERTIES ('columns'='foo,bar', - |'field.delim' = ',') - """.stripMargin - val sql4 = - """ - |ALTER TABLE table_name PARTITION (test=1, dt='2008-08-08', - |country='us') SET SERDE 'org.apache.class' WITH SERDEPROPERTIES ('columns'='foo,bar', - |'field.delim' = ',') - """.stripMargin - val sql5 = - """ - |ALTER TABLE table_name PARTITION (test=1, dt='2008-08-08', - |country='us') SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') - """.stripMargin - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - val parsed3 = parser.parsePlan(sql3) - val parsed4 = parser.parsePlan(sql4) - val parsed5 = parser.parsePlan(sql5) - val tableIdent = TableIdentifier("table_name", None) - val expected1 = AlterTableSerDePropertiesCommand( - tableIdent, Some("org.apache.class"), None, None) - val expected2 = AlterTableSerDePropertiesCommand( - 
tableIdent, - Some("org.apache.class"), - Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), - None) - val expected3 = AlterTableSerDePropertiesCommand( - tableIdent, None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), None) - val expected4 = AlterTableSerDePropertiesCommand( - tableIdent, - Some("org.apache.class"), - Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), - Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) - val expected5 = AlterTableSerDePropertiesCommand( - tableIdent, - None, - Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), - Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - comparePlans(parsed3, expected3) - comparePlans(parsed4, expected4) - comparePlans(parsed5, expected5) - } - test("alter table - SerDe property values must be set") { assertUnsupported( sql = "ALTER TABLE my_tab SET SERDE 'serde' " + @@ -596,66 +279,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { containsThesePhrases = Seq("key_without_value")) } - // ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec - // [LOCATION 'location1'] partition_spec [LOCATION 'location2'] ...; - test("alter table: add partition") { - val sql1 = - """ - |ALTER TABLE table_name ADD IF NOT EXISTS PARTITION - |(dt='2008-08-08', country='us') LOCATION 'location1' PARTITION - |(dt='2009-09-09', country='uk') - """.stripMargin - val sql2 = "ALTER TABLE table_name ADD PARTITION (dt='2008-08-08') LOCATION 'loc'" - - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - - val expected1 = AlterTableAddPartitionCommand( - TableIdentifier("table_name", None), - Seq( - (Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), - (Map("dt" -> "2009-09-09", "country" -> "uk"), None)), - ifNotExists = true) - val expected2 = AlterTableAddPartitionCommand( - TableIdentifier("table_name", None), - 
Seq((Map("dt" -> "2008-08-08"), Some("loc"))), - ifNotExists = false) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - } - - test("alter table: recover partitions") { - val sql = "ALTER TABLE table_name RECOVER PARTITIONS" - val parsed = parser.parsePlan(sql) - val expected = AlterTableRecoverPartitionsCommand( - TableIdentifier("table_name", None)) - comparePlans(parsed, expected) - } - - test("alter view: add partition (not supported)") { - assertUnsupported( - """ - |ALTER VIEW view_name ADD IF NOT EXISTS PARTITION - |(dt='2008-08-08', country='us') PARTITION - |(dt='2009-09-09', country='uk') - """.stripMargin) - } - - test("alter table: rename partition") { - val sql = - """ - |ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') - |RENAME TO PARTITION (dt='2008-09-09', country='uk') - """.stripMargin - val parsed = parser.parsePlan(sql) - val expected = AlterTableRenamePartitionCommand( - TableIdentifier("table_name", None), - Map("dt" -> "2008-08-08", "country" -> "us"), - Map("dt" -> "2008-09-09", "country" -> "uk")) - comparePlans(parsed, expected) - } - test("alter table: exchange partition (not supported)") { assertUnsupported( """ @@ -664,45 +287,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { """.stripMargin) } - // ALTER TABLE table_name DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] - // ALTER VIEW table_name DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] 
- test("alter table/view: drop partitions") { - val sql1_table = - """ - |ALTER TABLE table_name DROP IF EXISTS PARTITION - |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') - """.stripMargin - val sql2_table = - """ - |ALTER TABLE table_name DROP PARTITION - |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') - """.stripMargin - val sql1_view = sql1_table.replace("TABLE", "VIEW") - val sql2_view = sql2_table.replace("TABLE", "VIEW") - - val parsed1_table = parser.parsePlan(sql1_table) - val parsed2_table = parser.parsePlan(sql2_table) - val parsed1_purge = parser.parsePlan(sql1_table + " PURGE") - assertUnsupported(sql1_view) - assertUnsupported(sql2_view) - - val tableIdent = TableIdentifier("table_name", None) - val expected1_table = AlterTableDropPartitionCommand( - tableIdent, - Seq( - Map("dt" -> "2008-08-08", "country" -> "us"), - Map("dt" -> "2009-09-09", "country" -> "uk")), - ifExists = true, - purge = false, - retainData = false) - val expected2_table = expected1_table.copy(ifExists = false) - val expected1_purge = expected1_table.copy(purge = true) - - comparePlans(parsed1_table, expected1_table) - comparePlans(parsed2_table, expected2_table) - comparePlans(parsed1_purge, expected1_purge) - } - test("alter table: archive partition (not supported)") { assertUnsupported("ALTER TABLE table_name ARCHIVE PARTITION (dt='2008-08-08', country='us')") } @@ -719,46 +303,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { "SET FILEFORMAT PARQUET") } - test("alter table: set partition location") { - val sql2 = "ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') " + - "SET LOCATION 'new location'" - val parsed2 = parser.parsePlan(sql2) - val tableIdent = TableIdentifier("table_name", None) - val expected2 = AlterTableSetLocationCommand( - tableIdent, - Some(Map("dt" -> "2008-08-08", "country" -> "us")), - "new location") - comparePlans(parsed2, expected2) - } - - test("alter table: 
change column name/type/comment") { - val sql1 = "ALTER TABLE table_name CHANGE COLUMN col_old_name col_new_name INT" - val sql2 = "ALTER TABLE table_name CHANGE COLUMN col_name col_name INT COMMENT 'new_comment'" - val parsed1 = parser.parsePlan(sql1) - val parsed2 = parser.parsePlan(sql2) - val tableIdent = TableIdentifier("table_name", None) - val expected1 = AlterTableChangeColumnCommand( - tableIdent, - "col_old_name", - StructField("col_new_name", IntegerType)) - val expected2 = AlterTableChangeColumnCommand( - tableIdent, - "col_name", - StructField("col_name", IntegerType).withComment("new_comment")) - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - } - - test("alter table: change column position (not supported)") { - assertUnsupported("ALTER TABLE table_name CHANGE COLUMN col_old_name col_new_name INT FIRST") - assertUnsupported( - "ALTER TABLE table_name CHANGE COLUMN col_old_name col_new_name INT AFTER other_col") - } - - test("alter table: change column in partition spec") { - assertUnsupported("ALTER TABLE table_name PARTITION (a='1', a='2') CHANGE COLUMN a new_a INT") - } - test("alter table: touch (not supported)") { assertUnsupported("ALTER TABLE table_name TOUCH") assertUnsupported("ALTER TABLE table_name TOUCH PARTITION (dt='2008-08-08', country='us')") @@ -802,26 +346,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { """.stripMargin) } - test("show databases") { - val sql1 = "SHOW DATABASES" - val sql2 = "SHOW DATABASES LIKE 'defau*'" - val parsed1 = parser.parsePlan(sql1) - val expected1 = ShowDatabasesCommand(None) - val parsed2 = parser.parsePlan(sql2) - val expected2 = ShowDatabasesCommand(Some("defau*")) - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - } - - test("show tblproperties") { - val parsed1 = parser.parsePlan("SHOW TBLPROPERTIES tab1") - val expected1 = ShowTablePropertiesCommand(TableIdentifier("tab1", None), None) - val parsed2 = parser.parsePlan("SHOW 
TBLPROPERTIES tab1('propKey1')") - val expected2 = ShowTablePropertiesCommand(TableIdentifier("tab1", None), Some("propKey1")) - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - } - test("SPARK-14383: DISTRIBUTE and UNSET as non-keywords") { val sql = "SELECT distribute, unset FROM x" val parsed = parser.parsePlan(sql) @@ -851,66 +375,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(e.contains("Found an empty partition key 'b'")) } - test("show columns") { - val sql1 = "SHOW COLUMNS FROM t1" - val sql2 = "SHOW COLUMNS IN db1.t1" - val sql3 = "SHOW COLUMNS FROM t1 IN db1" - val sql4 = "SHOW COLUMNS FROM db1.t1 IN db2" - - val parsed1 = parser.parsePlan(sql1) - val expected1 = ShowColumnsCommand(None, TableIdentifier("t1", None)) - val parsed2 = parser.parsePlan(sql2) - val expected2 = ShowColumnsCommand(None, TableIdentifier("t1", Some("db1"))) - val parsed3 = parser.parsePlan(sql3) - val expected3 = ShowColumnsCommand(Some("db1"), TableIdentifier("t1", None)) - val parsed4 = parser.parsePlan(sql4) - val expected4 = ShowColumnsCommand(Some("db2"), TableIdentifier("t1", Some("db1"))) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - comparePlans(parsed3, expected3) - comparePlans(parsed4, expected4) - } - - - test("show partitions") { - val sql1 = "SHOW PARTITIONS t1" - val sql2 = "SHOW PARTITIONS db1.t1" - val sql3 = "SHOW PARTITIONS t1 PARTITION(partcol1='partvalue', partcol2='partvalue')" - - val parsed1 = parser.parsePlan(sql1) - val expected1 = - ShowPartitionsCommand(TableIdentifier("t1", None), None) - val parsed2 = parser.parsePlan(sql2) - val expected2 = - ShowPartitionsCommand(TableIdentifier("t1", Some("db1")), None) - val expected3 = - ShowPartitionsCommand(TableIdentifier("t1", None), - Some(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue"))) - val parsed3 = parser.parsePlan(sql3) - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - 
comparePlans(parsed3, expected3) - } - - test("support for other types in DBPROPERTIES") { - val sql = - """ - |CREATE DATABASE database_name - |LOCATION '/home/user/db' - |WITH DBPROPERTIES ('a'=1, 'b'=0.1, 'c'=TRUE) - """.stripMargin - val parsed = parser.parsePlan(sql) - val expected = CreateDatabaseCommand( - "database_name", - ifNotExists = false, - Some("/home/user/db"), - None, - Map("a" -> "1", "b" -> "0.1", "c" -> "true")) - - comparePlans(parsed, expected) - } - test("Test CTAS #1") { val s1 = """ @@ -957,7 +421,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(desc.comment == Some("This is the staging page view table")) // TODO will be SQLText assert(desc.viewText.isEmpty) - assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) assert(desc.viewQueryColumnNames.isEmpty) assert(desc.partitionColumnNames.isEmpty) assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) @@ -1009,7 +473,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { // TODO will be SQLText assert(desc.comment == Some("This is the staging page view table")) assert(desc.viewText.isEmpty) - assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) assert(desc.viewQueryColumnNames.isEmpty) assert(desc.partitionColumnNames.isEmpty) assert(desc.storage.properties == Map()) @@ -1022,22 +486,17 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { test("Test CTAS #3") { val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" - val (desc, exists) = extractTableDesc(s3) - assert(exists == false) - assert(desc.identifier.database == None) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.storage.locationUri == None) - assert(desc.schema.isEmpty) - assert(desc.viewText == None) // TODO will be SQLText - assert(desc.viewDefaultDatabase.isEmpty) - 
assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.properties == Map()) - assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat")) - assert(desc.storage.outputFormat == - Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc.properties == Map()) + val statement = parser.parsePlan(s3).asInstanceOf[CreateTableAsSelectStatement] + assert(statement.tableName(0) == "page_view") + assert(statement.asSelect == parser.parsePlan("SELECT * FROM src")) + assert(statement.partitioning.isEmpty) + assert(statement.bucketSpec.isEmpty) + assert(statement.properties.isEmpty) + assert(statement.provider == conf.defaultDataSourceName) + assert(statement.options.isEmpty) + assert(statement.location.isEmpty) + assert(statement.comment.isEmpty) + assert(!statement.ifNotExists) } test("Test CTAS #4") { @@ -1067,7 +526,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(desc.storage.locationUri == None) assert(desc.schema.isEmpty) assert(desc.viewText == None) // TODO will be SQLText - assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) assert(desc.viewQueryColumnNames.isEmpty) assert(desc.storage.properties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) @@ -1139,7 +598,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assertError("select interval '23:61:15' hour to second", "minute 61 outside range [0, 59]") assertError("select interval '.1111111111' second", - "nanosecond 1111111111 outside range") + "'.1111111111' is out of range") } test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") { @@ -1197,68 +656,60 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { test("create table - basic") { val query = "CREATE TABLE 
my_table (id int, name string)" - val (desc, allowExisting) = extractTableDesc(query) - assert(!allowExisting) - assert(desc.identifier.database.isEmpty) - assert(desc.identifier.table == "my_table") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.schema == new StructType().add("id", "int").add("name", "string")) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.bucketSpec.isEmpty) - assert(desc.viewText.isEmpty) - assert(desc.viewDefaultDatabase.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.locationUri.isEmpty) - assert(desc.storage.inputFormat == - Some("org.apache.hadoop.mapred.TextInputFormat")) - assert(desc.storage.outputFormat == - Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc.storage.properties.isEmpty) - assert(desc.properties.isEmpty) - assert(desc.comment.isEmpty) + withCreateTableStatement(query) { state => + assert(state.tableName(0) == "my_table") + assert(state.tableSchema == new StructType().add("id", "int").add("name", "string")) + assert(state.partitioning.isEmpty) + assert(state.bucketSpec.isEmpty) + assert(state.properties.isEmpty) + assert(state.provider == conf.defaultDataSourceName) + assert(state.options.isEmpty) + assert(state.location.isEmpty) + assert(state.comment.isEmpty) + assert(!state.ifNotExists) + } } test("create table - with database name") { val query = "CREATE TABLE dbx.my_table (id int, name string)" - val (desc, _) = extractTableDesc(query) - assert(desc.identifier.database == Some("dbx")) - assert(desc.identifier.table == "my_table") + withCreateTableStatement(query) { state => + assert(state.tableName(0) == "dbx") + assert(state.tableName(1) == "my_table") + } } test("create table - temporary") { val query = "CREATE TEMPORARY TABLE tab1 (id int, name string)" val e = intercept[ParseException] { parser.parsePlan(query) } - 
assert(e.message.contains("CREATE TEMPORARY TABLE is not supported yet")) + assert(e.message.contains("CREATE TEMPORARY TABLE without a provider is not allowed.")) } test("create table - external") { val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'" - val (desc, _) = extractTableDesc(query) - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/path/to/nowhere"))) + val e = intercept[ParseException] { parser.parsePlan(query) } + assert(e.message.contains("Operation not allowed: CREATE EXTERNAL TABLE ...")) } test("create table - if not exists") { val query = "CREATE TABLE IF NOT EXISTS tab1 (id int, name string)" - val (_, allowExisting) = extractTableDesc(query) - assert(allowExisting) + withCreateTableStatement(query) { state => + assert(state.ifNotExists) + } } test("create table - comment") { val query = "CREATE TABLE my_table (id int, name string) COMMENT 'its hot as hell below'" - val (desc, _) = extractTableDesc(query) - assert(desc.comment == Some("its hot as hell below")) + withCreateTableStatement(query) { state => + assert(state.comment == Some("its hot as hell below")) + } } test("create table - partitioned columns") { - val query = "CREATE TABLE my_table (id int, name string) PARTITIONED BY (month int)" - val (desc, _) = extractTableDesc(query) - assert(desc.schema == new StructType() - .add("id", "int") - .add("name", "string") - .add("month", "int")) - assert(desc.partitionColumnNames == Seq("month")) + val query = "CREATE TABLE my_table (id int, name string) PARTITIONED BY (id)" + withCreateTableStatement(query) { state => + val transform = IdentityTransform(FieldReference(Seq("id"))) + assert(state.partitioning == Seq(transform)) + } } test("create table - clustered by") { @@ -1274,23 +725,25 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { """ val query1 = s"$baseQuery INTO $numBuckets BUCKETS" - val (desc1, _) = 
extractTableDesc(query1) - assert(desc1.bucketSpec.isDefined) - val bucketSpec1 = desc1.bucketSpec.get - assert(bucketSpec1.numBuckets == numBuckets) - assert(bucketSpec1.bucketColumnNames.head.equals(bucketedColumn)) - assert(bucketSpec1.sortColumnNames.isEmpty) + withCreateTableStatement(query1) { state => + assert(state.bucketSpec.isDefined) + val bucketSpec = state.bucketSpec.get + assert(bucketSpec.numBuckets == numBuckets) + assert(bucketSpec.bucketColumnNames.head.equals(bucketedColumn)) + assert(bucketSpec.sortColumnNames.isEmpty) + } val query2 = s"$baseQuery SORTED BY($sortColumn) INTO $numBuckets BUCKETS" - val (desc2, _) = extractTableDesc(query2) - assert(desc2.bucketSpec.isDefined) - val bucketSpec2 = desc2.bucketSpec.get - assert(bucketSpec2.numBuckets == numBuckets) - assert(bucketSpec2.bucketColumnNames.head.equals(bucketedColumn)) - assert(bucketSpec2.sortColumnNames.head.equals(sortColumn)) + withCreateTableStatement(query2) { state => + assert(state.bucketSpec.isDefined) + val bucketSpec = state.bucketSpec.get + assert(bucketSpec.numBuckets == numBuckets) + assert(bucketSpec.bucketColumnNames.head.equals(bucketedColumn)) + assert(bucketSpec.sortColumnNames.head.equals(sortColumn)) + } } - test("create table - skewed by") { + test("create table(hive) - skewed by") { val baseQuery = "CREATE TABLE my_table (id int, name string) SKEWED BY" val query1 = s"$baseQuery(id) ON (1, 10, 100)" val query2 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z'))" @@ -1303,7 +756,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(e3.getMessage.contains("Operation not allowed")) } - test("create table - row format") { + test("create table(hive) - row format") { val baseQuery = "CREATE TABLE my_table (id int, name string) ROW FORMAT" val query1 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff'" val query2 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1')" @@ -1331,7 +784,7 @@ class DDLParserSuite 
extends AnalysisTest with SharedSparkSession { "mapkey.delim" -> "b")) } - test("create table - file format") { + test("create table(hive) - file format") { val baseQuery = "CREATE TABLE my_table (id int, name string) STORED AS" val query1 = s"$baseQuery INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput'" val query2 = s"$baseQuery ORC" @@ -1345,7 +798,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(desc2.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) } - test("create table - storage handler") { + test("create table(hive) - storage handler") { val baseQuery = "CREATE TABLE my_table (id int, name string) STORED BY" val query1 = s"$baseQuery 'org.papachi.StorageHandler'" val query2 = s"$baseQuery 'org.mamachi.StorageHandler' WITH SERDEPROPERTIES ('k1'='v1')" @@ -1357,11 +810,12 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { test("create table - properties") { val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')" - val (desc, _) = extractTableDesc(query) - assert(desc.properties == Map("k1" -> "v1", "k2" -> "v2")) + withCreateTableStatement(query) { state => + assert(state.properties == Map("k1" -> "v1", "k2" -> "v2")) + } } - test("create table - everything!") { + test("create table(hive) - everything!") { val query = """ |CREATE EXTERNAL TABLE IF NOT EXISTS dbx.my_table (id int, name string) @@ -1384,7 +838,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(desc.partitionColumnNames == Seq("month")) assert(desc.bucketSpec.isEmpty) assert(desc.viewText.isEmpty) - assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) assert(desc.viewQueryColumnNames.isEmpty) assert(desc.storage.locationUri == Some(new URI("/path/to/mercury"))) assert(desc.storage.inputFormat == Some("winput")) @@ -1395,134 +849,83 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(desc.comment == Some("no 
comment")) } - test("create view -- basic") { - val v1 = "CREATE VIEW view1 AS SELECT * FROM tab1" - val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand] - assert(!command.allowExisting) - assert(command.name.database.isEmpty) - assert(command.name.table == "view1") - assert(command.originalText == Some("SELECT * FROM tab1")) - assert(command.userSpecifiedColumns.isEmpty) - } - - test("create view - full") { - val v1 = - """ - |CREATE OR REPLACE VIEW view1 - |(col1, col3 COMMENT 'hello') - |TBLPROPERTIES('prop1Key'="prop1Val") - |COMMENT 'BLABLA' - |AS SELECT * FROM tab1 - """.stripMargin - val command = parser.parsePlan(v1).asInstanceOf[CreateViewCommand] - assert(command.name.database.isEmpty) - assert(command.name.table == "view1") - assert(command.userSpecifiedColumns == Seq("col1" -> None, "col3" -> Some("hello"))) - assert(command.originalText == Some("SELECT * FROM tab1")) - assert(command.properties == Map("prop1Key" -> "prop1Val")) - assert(command.comment == Some("BLABLA")) - } - - test("create view -- partitioned view") { - val v1 = "CREATE VIEW view1 partitioned on (ds, hr) as select * from srcpart" - intercept[ParseException] { - parser.parsePlan(v1) - } - } - - test("create view - duplicate clauses") { - def createViewStatement(duplicateClause: String): String = { - s""" - |CREATE OR REPLACE VIEW view1 - |(col1, col3 COMMENT 'hello') - |$duplicateClause - |$duplicateClause - |AS SELECT * FROM tab1 - """.stripMargin - } - val sql1 = createViewStatement("COMMENT 'BLABLA'") - val sql2 = createViewStatement("TBLPROPERTIES('prop1Key'=\"prop1Val\")") - intercept(sql1, "Found duplicate clauses: COMMENT") - intercept(sql2, "Found duplicate clauses: TBLPROPERTIES") - } - - test("MSCK REPAIR table") { - val sql = "MSCK REPAIR TABLE tab1" - val parsed = parser.parsePlan(sql) - val expected = AlterTableRecoverPartitionsCommand( - TableIdentifier("tab1", None), - "MSCK REPAIR TABLE") - comparePlans(parsed, expected) - } - test("create table like") { 
val v1 = "CREATE TABLE table1 LIKE table2" - val (target, source, location, exists) = parser.parsePlan(v1).collect { - case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) - }.head + val (target, source, fileFormat, provider, properties, exists) = + parser.parsePlan(v1).collect { + case CreateTableLikeCommand(t, s, f, p, pr, e) => (t, s, f, p, pr, e) + }.head assert(exists == false) assert(target.database.isEmpty) assert(target.table == "table1") assert(source.database.isEmpty) assert(source.table == "table2") - assert(location.isEmpty) + assert(fileFormat.locationUri.isEmpty) + assert(provider.isEmpty) val v2 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2" - val (target2, source2, location2, exists2) = parser.parsePlan(v2).collect { - case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) - }.head + val (target2, source2, fileFormat2, provider2, properties2, exists2) = + parser.parsePlan(v2).collect { + case CreateTableLikeCommand(t, s, f, p, pr, e) => (t, s, f, p, pr, e) + }.head assert(exists2) assert(target2.database.isEmpty) assert(target2.table == "table1") assert(source2.database.isEmpty) assert(source2.table == "table2") - assert(location2.isEmpty) + assert(fileFormat2.locationUri.isEmpty) + assert(provider2.isEmpty) val v3 = "CREATE TABLE table1 LIKE table2 LOCATION '/spark/warehouse'" - val (target3, source3, location3, exists3) = parser.parsePlan(v3).collect { - case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) - }.head + val (target3, source3, fileFormat3, provider3, properties3, exists3) = + parser.parsePlan(v3).collect { + case CreateTableLikeCommand(t, s, f, p, pr, e) => (t, s, f, p, pr, e) + }.head assert(!exists3) assert(target3.database.isEmpty) assert(target3.table == "table1") assert(source3.database.isEmpty) assert(source3.table == "table2") - assert(location3 == Some("/spark/warehouse")) - - val v4 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2 LOCATION 
'/spark/warehouse'" - val (target4, source4, location4, exists4) = parser.parsePlan(v4).collect { - case CreateTableLikeCommand(t, s, l, allowExisting) => (t, s, l, allowExisting) - }.head + assert(fileFormat3.locationUri.map(_.toString) == Some("/spark/warehouse")) + assert(provider3.isEmpty) + + val v4 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2 LOCATION '/spark/warehouse'" + val (target4, source4, fileFormat4, provider4, properties4, exists4) = + parser.parsePlan(v4).collect { + case CreateTableLikeCommand(t, s, f, p, pr, e) => (t, s, f, p, pr, e) + }.head assert(exists4) assert(target4.database.isEmpty) assert(target4.table == "table1") assert(source4.database.isEmpty) assert(source4.table == "table2") - assert(location4 == Some("/spark/warehouse")) - } - - test("load data") { - val v1 = "LOAD DATA INPATH 'path' INTO TABLE table1" - val (table, path, isLocal, isOverwrite, partition) = parser.parsePlan(v1).collect { - case LoadDataCommand(t, path, l, o, partition) => (t, path, l, o, partition) - }.head - assert(table.database.isEmpty) - assert(table.table == "table1") - assert(path == "path") - assert(!isLocal) - assert(!isOverwrite) - assert(partition.isEmpty) - - val v2 = "LOAD DATA LOCAL INPATH 'path' OVERWRITE INTO TABLE table1 PARTITION(c='1', d='2')" - val (table2, path2, isLocal2, isOverwrite2, partition2) = parser.parsePlan(v2).collect { - case LoadDataCommand(t, path, l, o, partition) => (t, path, l, o, partition) - }.head - assert(table2.database.isEmpty) - assert(table2.table == "table1") - assert(path2 == "path") - assert(isLocal2) - assert(isOverwrite2) - assert(partition2.nonEmpty) - assert(partition2.get.apply("c") == "1" && partition2.get.apply("d") == "2") + assert(fileFormat4.locationUri.map(_.toString) == Some("/spark/warehouse")) + assert(provider4.isEmpty) + + val v5 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2 USING parquet" + val (target5, source5, fileFormat5, provider5, properties5, exists5) = + parser.parsePlan(v5).collect { + 
case CreateTableLikeCommand(t, s, f, p, pr, e) => (t, s, f, p, pr, e) + }.head + assert(exists5) + assert(target5.database.isEmpty) + assert(target5.table == "table1") + assert(source5.database.isEmpty) + assert(source5.table == "table2") + assert(fileFormat5.locationUri.isEmpty) + assert(provider5 == Some("parquet")) + + val v6 = "CREATE TABLE IF NOT EXISTS table1 LIKE table2 USING ORC" + val (target6, source6, fileFormat6, provider6, properties6, exists6) = + parser.parsePlan(v6).collect { + case CreateTableLikeCommand(t, s, f, p, pr, e) => (t, s, f, p, pr, e) + }.head + assert(exists6) + assert(target6.database.isEmpty) + assert(target6.table == "table1") + assert(source6.database.isEmpty) + assert(source6.table == "table2") + assert(fileFormat6.locationUri.isEmpty) + assert(provider6 == Some("ORC")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index fd1da2011f28e..dbf4b09403423 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -21,16 +21,18 @@ import java.io.{File, PrintWriter} import java.net.URI import java.util.Locale -import org.apache.hadoop.fs.Path -import org.scalatest.BeforeAndAfterEach +import org.apache.hadoop.fs.{Path, RawLocalFileSystem} +import org.apache.hadoop.fs.permission.{AclEntry, AclEntryScope, AclEntryType, AclStatus, FsAction, FsPermission} +import org.apache.spark.{SparkException, SparkFiles} import org.apache.spark.internal.config import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException} +import 
org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} @@ -82,7 +84,7 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession { val tabName = "tbl" withTable(tabName) { val e = intercept[AnalysisException] { - sql(s"CREATE TABLE $tabName (i INT, j STRING)") + sql(s"CREATE TABLE $tabName (i INT, j STRING) STORED AS parquet") }.getMessage assert(e.contains("Hive support is required to CREATE Hive TABLE")) } @@ -110,13 +112,13 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession { import testImplicits._ withTable("t", "t1") { var e = intercept[AnalysisException] { - sql("CREATE TABLE t SELECT 1 as a, 1 as b") + sql("CREATE TABLE t STORED AS parquet SELECT 1 as a, 1 as b") }.getMessage assert(e.contains("Hive support is required to CREATE Hive TABLE (AS SELECT)")) spark.range(1).select('id as 'a, 'id as 'b).write.saveAsTable("t1") e = intercept[AnalysisException] { - sql("CREATE TABLE t SELECT a, b from t1") + sql("CREATE TABLE t STORED AS parquet SELECT a, b from t1") }.getMessage assert(e.contains("Hive support is required to CREATE Hive TABLE (AS SELECT)")) } @@ -150,9 +152,9 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession { Seq(3 -> "c").toDF("i", "j").write.mode("append").saveAsTable("t") checkAnswer(spark.table("t"), Row(1, "a") :: Row(2, "b") :: Row(3, "c") :: Nil) - Seq("c" -> 3).toDF("i", 
"j").write.mode("append").saveAsTable("t") + Seq(3.5 -> 3).toDF("i", "j").write.mode("append").saveAsTable("t") checkAnswer(spark.table("t"), Row(1, "a") :: Row(2, "b") :: Row(3, "c") - :: Row(null, "3") :: Nil) + :: Row(3, "3") :: Nil) Seq(4 -> "d").toDF("i", "j").write.saveAsTable("t1") @@ -168,10 +170,66 @@ class InMemoryCatalogedDDLSuite extends DDLSuite with SharedSparkSession { assert(e.message.contains("It doesn't match the specified format")) } } + + test("throw exception if Create Table LIKE USING Hive built-in ORC in in-memory catalog") { + val catalog = spark.sessionState.catalog + withTable("s", "t") { + sql("CREATE TABLE s(a INT, b INT) USING parquet") + val source = catalog.getTableMetadata(TableIdentifier("s")) + assert(source.provider == Some("parquet")) + val e = intercept[AnalysisException] { + sql("CREATE TABLE t LIKE s USING org.apache.spark.sql.hive.orc") + }.getMessage + assert(e.contains("Hive built-in ORC data source must be used with Hive support enabled")) + } + } + + test("ALTER TABLE ALTER COLUMN with position is not supported") { + withTable("t") { + sql("CREATE TABLE t(i INT) USING parquet") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t ALTER COLUMN i FIRST") + } + assert(e.message.contains("ALTER COLUMN ... 
FIRST | AFTER is only supported with v2 tables")) + } + } + + test("SPARK-25403 refresh the table after inserting data") { + withTable("t") { + val catalog = spark.sessionState.catalog + val table = QualifiedTableName(catalog.getCurrentDatabase, "t") + sql("CREATE TABLE t (a INT) USING parquet") + sql("INSERT INTO TABLE t VALUES (1)") + assert(catalog.getCachedTable(table) === null, "Table relation should be invalidated.") + assert(spark.table("t").count() === 1) + assert(catalog.getCachedTable(table) !== null, "Table relation should be cached.") + } + } + + test("SPARK-19784 refresh the table after altering the table location") { + withTable("t") { + withTempDir { dir => + val catalog = spark.sessionState.catalog + val table = QualifiedTableName(catalog.getCurrentDatabase, "t") + val p1 = s"${dir.getCanonicalPath}/p1" + val p2 = s"${dir.getCanonicalPath}/p2" + sql(s"CREATE TABLE t (a INT) USING parquet LOCATION '$p1'") + sql("INSERT INTO TABLE t VALUES (1)") + assert(catalog.getCachedTable(table) === null, "Table relation should be invalidated.") + spark.range(5).toDF("a").write.parquet(p2) + spark.sql(s"ALTER TABLE t SET LOCATION '$p2'") + assert(catalog.getCachedTable(table) === null, "Table relation should be invalidated.") + assert(spark.table("t").count() === 5) + assert(catalog.getCachedTable(table) !== null, "Table relation should be cached.") + } + } + } } abstract class DDLSuite extends QueryTest with SQLTestUtils { + protected val reversedProperties = Seq(PROP_OWNER) + protected def isUsingHiveMetastore: Boolean = { spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive" } @@ -315,7 +373,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { try { sql(s"CREATE DATABASE $dbName") val db1 = catalog.getDatabaseMetadata(dbName) - assert(db1 == CatalogDatabase( + assert(db1.copy(properties = db1.properties -- reversedProperties) == CatalogDatabase( dbName, "", getDBPath(dbName), @@ -338,7 +396,7 @@ abstract class DDLSuite extends QueryTest 
with SQLTestUtils { sql(s"CREATE DATABASE $dbName Location '$path'") val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks) val expPath = makeQualifiedPath(tmpDir.toString) - assert(db1 == CatalogDatabase( + assert(db1.copy(properties = db1.properties -- reversedProperties) == CatalogDatabase( dbNameWithoutBackTicks, "", expPath, @@ -361,7 +419,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val dbNameWithoutBackTicks = cleanIdentifier(dbName) sql(s"CREATE DATABASE $dbName") val db1 = catalog.getDatabaseMetadata(dbNameWithoutBackTicks) - assert(db1 == CatalogDatabase( + assert(db1.copy(properties = db1.properties -- reversedProperties) == CatalogDatabase( dbNameWithoutBackTicks, "", getDBPath(dbNameWithoutBackTicks), @@ -734,29 +792,55 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql(s"CREATE DATABASE $dbName") checkAnswer( - sql(s"DESCRIBE DATABASE EXTENDED $dbName"), + sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value") + .where("key not like 'Owner%'"), // filter for consistency with in-memory catalog Row("Database Name", dbNameWithoutBackTicks) :: - Row("Description", "") :: + Row("Comment", "") :: Row("Location", CatalogUtils.URIToString(location)) :: Row("Properties", "") :: Nil) sql(s"ALTER DATABASE $dbName SET DBPROPERTIES ('a'='a', 'b'='b', 'c'='c')") checkAnswer( - sql(s"DESCRIBE DATABASE EXTENDED $dbName"), + sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value") + .where("key not like 'Owner%'"), // filter for consistency with in-memory catalog Row("Database Name", dbNameWithoutBackTicks) :: - Row("Description", "") :: + Row("Comment", "") :: Row("Location", CatalogUtils.URIToString(location)) :: Row("Properties", "((a,a), (b,b), (c,c))") :: Nil) sql(s"ALTER DATABASE $dbName SET DBPROPERTIES ('d'='d')") checkAnswer( - sql(s"DESCRIBE DATABASE EXTENDED $dbName"), + sql(s"DESCRIBE DATABASE EXTENDED $dbName").toDF("key", "value") + .where("key not like 'Owner%'"), // filter for consistency 
with in-memory catalog Row("Database Name", dbNameWithoutBackTicks) :: - Row("Description", "") :: + Row("Comment", "") :: Row("Location", CatalogUtils.URIToString(location)) :: Row("Properties", "((a,a), (b,b), (c,c), (d,d))") :: Nil) + + withTempDir { tmpDir => + if (isUsingHiveMetastore) { + val e1 = intercept[AnalysisException] { + sql(s"ALTER DATABASE $dbName SET LOCATION '${tmpDir.toURI}'") + } + assert(e1.getMessage.contains("does not support altering database location")) + } else { + sql(s"ALTER DATABASE $dbName SET LOCATION '${tmpDir.toURI}'") + val uriInCatalog = catalog.getDatabaseMetadata(dbNameWithoutBackTicks).locationUri + assert("file" === uriInCatalog.getScheme) + assert(new Path(tmpDir.getPath).toUri.getPath === uriInCatalog.getPath) + } + + intercept[NoSuchDatabaseException] { + sql(s"ALTER DATABASE `db-not-exist` SET LOCATION '${tmpDir.toURI}'") + } + + val e3 = intercept[IllegalArgumentException] { + sql(s"ALTER DATABASE $dbName SET LOCATION ''") + } + assert(e3.getMessage.contains("Can not create a Path from an empty string")) + } } finally { catalog.reset() } @@ -1134,7 +1218,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - protected def testRecoverPartitions() { + protected def testRecoverPartitions(): Unit = { val catalog = spark.sessionState.catalog // table to alter does not exist intercept[AnalysisException] { @@ -1372,7 +1456,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { // if (isUsingHiveMetastore) { // assert(storageFormat.properties.get("path") === expected) // } - assert(storageFormat.locationUri.map(_.getPath) === Some(expected.getPath)) + assert(storageFormat.locationUri === + Some(makeQualifiedPath(CatalogUtils.URIToString(expected)))) } // set table location sql("ALTER TABLE dbx.tab1 SET LOCATION '/path/to/your/lovely/heart'") @@ -1386,7 +1471,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { verifyLocation(new URI("/swanky/steak/place")) // set table partition 
location without explicitly specifying database sql("ALTER TABLE tab1 PARTITION (a='1', b='2') SET LOCATION 'vienna'") - verifyLocation(new URI("vienna"), Some(partSpec)) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tab1")) + val viennaPartPath = new Path(new Path(table.location), "vienna") + verifyLocation(CatalogUtils.stringToURI(viennaPartPath.toString), Some(partSpec)) // table to alter does not exist intercept[AnalysisException] { sql("ALTER TABLE dbx.does_not_exist SET LOCATION '/mister/spark'") @@ -1550,13 +1637,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { "PARTITION (a='2', b='6') LOCATION 'paris' PARTITION (a='3', b='7')") assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part3)) assert(catalog.getPartition(tableIdent, part1).storage.locationUri.isDefined) - val partitionLocation = if (isUsingHiveMetastore) { - val tableLocation = catalog.getTableMetadata(tableIdent).storage.locationUri - assert(tableLocation.isDefined) - makeQualifiedPath(new Path(tableLocation.get.toString, "paris").toString) - } else { - new URI("paris") - } + + val tableLocation = catalog.getTableMetadata(tableIdent).storage.locationUri + assert(tableLocation.isDefined) + val partitionLocation = makeQualifiedPath( + new Path(tableLocation.get.toString, "paris").toString) assert(catalog.getPartition(tableIdent, part2).storage.locationUri == Option(partitionLocation)) assert(catalog.getPartition(tableIdent, part3).storage.locationUri.isDefined) @@ -1701,7 +1786,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { column.map(_.metadata).getOrElse(Metadata.empty) } // Ensure that change column will preserve other metadata fields. 
- sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 col1 INT COMMENT 'this is col1'") + sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 TYPE INT") + sql("ALTER TABLE dbx.tab1 CHANGE COLUMN col1 COMMENT 'this is col1'") assert(getMetadata("col1").getString("key") == "value") assert(getMetadata("col1").getString("comment") == "this is col1") } @@ -1929,6 +2015,79 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } + test("SPARK-30312: truncate table - keep acl/permission") { + import testImplicits._ + val ignorePermissionAcl = Seq(true, false) + + ignorePermissionAcl.foreach { ignore => + withSQLConf( + "fs.file.impl" -> classOf[FakeLocalFsFileSystem].getName, + "fs.file.impl.disable.cache" -> "true", + SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL.key -> ignore.toString) { + withTable("tab1") { + sql("CREATE TABLE tab1 (col INT) USING parquet") + sql("INSERT INTO tab1 SELECT 1") + checkAnswer(spark.table("tab1"), Row(1)) + + val tablePath = new Path(spark.sessionState.catalog + .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get) + + val hadoopConf = spark.sessionState.newHadoopConf() + val fs = tablePath.getFileSystem(hadoopConf) + val fileStatus = fs.getFileStatus(tablePath); + + fs.setPermission(tablePath, new FsPermission("777")) + assert(fileStatus.getPermission().toString() == "rwxrwxrwx") + + // Set ACL to table path. 
+ val customAcl = new java.util.ArrayList[AclEntry]() + customAcl.add(new AclEntry.Builder() + .setName("test") + .setType(AclEntryType.USER) + .setScope(AclEntryScope.ACCESS) + .setPermission(FsAction.READ).build()) + fs.setAcl(tablePath, customAcl) + assert(fs.getAclStatus(tablePath).getEntries().get(0) == customAcl.get(0)) + + sql("TRUNCATE TABLE tab1") + assert(spark.table("tab1").collect().isEmpty) + + val fileStatus2 = fs.getFileStatus(tablePath) + if (ignore) { + assert(fileStatus2.getPermission().toString() != "rwxrwxrwx") + } else { + assert(fileStatus2.getPermission().toString() == "rwxrwxrwx") + } + val aclEntries = fs.getAclStatus(tablePath).getEntries() + if (ignore) { + assert(aclEntries.size() == 0) + } else { + assert(aclEntries.size() == 4) + assert(aclEntries.get(0) == customAcl.get(0)) + + // Setting ACLs will also set user/group/other permissions + // as ACL entries. + val user = new AclEntry.Builder() + .setType(AclEntryType.USER) + .setScope(AclEntryScope.ACCESS) + .setPermission(FsAction.ALL).build() + val group = new AclEntry.Builder() + .setType(AclEntryType.GROUP) + .setScope(AclEntryScope.ACCESS) + .setPermission(FsAction.ALL).build() + val other = new AclEntry.Builder() + .setType(AclEntryType.OTHER) + .setScope(AclEntryScope.ACCESS) + .setPermission(FsAction.ALL).build() + assert(aclEntries.get(1) == user) + assert(aclEntries.get(2) == group) + assert(aclEntries.get(3) == other) + } + } + } + } + } + test("create temporary view with mismatched schema") { withTable("tab1") { spark.range(10).write.saveAsTable("tab1") @@ -2040,7 +2199,8 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { test("show functions") { withUserDefinedFunction("add_one" -> true) { - val numFunctions = FunctionRegistry.functionSet.size.toLong + val numFunctions = FunctionRegistry.functionSet.size.toLong + + FunctionsCommand.virtualOperators.size.toLong assert(sql("show functions").count() === numFunctions) assert(sql("show system functions").count() 
=== numFunctions) assert(sql("show all functions").count() === numFunctions) @@ -2064,7 +2224,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val message = intercept[AnalysisException] { sql(s"SHOW COLUMNS IN $db.showcolumn FROM ${db.toUpperCase(Locale.ROOT)}") }.getMessage - assert(message.contains("SHOW COLUMNS with conflicting databases")) + assert(message.contains( + s"SHOW COLUMNS with conflicting databases: " + + s"'${db.toUpperCase(Locale.ROOT)}' != '$db'")) } } } @@ -2138,7 +2300,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { spark.sessionState.catalog.refreshTable(TableIdentifier("t")) val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) - assert(table1.location == newDir) + assert(table1.location == makeQualifiedPath(newDir.toString)) assert(!newDirFile.exists) spark.sql("INSERT INTO TABLE t SELECT 'c', 1") @@ -2501,6 +2663,13 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(table.location.toString.startsWith("file:/")) } + withTempDir { dir => + assert(!dir.getAbsolutePath.startsWith("file:/")) + spark.sql(s"ALTER TABLE t SET LOCATION '$dir'") + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(table.location.toString.startsWith("file:/")) + } + withTempDir { dir => assert(!dir.getAbsolutePath.startsWith("file:/")) // The parser does not recognize the backslashes on Windows as they are. 
@@ -2519,6 +2688,37 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } + test("the qualified path of a partition is stored in the catalog") { + withTable("t") { + withTempDir { dir => + spark.sql( + s""" + |CREATE TABLE t(a STRING, b STRING) + |USING ${dataSource} PARTITIONED BY(b) LOCATION '$dir' + """.stripMargin) + spark.sql("INSERT INTO TABLE t PARTITION(b=1) SELECT 2") + val part = spark.sessionState.catalog.getPartition(TableIdentifier("t"), Map("b" -> "1")) + assert(part.storage.locationUri.contains( + makeQualifiedPath(new File(dir, "b=1").getAbsolutePath))) + assert(part.storage.locationUri.get.toString.startsWith("file:/")) + } + withTempDir { dir => + spark.sql(s"ALTER TABLE t PARTITION(b=1) SET LOCATION '$dir'") + + val part = spark.sessionState.catalog.getPartition(TableIdentifier("t"), Map("b" -> "1")) + assert(part.storage.locationUri.contains(makeQualifiedPath(dir.getAbsolutePath))) + assert(part.storage.locationUri.get.toString.startsWith("file:/")) + } + + withTempDir { dir => + spark.sql(s"ALTER TABLE t ADD PARTITION(b=2) LOCATION '$dir'") + val part = spark.sessionState.catalog.getPartition(TableIdentifier("t"), Map("b" -> "2")) + assert(part.storage.locationUri.contains(makeQualifiedPath(dir.getAbsolutePath))) + assert(part.storage.locationUri.get.toString.startsWith("file:/")) + } + } + } + protected def testAddColumn(provider: String): Unit = { withTable("t1") { sql(s"CREATE TABLE t1 (c1 int) USING $provider") @@ -2599,7 +2799,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { sql("ALTER TABLE tmp_v ADD COLUMNS (c3 INT)") } - assert(e.message.contains("ALTER ADD COLUMNS does not support views")) + assert(e.message.contains("'tmp_v' is a view not a table")) } } @@ -2755,4 +2955,74 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } } + + test("Create Table LIKE USING provider") { + val catalog = spark.sessionState.catalog + withTable("s", "t1", "t2", 
"t3", "t4") { + sql("CREATE TABLE s(a INT, b INT) USING parquet") + val source = catalog.getTableMetadata(TableIdentifier("s")) + assert(source.provider == Some("parquet")) + + sql("CREATE TABLE t1 LIKE s USING orc") + val table1 = catalog.getTableMetadata(TableIdentifier("t1")) + assert(table1.provider == Some("orc")) + + sql("CREATE TABLE t2 LIKE s USING hive") + val table2 = catalog.getTableMetadata(TableIdentifier("t2")) + assert(table2.provider == Some("hive")) + + val e1 = intercept[ClassNotFoundException] { + sql("CREATE TABLE t3 LIKE s USING unknown") + }.getMessage + assert(e1.contains("Failed to find data source")) + + withGlobalTempView("src") { + val globalTempDB = spark.sharedState.globalTempViewManager.database + sql("CREATE GLOBAL TEMP VIEW src AS SELECT 1 AS a, '2' AS b") + sql(s"CREATE TABLE t4 LIKE $globalTempDB.src USING parquet") + val table = catalog.getTableMetadata(TableIdentifier("t4")) + assert(table.provider == Some("parquet")) + } + } + } + + test("Add a directory when spark.sql.legacy.addDirectory.recursive.enabled set to true") { + val directoryToAdd = Utils.createTempDir("/tmp/spark/addDirectory/") + val testFile = File.createTempFile("testFile", "1", directoryToAdd) + spark.sql(s"ADD FILE $directoryToAdd") + assert(new File(SparkFiles.get(s"${directoryToAdd.getName}/${testFile.getName}")).exists()) + } + + test("Add a directory when spark.sql.legacy.addDirectory.recursive.enabled not set to true") { + withTempDir { testDir => + withSQLConf(SQLConf.LEGACY_ADD_DIRECTORY_USING_RECURSIVE.key -> "false") { + val msg = intercept[SparkException] { + spark.sql(s"ADD FILE $testDir") + }.getMessage + assert(msg.contains("is a directory and recursive is not turned on")) + } + } + } +} + +object FakeLocalFsFileSystem { + var aclStatus = new AclStatus.Builder().build() +} + +// A fake test local filesystem used to test ACL. It keeps a ACL status. If deletes +// a path of this filesystem, it will clean up the ACL status. 
Note that for test purpose, +// it has only one ACL status for all paths. +class FakeLocalFsFileSystem extends RawLocalFileSystem { + import FakeLocalFsFileSystem._ + + override def delete(f: Path, recursive: Boolean): Boolean = { + aclStatus = new AclStatus.Builder().build() + super.delete(f, recursive) + } + + override def getAclStatus(path: Path): AclStatus = aclStatus + + override def setAcl(path: Path, aclSpec: java.util.List[AclEntry]): Unit = { + aclStatus = new AclStatus.Builder().addEntries(aclSpec).build() + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index bba1dc0f697a1..d439e5b1cd651 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -18,57 +18,102 @@ package org.apache.spark.sql.execution.command import java.net.URI -import java.util.Locale +import java.util.{Collections, Locale} import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.{mock, when} import org.mockito.invocation.InvocationOnMock import org.apache.spark.sql.{AnalysisException, SaveMode} -import org.apache.spark.sql.catalog.v2.{CatalogManager, CatalogNotFoundException, Identifier, TableCatalog} -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.AnalysisTest -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar, UnresolvedSubqueryColumnAliases, 
UnresolvedV2Relation} +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{CreateTableAsSelect, CreateV2Table, DropTable, LogicalPlan} -import org.apache.spark.sql.connector.InMemoryTableCatalog -import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceResolution} -import org.apache.spark.sql.internal.SQLConf.DEFAULT_V2_CATALOG -import org.apache.spark.sql.sources.v2.InMemoryTableProvider -import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StringType, StructType} -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, Assignment, CreateTableAsSelect, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} +import org.apache.spark.sql.connector.FakeV2Provider +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, Table, TableCapability, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.TableChange.{UpdateColumnComment, UpdateColumnType} +import org.apache.spark.sql.execution.datasources.CreateTable +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{CharType, DoubleType, HIVE_TYPE_STRING, IntegerType, LongType, MetadataBuilder, StringType, StructField, StructType} class PlanResolutionSuite extends AnalysisTest { import CatalystSqlParser._ - private val 
v2Format = classOf[InMemoryTableProvider].getName + private val v2Format = classOf[FakeV2Provider].getName + + private val table: Table = { + val t = mock(classOf[Table]) + when(t.schema()).thenReturn(new StructType().add("i", "int").add("s", "string")) + t + } + + private val tableWithAcceptAnySchemaCapability: Table = { + val t = mock(classOf[Table]) + when(t.schema()).thenReturn(new StructType().add("i", "int")) + when(t.capabilities()).thenReturn(Collections.singleton(TableCapability.ACCEPT_ANY_SCHEMA)) + t + } + + private val v1Table: V1Table = { + val t = mock(classOf[CatalogTable]) + when(t.schema).thenReturn(new StructType().add("i", "int").add("s", "string")) + when(t.tableType).thenReturn(CatalogTableType.MANAGED) + V1Table(t) + } private val testCat: TableCatalog = { - val newCatalog = new InMemoryTableCatalog - newCatalog.initialize("testcat", CaseInsensitiveStringMap.empty()) + val newCatalog = mock(classOf[TableCatalog]) + when(newCatalog.loadTable(any())).thenAnswer((invocation: InvocationOnMock) => { + invocation.getArgument[Identifier](0).name match { + case "tab" => table + case "tab1" => table + case name => throw new NoSuchTableException(name) + } + }) + when(newCatalog.name()).thenReturn("testcat") newCatalog } - private val v2SessionCatalog = { - val newCatalog = new InMemoryTableCatalog - newCatalog.initialize("session", CaseInsensitiveStringMap.empty()) + private val v2SessionCatalog: TableCatalog = { + val newCatalog = mock(classOf[TableCatalog]) + when(newCatalog.loadTable(any())).thenAnswer((invocation: InvocationOnMock) => { + invocation.getArgument[Identifier](0).name match { + case "v1Table" => v1Table + case "v1Table1" => v1Table + case "v2Table" => table + case "v2Table1" => table + case "v2TableWithAcceptAnySchemaCapability" => tableWithAcceptAnySchemaCapability + case name => throw new NoSuchTableException(name) + } + }) + when(newCatalog.name()).thenReturn(CatalogManager.SESSION_CATALOG_NAME) newCatalog } + private val 
v1SessionCatalog: SessionCatalog = new SessionCatalog( + new InMemoryCatalog, + EmptyFunctionRegistry, + new SQLConf().copy(SQLConf.CASE_SENSITIVE -> true)) + v1SessionCatalog.createTempView("v", LocalRelation(Nil), false) + private val catalogManagerWithDefault = { val manager = mock(classOf[CatalogManager]) when(manager.catalog(any())).thenAnswer((invocation: InvocationOnMock) => { invocation.getArgument[String](0) match { case "testcat" => testCat - case "session" => + case CatalogManager.SESSION_CATALOG_NAME => v2SessionCatalog case name => throw new CatalogNotFoundException(s"No such catalog: $name") } }) - when(manager.defaultCatalog).thenReturn(Some(testCat)) - when(manager.v2SessionCatalog).thenCallRealMethod() + when(manager.currentCatalog).thenReturn(testCat) + when(manager.currentNamespace).thenReturn(Array.empty[String]) + when(manager.v1SessionCatalog).thenReturn(v1SessionCatalog) manager } @@ -78,26 +123,37 @@ class PlanResolutionSuite extends AnalysisTest { invocation.getArgument[String](0) match { case "testcat" => testCat - case "session" => - v2SessionCatalog case name => throw new CatalogNotFoundException(s"No such catalog: $name") } }) - when(manager.defaultCatalog).thenReturn(None) - when(manager.v2SessionCatalog).thenCallRealMethod() + when(manager.currentCatalog).thenReturn(v2SessionCatalog) + when(manager.v1SessionCatalog).thenReturn(v1SessionCatalog) manager } def parseAndResolve(query: String, withDefault: Boolean = false): LogicalPlan = { - val newConf = conf.copy() - newConf.setConfString(DEFAULT_V2_CATALOG.key, "testcat") val catalogManager = if (withDefault) { catalogManagerWithDefault } else { catalogManagerWithoutDefault } - DataSourceResolution(newConf, catalogManager).apply(parsePlan(query)) + val analyzer = new Analyzer(catalogManager, conf) + // TODO: run the analyzer directly. 
+ val rules = Seq( + CTESubstitution, + ResolveInlineTables(conf), + analyzer.ResolveRelations, + new ResolveCatalogs(catalogManager), + new ResolveSessionCatalog(catalogManager, conf, _ == Seq("v")), + analyzer.ResolveTables, + analyzer.ResolveReferences, + analyzer.ResolveSubqueryColumnAliases, + analyzer.ResolveReferences, + analyzer.ResolveAlterTableChanges) + rules.foldLeft(parsePlan(query)) { + case (plan, rule) => rule.apply(plan) + } } private def parseResolveCompare(query: String, expected: LogicalPlan): Unit = @@ -335,7 +391,7 @@ class PlanResolutionSuite extends AnalysisTest { assert(desc.schema.isEmpty) // will be populated later when the table is actually created assert(desc.comment.contains("This is the staging page view table")) assert(desc.viewText.isEmpty) - assert(desc.viewDefaultDatabase.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) assert(desc.viewQueryColumnNames.isEmpty) assert(desc.partitionColumnNames.isEmpty) assert(desc.provider.contains("parquet")) @@ -443,7 +499,7 @@ class PlanResolutionSuite extends AnalysisTest { parseAndResolve(sql) match { case create: CreateV2Table => - assert(create.catalog.name == "session") + assert(create.catalog.name == CatalogManager.SESSION_CATALOG_NAME) assert(create.tableName == Identifier.of(Array("mydb"), "page_view")) assert(create.tableSchema == new StructType() .add("id", LongType) @@ -547,7 +603,7 @@ class PlanResolutionSuite extends AnalysisTest { parseAndResolve(sql) match { case ctas: CreateTableAsSelect => - assert(ctas.catalog.name == "session") + assert(ctas.catalog.name == CatalogManager.SESSION_CATALOG_NAME) assert(ctas.tableName == Identifier.of(Array("mydb"), "page_view")) assert(ctas.properties == expectedProperties) assert(ctas.writeOptions.isEmpty) @@ -647,51 +703,813 @@ class PlanResolutionSuite extends AnalysisTest { // ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment); // ALTER TABLE table_name UNSET TBLPROPERTIES [IF EXISTS] ('comment', 'key'); test("alter 
table: alter table properties") { - val sql1_table = "ALTER TABLE table_name SET TBLPROPERTIES ('test' = 'test', " + - "'comment' = 'new_comment')" - val sql2_table = "ALTER TABLE table_name UNSET TBLPROPERTIES ('comment', 'test')" - val sql3_table = "ALTER TABLE table_name UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" - - val parsed1_table = parseAndResolve(sql1_table) - val parsed2_table = parseAndResolve(sql2_table) - val parsed3_table = parseAndResolve(sql3_table) + Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach { + case (tblName, useV1Command) => + val sql1 = s"ALTER TABLE $tblName SET TBLPROPERTIES ('test' = 'test', " + + "'comment' = 'new_comment')" + val sql2 = s"ALTER TABLE $tblName UNSET TBLPROPERTIES ('comment', 'test')" + val sql3 = s"ALTER TABLE $tblName UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" + + val parsed1 = parseAndResolve(sql1) + val parsed2 = parseAndResolve(sql2) + val parsed3 = parseAndResolve(sql3) + + val tableIdent = TableIdentifier(tblName, None) + if (useV1Command) { + val expected1 = AlterTableSetPropertiesCommand( + tableIdent, Map("test" -> "test", "comment" -> "new_comment"), isView = false) + val expected2 = AlterTableUnsetPropertiesCommand( + tableIdent, Seq("comment", "test"), ifExists = false, isView = false) + val expected3 = AlterTableUnsetPropertiesCommand( + tableIdent, Seq("comment", "test"), ifExists = true, isView = false) + + comparePlans(parsed1, expected1) + comparePlans(parsed2, expected2) + comparePlans(parsed3, expected3) + } else { + parsed1 match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes == Seq( + TableChange.setProperty("test", "test"), + TableChange.setProperty("comment", "new_comment"))) + case _ => fail("expect AlterTable") + } + + parsed2 match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes == Seq( + TableChange.removeProperty("comment"), + TableChange.removeProperty("test"))) + case _ => 
fail("expect AlterTable") + } + + parsed3 match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes == Seq( + TableChange.removeProperty("comment"), + TableChange.removeProperty("test"))) + case _ => fail("expect AlterTable") + } + } + } - val tableIdent = TableIdentifier("table_name", None) - val expected1_table = AlterTableSetPropertiesCommand( - tableIdent, Map("test" -> "test", "comment" -> "new_comment"), isView = false) - val expected2_table = AlterTableUnsetPropertiesCommand( - tableIdent, Seq("comment", "test"), ifExists = false, isView = false) - val expected3_table = AlterTableUnsetPropertiesCommand( - tableIdent, Seq("comment", "test"), ifExists = true, isView = false) + val sql4 = "ALTER TABLE non_exist SET TBLPROPERTIES ('test' = 'test')" + val sql5 = "ALTER TABLE non_exist UNSET TBLPROPERTIES ('test')" + val parsed4 = parseAndResolve(sql4) + val parsed5 = parseAndResolve(sql5) - comparePlans(parsed1_table, expected1_table) - comparePlans(parsed2_table, expected2_table) - comparePlans(parsed3_table, expected3_table) + // For non-existing tables, we convert it to v2 command with `UnresolvedV2Table` + parsed4 match { + case AlterTable(_, _, _: UnresolvedV2Relation, _) => // OK + case _ => fail("Expect AlterTable, but got:\n" + parsed4.treeString) + } + parsed5 match { + case AlterTable(_, _, _: UnresolvedV2Relation, _) => // OK + case _ => fail("Expect AlterTable, but got:\n" + parsed5.treeString) + } } test("support for other types in TBLPROPERTIES") { - val sql = - """ - |ALTER TABLE table_name - |SET TBLPROPERTIES ('a' = 1, 'b' = 0.1, 'c' = TRUE) - """.stripMargin + Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach { + case (tblName, useV1Command) => + val sql = + s""" + |ALTER TABLE $tblName + |SET TBLPROPERTIES ('a' = 1, 'b' = 0.1, 'c' = TRUE) + """.stripMargin + val parsed = parseAndResolve(sql) + if (useV1Command) { + val expected = AlterTableSetPropertiesCommand( + TableIdentifier(tblName), + 
Map("a" -> "1", "b" -> "0.1", "c" -> "true"), + isView = false) + + comparePlans(parsed, expected) + } else { + parsed match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes == Seq( + TableChange.setProperty("a", "1"), + TableChange.setProperty("b", "0.1"), + TableChange.setProperty("c", "true"))) + case _ => fail("Expect AlterTable, but got:\n" + parsed.treeString) + } + } + } + } + + test("alter table: set location") { + Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach { + case (tblName, useV1Command) => + val sql = s"ALTER TABLE $tblName SET LOCATION 'new location'" + val parsed = parseAndResolve(sql) + if (useV1Command) { + val expected = AlterTableSetLocationCommand( + TableIdentifier(tblName, None), + None, + "new location") + comparePlans(parsed, expected) + } else { + parsed match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes == Seq(TableChange.setProperty("location", "new location"))) + case _ => fail("Expect AlterTable, but got:\n" + parsed.treeString) + } + } + } + } + + test("DESCRIBE relation") { + Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach { + case (tblName, useV1Command) => + val sql1 = s"DESC TABLE $tblName" + val sql2 = s"DESC TABLE EXTENDED $tblName" + val parsed1 = parseAndResolve(sql1) + val parsed2 = parseAndResolve(sql2) + if (useV1Command) { + val expected1 = DescribeTableCommand(TableIdentifier(tblName, None), Map.empty, false) + val expected2 = DescribeTableCommand(TableIdentifier(tblName, None), Map.empty, true) + + comparePlans(parsed1, expected1) + comparePlans(parsed2, expected2) + } else { + parsed1 match { + case DescribeRelation(_: ResolvedTable, _, isExtended) => + assert(!isExtended) + case _ => fail("Expect DescribeTable, but got:\n" + parsed1.treeString) + } + + parsed2 match { + case DescribeRelation(_: ResolvedTable, _, isExtended) => + assert(isExtended) + case _ => fail("Expect DescribeTable, but got:\n" 
+ parsed2.treeString) + } + } + + val sql3 = s"DESC TABLE $tblName PARTITION(a=1)" + val parsed3 = parseAndResolve(sql3) + if (useV1Command) { + val expected3 = DescribeTableCommand( + TableIdentifier(tblName, None), Map("a" -> "1"), false) + comparePlans(parsed3, expected3) + } else { + parsed3 match { + case DescribeRelation(_: ResolvedTable, partitionSpec, isExtended) => + assert(!isExtended) + assert(partitionSpec == Map("a" -> "1")) + case _ => fail("Expect DescribeTable, but got:\n" + parsed2.treeString) + } + } + } + + // use v1 command to describe views. + val sql4 = "DESC TABLE v" + val parsed4 = parseAndResolve(sql4) + assert(parsed4.isInstanceOf[DescribeTableCommand]) + } + + test("DELETE FROM") { + Seq("v2Table", "testcat.tab").foreach { tblName => + val sql1 = s"DELETE FROM $tblName" + val sql2 = s"DELETE FROM $tblName where name='Robert'" + val sql3 = s"DELETE FROM $tblName AS t where t.name='Robert'" + val sql4 = + s""" + |WITH s(name) AS (SELECT 'Robert') + |DELETE FROM $tblName AS t WHERE t.name IN (SELECT s.name FROM s) + """.stripMargin + + val parsed1 = parseAndResolve(sql1) + val parsed2 = parseAndResolve(sql2) + val parsed3 = parseAndResolve(sql3) + val parsed4 = parseAndResolve(sql4) + + parsed1 match { + case DeleteFromTable(AsDataSourceV2Relation(_), None) => + case _ => fail("Expect DeleteFromTable, but got:\n" + parsed1.treeString) + } + + parsed2 match { + case DeleteFromTable( + AsDataSourceV2Relation(_), + Some(EqualTo(name: UnresolvedAttribute, StringLiteral("Robert")))) => + assert(name.name == "name") + case _ => fail("Expect DeleteFromTable, but got:\n" + parsed2.treeString) + } + + parsed3 match { + case DeleteFromTable( + SubqueryAlias(AliasIdentifier("t", Seq()), AsDataSourceV2Relation(_)), + Some(EqualTo(name: UnresolvedAttribute, StringLiteral("Robert")))) => + assert(name.name == "t.name") + case _ => fail("Expect DeleteFromTable, but got:\n" + parsed3.treeString) + } + + parsed4 match { + case DeleteFromTable( + 
SubqueryAlias(AliasIdentifier("t", Seq()), AsDataSourceV2Relation(_)), + Some(InSubquery(values, query))) => + assert(values.size == 1 && values.head.isInstanceOf[UnresolvedAttribute]) + assert(values.head.asInstanceOf[UnresolvedAttribute].name == "t.name") + query match { + case ListQuery(Project(projects, SubqueryAlias(AliasIdentifier("s", Seq()), + UnresolvedSubqueryColumnAliases(outputColumnNames, Project(_, _: OneRowRelation)))), + _, _, _) => + assert(projects.size == 1 && projects.head.name == "s.name") + assert(outputColumnNames.size == 1 && outputColumnNames.head == "name") + case o => fail("Unexpected subquery: \n" + o.treeString) + } + + case _ => fail("Expect DeleteFromTable, but got:\n" + parsed4.treeString) + } + } + } + + test("UPDATE TABLE") { + Seq("v2Table", "testcat.tab").foreach { tblName => + val sql1 = s"UPDATE $tblName SET name='Robert', age=32" + val sql2 = s"UPDATE $tblName AS t SET name='Robert', age=32" + val sql3 = s"UPDATE $tblName AS t SET name='Robert', age=32 WHERE p=1" + val sql4 = + s""" + |WITH s(name) AS (SELECT 'Robert') + |UPDATE $tblName AS t + |SET t.age=32 + |WHERE t.name IN (SELECT s.name FROM s) + """.stripMargin + + val parsed1 = parseAndResolve(sql1) + val parsed2 = parseAndResolve(sql2) + val parsed3 = parseAndResolve(sql3) + val parsed4 = parseAndResolve(sql4) + + parsed1 match { + case UpdateTable( + AsDataSourceV2Relation(_), + Seq(Assignment(name: UnresolvedAttribute, StringLiteral("Robert")), + Assignment(age: UnresolvedAttribute, IntegerLiteral(32))), + None) => + assert(name.name == "name") + assert(age.name == "age") + + case _ => fail("Expect UpdateTable, but got:\n" + parsed1.treeString) + } + + parsed2 match { + case UpdateTable( + SubqueryAlias(AliasIdentifier("t", Seq()), AsDataSourceV2Relation(_)), + Seq(Assignment(name: UnresolvedAttribute, StringLiteral("Robert")), + Assignment(age: UnresolvedAttribute, IntegerLiteral(32))), + None) => + assert(name.name == "name") + assert(age.name == "age") + + case _ 
=> fail("Expect UpdateTable, but got:\n" + parsed2.treeString) + } + + parsed3 match { + case UpdateTable( + SubqueryAlias(AliasIdentifier("t", Seq()), AsDataSourceV2Relation(_)), + Seq(Assignment(name: UnresolvedAttribute, StringLiteral("Robert")), + Assignment(age: UnresolvedAttribute, IntegerLiteral(32))), + Some(EqualTo(p: UnresolvedAttribute, IntegerLiteral(1)))) => + assert(name.name == "name") + assert(age.name == "age") + assert(p.name == "p") + + case _ => fail("Expect UpdateTable, but got:\n" + parsed3.treeString) + } + + parsed4 match { + case UpdateTable(SubqueryAlias(AliasIdentifier("t", Seq()), AsDataSourceV2Relation(_)), + Seq(Assignment(key: UnresolvedAttribute, IntegerLiteral(32))), + Some(InSubquery(values, query))) => + assert(key.name == "t.age") + assert(values.size == 1 && values.head.isInstanceOf[UnresolvedAttribute]) + assert(values.head.asInstanceOf[UnresolvedAttribute].name == "t.name") + query match { + case ListQuery(Project(projects, SubqueryAlias(AliasIdentifier("s", Seq()), + UnresolvedSubqueryColumnAliases(outputColumnNames, Project(_, _: OneRowRelation)))), + _, _, _) => + assert(projects.size == 1 && projects.head.name == "s.name") + assert(outputColumnNames.size == 1 && outputColumnNames.head == "name") + case o => fail("Unexpected subquery: \n" + o.treeString) + } + + case _ => fail("Expect UpdateTable, but got:\n" + parsed4.treeString) + } + } + + val sql = "UPDATE non_existing SET id=1" val parsed = parseAndResolve(sql) - val expected = AlterTableSetPropertiesCommand( - TableIdentifier("table_name"), - Map("a" -> "1", "b" -> "0.1", "c" -> "true"), - isView = false) + parsed match { + case u: UpdateTable => + assert(u.table.isInstanceOf[UnresolvedRelation]) + case _ => fail("Expect UpdateTable, but got:\n" + parsed.treeString) + } + } - comparePlans(parsed, expected) + test("alter table: alter column") { + Seq("v1Table" -> true, "v2Table" -> false, "testcat.tab" -> false).foreach { + case (tblName, useV1Command) => + val sql1 = 
s"ALTER TABLE $tblName ALTER COLUMN i TYPE bigint" + val sql2 = s"ALTER TABLE $tblName ALTER COLUMN i COMMENT 'new comment'" + + val parsed1 = parseAndResolve(sql1) + val parsed2 = parseAndResolve(sql2) + + val tableIdent = TableIdentifier(tblName, None) + if (useV1Command) { + val oldColumn = StructField("i", IntegerType) + val newColumn = StructField("i", LongType) + val expected1 = AlterTableChangeColumnCommand( + tableIdent, "i", newColumn) + val expected2 = AlterTableChangeColumnCommand( + tableIdent, "i", oldColumn.withComment("new comment")) + + comparePlans(parsed1, expected1) + comparePlans(parsed2, expected2) + + val sql3 = s"ALTER TABLE $tblName ALTER COLUMN j COMMENT 'new comment'" + val e1 = intercept[AnalysisException] { + parseAndResolve(sql3) + } + assert(e1.getMessage.contains( + "ALTER COLUMN cannot find column j in v1 table. Available: i, s")) + + val sql4 = s"ALTER TABLE $tblName ALTER COLUMN a.b.c TYPE bigint" + val e2 = intercept[AnalysisException] { + parseAndResolve(sql4) + } + assert(e2.getMessage.contains( + "ALTER COLUMN with qualified column is only supported with v2 tables")) + + val sql5 = s"ALTER TABLE $tblName ALTER COLUMN i TYPE char(1)" + val builder = new MetadataBuilder + builder.putString(HIVE_TYPE_STRING, CharType(1).catalogString) + val newColumnWithCleanedType = StructField("i", StringType, true, builder.build()) + val expected5 = AlterTableChangeColumnCommand( + tableIdent, "i", newColumnWithCleanedType) + val parsed5 = parseAndResolve(sql5) + comparePlans(parsed5, expected5) + } else { + parsed1 match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes == Seq( + TableChange.updateColumnType(Array("i"), LongType))) + case _ => fail("expect AlterTable") + } + + parsed2 match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes == Seq( + TableChange.updateColumnComment(Array("i"), "new comment"))) + case _ => fail("expect AlterTable") + } + } + } } - test("alter table: set 
location") { - val sql1 = "ALTER TABLE table_name SET LOCATION 'new location'" - val parsed1 = parseAndResolve(sql1) - val tableIdent = TableIdentifier("table_name", None) - val expected1 = AlterTableSetLocationCommand( - tableIdent, - None, - "new location") - comparePlans(parsed1, expected1) + test("alter table: alter column action is not specified") { + val e = intercept[AnalysisException] { + parseAndResolve("ALTER TABLE v1Table ALTER COLUMN i") + } + assert(e.getMessage.contains( + "ALTER TABLE table ALTER COLUMN requires a TYPE, a SET/DROP, a COMMENT, or a FIRST/AFTER")) + } + + test("alter table: alter column case sensitivity for v1 table") { + val tblName = "v1Table" + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val sql = s"ALTER TABLE $tblName ALTER COLUMN I COMMENT 'new comment'" + if (caseSensitive) { + val e = intercept[AnalysisException] { + parseAndResolve(sql) + } + assert(e.getMessage.contains( + "ALTER COLUMN cannot find column I in v1 table. 
Available: i, s")) + } else { + val actual = parseAndResolve(sql) + val expected = AlterTableChangeColumnCommand( + TableIdentifier(tblName, None), + "I", + StructField("I", IntegerType).withComment("new comment")) + comparePlans(actual, expected) + } + } + } + } + + test("alter table: hive style change column") { + Seq("v2Table", "testcat.tab").foreach { tblName => + parseAndResolve(s"ALTER TABLE $tblName CHANGE COLUMN i i int COMMENT 'an index'") match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes.length == 1, "Should only have a comment change") + assert(changes.head.isInstanceOf[UpdateColumnComment], + s"Expected only a UpdateColumnComment change but got: ${changes.head}") + case _ => fail("expect AlterTable") + } + + parseAndResolve(s"ALTER TABLE $tblName CHANGE COLUMN i i long COMMENT 'an index'") match { + case AlterTable(_, _, _: DataSourceV2Relation, changes) => + assert(changes.length == 2, "Should have a comment change and type change") + assert(changes.exists(_.isInstanceOf[UpdateColumnComment]), + s"Expected UpdateColumnComment change but got: ${changes}") + assert(changes.exists(_.isInstanceOf[UpdateColumnType]), + s"Expected UpdateColumnType change but got: ${changes}") + case _ => fail("expect AlterTable") + } + } + } + + val DSV2ResolutionTests = { + val v2SessionCatalogTable = s"${CatalogManager.SESSION_CATALOG_NAME}.v2Table" + Seq( + ("ALTER TABLE testcat.tab ALTER COLUMN i TYPE bigint", false), + ("ALTER TABLE tab ALTER COLUMN i TYPE bigint", false), + (s"ALTER TABLE $v2SessionCatalogTable ALTER COLUMN i TYPE bigint", true), + ("INSERT INTO TABLE tab VALUES (1)", false), + ("INSERT INTO TABLE testcat.tab VALUES (1)", false), + (s"INSERT INTO TABLE $v2SessionCatalogTable VALUES (1)", true), + ("DESC TABLE tab", false), + ("DESC TABLE testcat.tab", false), + (s"DESC TABLE $v2SessionCatalogTable", true), + ("SHOW TBLPROPERTIES tab", false), + ("SHOW TBLPROPERTIES testcat.tab", false), + (s"SHOW TBLPROPERTIES 
$v2SessionCatalogTable", true), + ("SELECT * from tab", false), + ("SELECT * from testcat.tab", false), + (s"SELECT * from ${CatalogManager.SESSION_CATALOG_NAME}.v2Table", true) + ) + } + + DSV2ResolutionTests.foreach { case (sql, isSessionCatlog) => + test(s"Data source V2 relation resolution '$sql'") { + val parsed = parseAndResolve(sql, withDefault = true) + val catlogIdent = if (isSessionCatlog) v2SessionCatalog else testCat + val tableIdent = if (isSessionCatlog) "v2Table" else "tab" + parsed match { + case AlterTable(_, _, r: DataSourceV2Relation, _) => + assert(r.catalog.exists(_ == catlogIdent)) + assert(r.identifier.exists(_.name() == tableIdent)) + case Project(_, AsDataSourceV2Relation(r)) => + assert(r.catalog.exists(_ == catlogIdent)) + assert(r.identifier.exists(_.name() == tableIdent)) + case InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _) => + assert(r.catalog.exists(_ == catlogIdent)) + assert(r.identifier.exists(_.name() == tableIdent)) + case DescribeRelation(r: ResolvedTable, _, _) => + assert(r.catalog == catlogIdent) + assert(r.identifier.name() == tableIdent) + case ShowTableProperties(r: ResolvedTable, _) => + assert(r.catalog == catlogIdent) + assert(r.identifier.name() == tableIdent) + case ShowTablePropertiesCommand(t: TableIdentifier, _) => + assert(t.identifier == tableIdent) + } + } + } + + test("MERGE INTO TABLE") { + def checkResolution( + target: LogicalPlan, + source: LogicalPlan, + mergeCondition: Expression, + deleteCondAttr: Option[AttributeReference], + updateCondAttr: Option[AttributeReference], + insertCondAttr: Option[AttributeReference], + updateAssigns: Seq[Assignment], + insertAssigns: Seq[Assignment], + starInUpdate: Boolean = false): Unit = { + val ti = target.output.find(_.name == "i").get.asInstanceOf[AttributeReference] + val ts = target.output.find(_.name == "s").get.asInstanceOf[AttributeReference] + val si = source.output.find(_.name == "i").get.asInstanceOf[AttributeReference] + val ss = 
source.output.find(_.name == "s").get.asInstanceOf[AttributeReference] + + mergeCondition match { + case EqualTo(l: AttributeReference, r: AttributeReference) => + assert(l.sameRef(ti) && r.sameRef(si)) + case other => fail("unexpected merge condition " + other) + } + + deleteCondAttr.foreach(a => assert(a.sameRef(ts))) + updateCondAttr.foreach(a => assert(a.sameRef(ts))) + insertCondAttr.foreach(a => assert(a.sameRef(ss))) + + if (starInUpdate) { + assert(updateAssigns.size == 2) + assert(updateAssigns(0).key.asInstanceOf[AttributeReference].sameRef(ti)) + assert(updateAssigns(0).value.asInstanceOf[AttributeReference].sameRef(si)) + assert(updateAssigns(1).key.asInstanceOf[AttributeReference].sameRef(ts)) + assert(updateAssigns(1).value.asInstanceOf[AttributeReference].sameRef(ss)) + } else { + assert(updateAssigns.size == 1) + assert(updateAssigns.head.key.asInstanceOf[AttributeReference].sameRef(ts)) + assert(updateAssigns.head.value.asInstanceOf[AttributeReference].sameRef(ss)) + } + assert(insertAssigns.size == 2) + assert(insertAssigns(0).key.asInstanceOf[AttributeReference].sameRef(ti)) + assert(insertAssigns(0).value.asInstanceOf[AttributeReference].sameRef(si)) + assert(insertAssigns(1).key.asInstanceOf[AttributeReference].sameRef(ts)) + assert(insertAssigns(1).value.asInstanceOf[AttributeReference].sameRef(ss)) + } + + Seq(("v2Table", "v2Table1"), ("testcat.tab", "testcat.tab1")).foreach { + case(target, source) => + // basic + val sql1 = + s""" + |MERGE INTO $target AS target + |USING $source AS source + |ON target.i = source.i + |WHEN MATCHED AND (target.s='delete') THEN DELETE + |WHEN MATCHED AND (target.s='update') THEN UPDATE SET target.s = source.s + |WHEN NOT MATCHED AND (source.s='insert') + | THEN INSERT (target.i, target.s) values (source.i, source.s) + """.stripMargin + parseAndResolve(sql1) match { + case MergeIntoTable( + SubqueryAlias(AliasIdentifier("target", Seq()), AsDataSourceV2Relation(target)), + SubqueryAlias(AliasIdentifier("source", 
Seq()), AsDataSourceV2Relation(source)), + mergeCondition, + Seq(DeleteAction(Some(EqualTo(dl: AttributeReference, StringLiteral("delete")))), + UpdateAction(Some(EqualTo(ul: AttributeReference, StringLiteral("update"))), + updateAssigns)), + Seq(InsertAction(Some(EqualTo(il: AttributeReference, StringLiteral("insert"))), + insertAssigns))) => + checkResolution(target, source, mergeCondition, Some(dl), Some(ul), Some(il), + updateAssigns, insertAssigns) + + case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) + } + + // star + val sql2 = + s""" + |MERGE INTO $target AS target + |USING $source AS source + |ON target.i = source.i + |WHEN MATCHED AND (target.s='delete') THEN DELETE + |WHEN MATCHED AND (target.s='update') THEN UPDATE SET * + |WHEN NOT MATCHED AND (source.s='insert') THEN INSERT * + """.stripMargin + parseAndResolve(sql2) match { + case MergeIntoTable( + SubqueryAlias(AliasIdentifier("target", Seq()), AsDataSourceV2Relation(target)), + SubqueryAlias(AliasIdentifier("source", Seq()), AsDataSourceV2Relation(source)), + mergeCondition, + Seq(DeleteAction(Some(EqualTo(dl: AttributeReference, StringLiteral("delete")))), + UpdateAction(Some(EqualTo(ul: AttributeReference, + StringLiteral("update"))), updateAssigns)), + Seq(InsertAction(Some(EqualTo(il: AttributeReference, StringLiteral("insert"))), + insertAssigns))) => + checkResolution(target, source, mergeCondition, Some(dl), Some(ul), Some(il), + updateAssigns, insertAssigns, starInUpdate = true) + + case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) + } + + // no additional conditions + val sql3 = + s""" + |MERGE INTO $target AS target + |USING $source AS source + |ON target.i = source.i + |WHEN MATCHED THEN DELETE + |WHEN MATCHED THEN UPDATE SET target.s = source.s + |WHEN NOT MATCHED THEN INSERT (target.i, target.s) values (source.i, source.s) + """.stripMargin + parseAndResolve(sql3) match { + case MergeIntoTable( + SubqueryAlias(AliasIdentifier("target", 
Seq()), AsDataSourceV2Relation(target)), + SubqueryAlias(AliasIdentifier("source", Seq()), AsDataSourceV2Relation(source)), + mergeCondition, + Seq(DeleteAction(None), UpdateAction(None, updateAssigns)), + Seq(InsertAction(None, insertAssigns))) => + checkResolution(target, source, mergeCondition, None, None, None, + updateAssigns, insertAssigns) + + case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) + } + + // using subquery + val sql4 = + s""" + |MERGE INTO $target AS target + |USING (SELECT * FROM $source) AS source + |ON target.i = source.i + |WHEN MATCHED AND (target.s='delete') THEN DELETE + |WHEN MATCHED AND (target.s='update') THEN UPDATE SET target.s = source.s + |WHEN NOT MATCHED AND (source.s='insert') + | THEN INSERT (target.i, target.s) values (source.i, source.s) + """.stripMargin + parseAndResolve(sql4) match { + case MergeIntoTable( + SubqueryAlias(AliasIdentifier("target", Seq()), AsDataSourceV2Relation(target)), + SubqueryAlias(AliasIdentifier("source", Seq()), source: Project), + mergeCondition, + Seq(DeleteAction(Some(EqualTo(dl: AttributeReference, StringLiteral("delete")))), + UpdateAction(Some(EqualTo(ul: AttributeReference, StringLiteral("update"))), + updateAssigns)), + Seq(InsertAction(Some(EqualTo(il: AttributeReference, StringLiteral("insert"))), + insertAssigns))) => + checkResolution(target, source, mergeCondition, Some(dl), Some(ul), Some(il), + updateAssigns, insertAssigns) + + case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) + } + + // cte + val sql5 = + s""" + |WITH source(i, s) AS + | (SELECT * FROM $source) + |MERGE INTO $target AS target + |USING source + |ON target.i = source.i + |WHEN MATCHED AND (target.s='delete') THEN DELETE + |WHEN MATCHED AND (target.s='update') THEN UPDATE SET target.s = source.s + |WHEN NOT MATCHED AND (source.s='insert') + |THEN INSERT (target.i, target.s) values (source.i, source.s) + """.stripMargin + parseAndResolve(sql5) match { + case 
MergeIntoTable( + SubqueryAlias(AliasIdentifier("target", Seq()), AsDataSourceV2Relation(target)), + SubqueryAlias(AliasIdentifier("source", Seq()), source: Project), + mergeCondition, + Seq(DeleteAction(Some(EqualTo(dl: AttributeReference, StringLiteral("delete")))), + UpdateAction(Some(EqualTo(ul: AttributeReference, StringLiteral("update"))), + updateAssigns)), + Seq(InsertAction(Some(EqualTo(il: AttributeReference, StringLiteral("insert"))), + insertAssigns))) => + assert(source.output.map(_.name) == Seq("i", "s")) + checkResolution(target, source, mergeCondition, Some(dl), Some(ul), Some(il), + updateAssigns, insertAssigns) + + case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) + } + } + + // no aliases + Seq(("v2Table", "v2Table1"), + ("testcat.tab", "testcat.tab1")).foreach { pair => + + val target = pair._1 + val source = pair._2 + + val sql1 = + s""" + |MERGE INTO $target + |USING $source + |ON 1 = 1 + |WHEN MATCHED THEN DELETE + |WHEN MATCHED THEN UPDATE SET s = 1 + |WHEN NOT MATCHED AND (s = 'a') THEN INSERT (i) values (i) + """.stripMargin + + parseAndResolve(sql1) match { + case MergeIntoTable( + AsDataSourceV2Relation(target), + AsDataSourceV2Relation(source), + _, + Seq(DeleteAction(None), UpdateAction(None, updateAssigns)), + Seq(InsertAction( + Some(EqualTo(il: AttributeReference, StringLiteral("a"))), + insertAssigns))) => + val ti = target.output.find(_.name == "i").get + val ts = target.output.find(_.name == "s").get + val si = source.output.find(_.name == "i").get + val ss = source.output.find(_.name == "s").get + + // INSERT condition is resolved with source table only, so column `s` is not ambiguous. + assert(il.sameRef(ss)) + assert(updateAssigns.size == 1) + // UPDATE key is resolved with target table only, so column `s` is not ambiguous. 
+ assert(updateAssigns.head.key.asInstanceOf[AttributeReference].sameRef(ts)) + assert(insertAssigns.size == 1) + // INSERT key is resolved with target table only, so column `i` is not ambiguous. + assert(insertAssigns.head.key.asInstanceOf[AttributeReference].sameRef(ti)) + // INSERT value is resolved with source table only, so column `i` is not ambiguous. + assert(insertAssigns.head.value.asInstanceOf[AttributeReference].sameRef(si)) + + case p => fail("Expect MergeIntoTable, but got:\n" + p.treeString) + } + + val sql2 = + s""" + |MERGE INTO $target + |USING $source + |ON i = 1 + |WHEN MATCHED THEN DELETE + """.stripMargin + // merge condition is resolved with both target and source tables, and we can't + // resolve column `i` as it's ambiguous. + val e2 = intercept[AnalysisException](parseAndResolve(sql2)) + assert(e2.message.contains("Reference 'i' is ambiguous")) + + val sql3 = + s""" + |MERGE INTO $target + |USING $source + |ON 1 = 1 + |WHEN MATCHED AND (s='delete') THEN DELETE + """.stripMargin + // delete condition is resolved with both target and source tables, and we can't + // resolve column `s` as it's ambiguous. + val e3 = intercept[AnalysisException](parseAndResolve(sql3)) + assert(e3.message.contains("Reference 's' is ambiguous")) + + val sql4 = + s""" + |MERGE INTO $target + |USING $source + |ON 1 = 1 + |WHEN MATCHED AND (s = 'a') THEN UPDATE SET i = 1 + """.stripMargin + // update condition is resolved with both target and source tables, and we can't + // resolve column `s` as it's ambiguous. + val e4 = intercept[AnalysisException](parseAndResolve(sql4)) + assert(e4.message.contains("Reference 's' is ambiguous")) + + val sql5 = + s""" + |MERGE INTO $target + |USING $source + |ON 1 = 1 + |WHEN MATCHED THEN UPDATE SET s = s + """.stripMargin + // update value is resolved with both target and source tables, and we can't + // resolve column `s` as it's ambiguous. 
+ val e5 = intercept[AnalysisException](parseAndResolve(sql5)) + assert(e5.message.contains("Reference 's' is ambiguous")) + } + + val sql6 = + s""" + |MERGE INTO non_exist_target + |USING non_exist_source + |ON target.i = source.i + |WHEN MATCHED THEN DELETE + |WHEN MATCHED THEN UPDATE SET * + |WHEN NOT MATCHED THEN INSERT * + """.stripMargin + val parsed = parseAndResolve(sql6) + parsed match { + case u: MergeIntoTable => + assert(u.targetTable.isInstanceOf[UnresolvedRelation]) + assert(u.sourceTable.isInstanceOf[UnresolvedRelation]) + case _ => fail("Expect MergeIntoTable, but got:\n" + parsed.treeString) + } + } + + test("MERGE INTO TABLE - skip resolution on v2 tables that accept any schema") { + val sql = + s""" + |MERGE INTO v2TableWithAcceptAnySchemaCapability AS target + |USING v2Table AS source + |ON target.i = source.i + |WHEN MATCHED AND (target.s='delete') THEN DELETE + |WHEN MATCHED AND (target.s='update') THEN UPDATE SET target.s = source.s + |WHEN NOT MATCHED AND (target.s='insert') + | THEN INSERT (target.i, target.s) values (source.i, source.s) + """.stripMargin + + parseAndResolve(sql) match { + case MergeIntoTable( + SubqueryAlias(AliasIdentifier("target", Seq()), AsDataSourceV2Relation(_)), + SubqueryAlias(AliasIdentifier("source", Seq()), AsDataSourceV2Relation(_)), + EqualTo(l: UnresolvedAttribute, r: UnresolvedAttribute), + Seq( + DeleteAction(Some(EqualTo(dl: UnresolvedAttribute, StringLiteral("delete")))), + UpdateAction( + Some(EqualTo(ul: UnresolvedAttribute, StringLiteral("update"))), + updateAssigns)), + Seq( + InsertAction( + Some(EqualTo(il: UnresolvedAttribute, StringLiteral("insert"))), + insertAssigns))) => + assert(l.name == "target.i" && r.name == "source.i") + assert(dl.name == "target.s") + assert(ul.name == "target.s") + assert(il.name == "target.s") + assert(updateAssigns.size == 1) + assert(updateAssigns.head.key.asInstanceOf[UnresolvedAttribute].name == "target.s") + 
assert(updateAssigns.head.value.asInstanceOf[UnresolvedAttribute].name == "source.s") + assert(insertAssigns.size == 2) + assert(insertAssigns.head.key.asInstanceOf[UnresolvedAttribute].name == "target.i") + assert(insertAssigns.head.value.asInstanceOf[UnresolvedAttribute].name == "source.i") + + case l => fail("Expected unresolved MergeIntoTable, but got:\n" + l.treeString) + } } + // TODO: add tests for more commands. } + +object AsDataSourceV2Relation { + def unapply(plan: LogicalPlan): Option[DataSourceV2Relation] = plan match { + case SubqueryAlias(_, r: DataSourceV2Relation) => Some(r) + case _ => None + } +} + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index a44a94aaa4f94..b76db70494cf8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -222,7 +222,7 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { test("SPARK-26865 DataSourceV2Strategy should push normalized filters") { val attrInt = 'cint.int assertResult(Seq(IsNotNull(attrInt))) { - DataSourceStrategy.normalizeFilters(Seq(IsNotNull(attrInt.withName("CiNt"))), Seq(attrInt)) + DataSourceStrategy.normalizeExprs(Seq(IsNotNull(attrInt.withName("CiNt"))), Seq(attrInt)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index 4b086e830e456..553773e2555cf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -352,6 +352,26 @@ class FileIndexSuite extends SharedSparkSession { 
"driver side must not be negative")) } + test ("SPARK-29537: throw exception when user defined a wrong base path") { + withTempDir { dir => + val partitionDirectory = new File(dir, "a=foo") + partitionDirectory.mkdir() + val file = new File(partitionDirectory, "text.txt") + stringToFile(file, "text") + val path = new Path(dir.getCanonicalPath) + val wrongBasePath = new File(dir, "unknown") + // basePath must be a directory + wrongBasePath.mkdir() + val parameters = Map("basePath" -> wrongBasePath.getCanonicalPath) + val fileIndex = new InMemoryFileIndex(spark, Seq(path), parameters, None) + val msg = intercept[IllegalArgumentException] { + // trigger inferPartitioning() + fileIndex.partitionSpec() + }.getMessage + assert(msg === s"Wrong basePath ${wrongBasePath.getCanonicalPath} for the root path: $path") + } + } + test("refresh for InMemoryFileIndex with FileStatusCache") { withTempDir { dir => val fileStatusCache = FileStatusCache.getOrCreate(spark) @@ -416,6 +436,35 @@ class FileIndexSuite extends SharedSparkSession { } } + test("Add an option to ignore block locations when listing file") { + withTempDir { dir => + val partitionDirectory = new File(dir, "a=foo") + partitionDirectory.mkdir() + for (i <- 1 to 8) { + val file = new File(partitionDirectory, i + ".txt") + stringToFile(file, "text") + } + val path = new Path(dir.getCanonicalPath) + val fileIndex = new InMemoryFileIndex(spark, Seq(path), Map.empty, None) + withSQLConf(SQLConf.IGNORE_DATA_LOCALITY.key -> "false", + "fs.file.impl" -> classOf[SpecialBlockLocationFileSystem].getName) { + val withBlockLocations = fileIndex. + listLeafFiles(Seq(new Path(partitionDirectory.getPath))) + + withSQLConf(SQLConf.IGNORE_DATA_LOCALITY.key -> "true") { + val withoutBlockLocations = fileIndex. 
+ listLeafFiles(Seq(new Path(partitionDirectory.getPath))) + + assert(withBlockLocations.size == withoutBlockLocations.size) + assert(withBlockLocations.forall(b => b.isInstanceOf[LocatedFileStatus] && + b.asInstanceOf[LocatedFileStatus].getBlockLocations.nonEmpty)) + assert(withoutBlockLocations.forall(b => b.isInstanceOf[FileStatus] && + !b.isInstanceOf[LocatedFileStatus])) + assert(withoutBlockLocations.forall(withBlockLocations.contains)) + } + } + } + } } object DeletionRaceFileSystem { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index fa8111407665a..812305ba24403 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -31,12 +31,13 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionSet, PredicateHelper} import org.apache.spark.sql.catalyst.util -import org.apache.spark.sql.execution.{DataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.{DataSourceScanExec, FileSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} import org.apache.spark.util.Utils class FileSourceStrategySuite extends QueryTest with SharedSparkSession with PredicateHelper { @@ -497,6 +498,36 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession 
with Pre } } + test("SPARK-29768: Column pruning through non-deterministic expressions") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") { + withTempPath { path => + spark.range(10) + .selectExpr("id as key", "id * 3 as s1", "id * 5 as s2") + .write.format("parquet").save(path.getAbsolutePath) + val df1 = spark.read.parquet(path.getAbsolutePath) + val df2 = df1.selectExpr("key", "rand()").where("key > 5") + val plan = df2.queryExecution.sparkPlan + val scan = plan.collect { case scan: FileSourceScanExec => scan } + assert(scan.size === 1) + assert(scan.head.requiredSchema == StructType(StructField("key", LongType) :: Nil)) + } + } + + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + withTempPath { path => + spark.range(10) + .selectExpr("id as key", "id * 3 as s1", "id * 5 as s2") + .write.format("parquet").save(path.getAbsolutePath) + val df1 = spark.read.parquet(path.getAbsolutePath) + val df2 = df1.selectExpr("key", "rand()").where("key > 5") + val plan = df2.queryExecution.optimizedPlan + val scan = plan.collect { case r: DataSourceV2ScanRelation => r } + assert(scan.size === 1) + assert(scan.head.scan.readSchema() == StructType(StructField("key", LongType) :: Nil)) + } + } + } + // Helpers for checking the arguments passed to the FileFormat. 
protected val checkPartitionSchema = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala index d5502ba5737c0..5256043289d5e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala @@ -99,13 +99,13 @@ class OrcReadSchemaSuite override val format: String = "orc" - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() originalConf = spark.conf.get(SQLConf.ORC_VECTORIZED_READER_ENABLED) spark.conf.set(SQLConf.ORC_VECTORIZED_READER_ENABLED.key, "false") } - override def afterAll() { + override def afterAll(): Unit = { spark.conf.set(SQLConf.ORC_VECTORIZED_READER_ENABLED.key, originalConf) super.afterAll() } @@ -124,13 +124,13 @@ class VectorizedOrcReadSchemaSuite override val format: String = "orc" - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() originalConf = spark.conf.get(SQLConf.ORC_VECTORIZED_READER_ENABLED) spark.conf.set(SQLConf.ORC_VECTORIZED_READER_ENABLED.key, "true") } - override def afterAll() { + override def afterAll(): Unit = { spark.conf.set(SQLConf.ORC_VECTORIZED_READER_ENABLED.key, originalConf) super.afterAll() } @@ -165,13 +165,13 @@ class ParquetReadSchemaSuite override val format: String = "parquet" - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() originalConf = spark.conf.get(SQLConf.PARQUET_VECTORIZED_READER_ENABLED) spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "false") } - override def afterAll() { + override def afterAll(): Unit = { spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, originalConf) super.afterAll() } @@ -187,13 +187,13 @@ class VectorizedParquetReadSchemaSuite override val format: String = "parquet" - override def beforeAll() { 
+ override def beforeAll(): Unit = { super.beforeAll() originalConf = spark.conf.get(SQLConf.PARQUET_VECTORIZED_READER_ENABLED) spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, "true") } - override def afterAll() { + override def afterAll(): Unit = { spark.conf.set(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key, originalConf) super.afterAll() } @@ -209,13 +209,13 @@ class MergedParquetReadSchemaSuite override val format: String = "parquet" - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() originalConf = spark.conf.get(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED) spark.conf.set(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED.key, "true") } - override def afterAll() { + override def afterAll(): Unit = { spark.conf.set(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED.key, originalConf) super.afterAll() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index bb3cec579016e..a3d4905e82cee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.{DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.SchemaPruningTest import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -34,7 +35,8 @@ abstract class SchemaPruningSuite extends QueryTest with FileBasedDataSourceTest with SchemaPruningTest - with SharedSparkSession { + with SharedSparkSession + with AdaptiveSparkPlanHelper { case class FullName(first: String, middle: String, last: 
String) case class Company(name: String, address: String) case class Employer(id: Int, company: Company) @@ -90,6 +92,36 @@ abstract class SchemaPruningSuite briefContacts.map { case BriefContact(id, name, address) => BriefContactWithDataPartitionColumn(id, name, address, 2) } + testSchemaPruning("select only top-level fields") { + val query = sql("select address from contacts") + checkScan(query, "struct") + checkAnswer(query.orderBy("id"), + Row("123 Main Street") :: + Row("321 Wall Street") :: + Row("567 Maple Drive") :: + Row("6242 Ash Street") :: + Nil) + } + + testSchemaPruning("select a single complex field with disabled nested schema pruning") { + withSQLConf(SQLConf.NESTED_SCHEMA_PRUNING_ENABLED.key -> "false") { + val query = sql("select name.middle from contacts") + checkScan(query, "struct>") + checkAnswer(query.orderBy("id"), Row("X.") :: Row("Y.") :: Row(null) :: Row(null) :: Nil) + } + } + + testSchemaPruning("select only input_file_name()") { + val query = sql("select input_file_name() from contacts") + checkScan(query, "struct<>") + } + + testSchemaPruning("select only expressions without references") { + val query = sql("select count(*) from contacts") + checkScan(query, "struct<>") + checkAnswer(query, Row(4)) + } + testSchemaPruning("select a single complex field") { val query = sql("select name.middle from contacts") checkScan(query, "struct>") @@ -269,7 +301,7 @@ abstract class SchemaPruningSuite checkAnswer(query, Row("Y.", 1) :: Row("X.", 1) :: Row(null, 2) :: Row(null, 2) :: Nil) } - protected def testSchemaPruning(testName: String)(testThunk: => Unit) { + protected def testSchemaPruning(testName: String)(testThunk: => Unit): Unit = { test(s"Spark vectorized reader - without partition data column - $testName") { withSQLConf(vectorizedReaderEnabledKey -> "true") { withContacts(testThunk) @@ -293,7 +325,7 @@ abstract class SchemaPruningSuite } } - private def withContacts(testThunk: => Unit) { + private def withContacts(testThunk: => Unit): 
Unit = { withTempPath { dir => val path = dir.getCanonicalPath @@ -315,7 +347,7 @@ abstract class SchemaPruningSuite } } - private def withContactsWithDataPartitionColumn(testThunk: => Unit) { + private def withContactsWithDataPartitionColumn(testThunk: => Unit): Unit = { withTempPath { dir => val path = dir.getCanonicalPath @@ -378,10 +410,24 @@ abstract class SchemaPruningSuite checkAnswer(query.orderBy("id"), Row(1) :: Nil) } + testMixedCaseQueryPruning("subquery filter with different-case column names") { + withTempView("temp") { + val spark = this.spark + import spark.implicits._ + + val df = Seq(2).toDF("col2") + df.createOrReplaceTempView("temp") + + val query = sql("select id from mixedcase where Col2.b IN (select col2 from temp)") + checkScan(query, "struct>") + checkAnswer(query.orderBy("id"), Row(1) :: Nil) + } + } + // Tests schema pruning for a query whose column and field names are exactly the same as the table // schema's column and field names. N.B. this implies that `testThunk` should pass using either a // case-sensitive or case-insensitive query parser - private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit) { + private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit): Unit = { test(s"Case-sensitive parser - mixed-case schema - $testName") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { withMixedCaseData(testThunk) @@ -392,7 +438,7 @@ abstract class SchemaPruningSuite // Tests schema pruning for a query whose column and field names may differ in case from the table // schema's column and field names - private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit) { + private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit): Unit = { test(s"Case-insensitive parser - mixed-case schema - $testName") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { withMixedCaseData(testThunk) @@ -401,7 +447,7 @@ abstract class SchemaPruningSuite } // Tests given test function with 
Spark vectorized reader and non-vectorized reader. - private def withMixedCaseData(testThunk: => Unit) { + private def withMixedCaseData(testThunk: => Unit): Unit = { withDataSourceTable(mixedCaseData, "mixedcase") { testThunk } @@ -424,7 +470,7 @@ abstract class SchemaPruningSuite protected def checkScanSchemata(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { val fileSourceScanSchemata = - df.queryExecution.executedPlan.collect { + collect(df.queryExecution.executedPlan) { case scan: FileSourceScanExec => scan.requiredSchema } assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala index 70ec9bbf4819d..2cd142f913072 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala @@ -352,15 +352,15 @@ class BinaryFileFormatSuite extends QueryTest with SharedSparkSession { .select(CONTENT) } val expected = Seq(Row(content)) - QueryTest.checkAnswer(readContent(), expected) + checkAnswer(readContent(), expected) withSQLConf(SOURCES_BINARY_FILE_MAX_LENGTH.key -> content.length.toString) { - QueryTest.checkAnswer(readContent(), expected) + checkAnswer(readContent(), expected) } // Disable read. If the implementation attempts to read, the exception would be different. 
file.setReadable(false) val caught = intercept[SparkException] { withSQLConf(SOURCES_BINARY_FILE_MAX_LENGTH.key -> (content.length - 1).toString) { - QueryTest.checkAnswer(readContent(), expected) + checkAnswer(readContent(), expected) } } assert(caught.getMessage.contains("exceeds the max length allowed")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala index e41e81af508f1..e2abb39c986a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVBenchmark.scala @@ -23,6 +23,7 @@ import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{Column, Dataset, Row} import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -42,8 +43,6 @@ import org.apache.spark.sql.types._ object CSVBenchmark extends SqlBasedBenchmark { import spark.implicits._ - private def toNoop(ds: Dataset[_]): Unit = ds.write.format("noop").save() - private def quotedValuesBenchmark(rowsNum: Int, numIters: Int): Unit = { val benchmark = new Benchmark(s"Parsing quoted values", rowsNum, output = output) @@ -59,7 +58,7 @@ object CSVBenchmark extends SqlBasedBenchmark { val ds = spark.read.option("header", true).schema(schema).csv(path.getAbsolutePath) benchmark.addCase(s"One quoted string", numIters) { _ => - toNoop(ds) + ds.noop() } benchmark.run() @@ -84,14 +83,14 @@ object CSVBenchmark extends SqlBasedBenchmark { val ds = spark.read.schema(schema).csv(path.getAbsolutePath) benchmark.addCase(s"Select $colsNum columns", numIters) { _ => - toNoop(ds.select("*")) + ds.select("*").noop() } val cols100 = columnNames.take(100).map(Column(_)) benchmark.addCase(s"Select 100 columns", numIters) { 
_ => - toNoop(ds.select(cols100: _*)) + ds.select(cols100: _*).noop() } benchmark.addCase(s"Select one column", numIters) { _ => - toNoop(ds.select($"col1")) + ds.select($"col1").noop() } benchmark.addCase(s"count()", numIters) { _ => ds.count() @@ -101,7 +100,7 @@ object CSVBenchmark extends SqlBasedBenchmark { (1 until colsNum).map(i => StructField(s"col$i", IntegerType))) val dsErr1 = spark.read.schema(schemaErr1).csv(path.getAbsolutePath) benchmark.addCase(s"Select 100 columns, one bad input field", numIters) { _ => - toNoop(dsErr1.select(cols100: _*)) + dsErr1.select(cols100: _*).noop() } val badRecColName = "badRecord" @@ -110,7 +109,7 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("columnNameOfCorruptRecord", badRecColName) .csv(path.getAbsolutePath) benchmark.addCase(s"Select 100 columns, corrupt record field", numIters) { _ => - toNoop(dsErr2.select((Column(badRecColName) +: cols100): _*)) + dsErr2.select((Column(badRecColName) +: cols100): _*).noop() } benchmark.run() @@ -167,11 +166,11 @@ object CSVBenchmark extends SqlBasedBenchmark { val writeBench = new Benchmark("Write dates and timestamps", rowsNum, output = output) writeBench.addCase(s"Create a dataset of timestamps", numIters) { _ => - toNoop(timestamps) + timestamps.noop() } writeBench.addCase("to_csv(timestamp)", numIters) { _ => - toNoop(timestamps.select(to_csv(struct($"timestamp")))) + timestamps.select(to_csv(struct($"timestamp"))).noop() } writeBench.addCase("write timestamps to files", numIters) { _ => @@ -179,11 +178,11 @@ object CSVBenchmark extends SqlBasedBenchmark { } writeBench.addCase("Create a dataset of dates", numIters) { _ => - toNoop(dates) + dates.noop() } writeBench.addCase("to_csv(date)", numIters) { _ => - toNoop(dates.select(to_csv(struct($"date")))) + dates.select(to_csv(struct($"date"))).noop() } writeBench.addCase("write dates to files", numIters) { _ => @@ -196,7 +195,7 @@ object CSVBenchmark extends SqlBasedBenchmark { val tsSchema = new 
StructType().add("timestamp", TimestampType) readBench.addCase("read timestamp text from files", numIters) { _ => - toNoop(spark.read.text(timestampDir)) + spark.read.text(timestampDir).noop() } readBench.addCase("read timestamps from files", numIters) { _ => @@ -204,7 +203,7 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("header", true) .schema(tsSchema) .csv(timestampDir) - toNoop(ds) + ds.noop() } readBench.addCase("infer timestamps from files", numIters) { _ => @@ -212,13 +211,13 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("header", true) .option("inferSchema", true) .csv(timestampDir) - toNoop(ds) + ds.noop() } val dateSchema = new StructType().add("date", DateType) readBench.addCase("read date text from files", numIters) { _ => - toNoop(spark.read.text(dateDir)) + spark.read.text(dateDir).noop() } readBench.addCase("read date from files", numIters) { _ => @@ -226,7 +225,7 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("header", true) .schema(dateSchema) .csv(dateDir) - toNoop(ds) + ds.noop() } readBench.addCase("infer date from files", numIters) { _ => @@ -234,7 +233,7 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("header", true) .option("inferSchema", true) .csv(dateDir) - toNoop(ds) + ds.noop() } def timestampStr: Dataset[String] = { @@ -244,7 +243,7 @@ object CSVBenchmark extends SqlBasedBenchmark { } readBench.addCase("timestamp strings", numIters) { _ => - toNoop(timestampStr) + timestampStr.noop() } readBench.addCase("parse timestamps from Dataset[String]", numIters) { _ => @@ -252,7 +251,7 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("header", false) .schema(tsSchema) .csv(timestampStr) - toNoop(ds) + ds.noop() } readBench.addCase("infer timestamps from Dataset[String]", numIters) { _ => @@ -260,7 +259,7 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("header", false) .option("inferSchema", true) .csv(timestampStr) - toNoop(ds) + ds.noop() } def dateStr: Dataset[String] = { @@ 
-270,7 +269,7 @@ object CSVBenchmark extends SqlBasedBenchmark { } readBench.addCase("date strings", numIters) { _ => - toNoop(dateStr) + dateStr.noop() } readBench.addCase("parse dates from Dataset[String]", numIters) { _ => @@ -278,23 +277,67 @@ object CSVBenchmark extends SqlBasedBenchmark { .option("header", false) .schema(dateSchema) .csv(dateStr) - toNoop(ds) + ds.noop() } readBench.addCase("from_csv(timestamp)", numIters) { _ => val ds = timestampStr.select(from_csv($"timestamp", tsSchema, Map.empty[String, String])) - toNoop(ds) + ds.noop() } readBench.addCase("from_csv(date)", numIters) { _ => val ds = dateStr.select(from_csv($"date", dateSchema, Map.empty[String, String])) - toNoop(ds) + ds.noop() } readBench.run() } } + private def filtersPushdownBenchmark(rowsNum: Int, numIters: Int): Unit = { + val benchmark = new Benchmark(s"Filters pushdown", rowsNum, output = output) + val colsNum = 100 + val fields = Seq.tabulate(colsNum)(i => StructField(s"col$i", TimestampType)) + val schema = StructType(StructField("key", IntegerType) +: fields) + def columns(): Seq[Column] = { + val ts = Seq.tabulate(colsNum) { i => + lit(Instant.ofEpochSecond(i * 12345678)).as(s"col$i") + } + ($"id" % 1000).as("key") +: ts + } + withTempPath { path => + spark.range(rowsNum).select(columns(): _*) + .write.option("header", true) + .csv(path.getAbsolutePath) + def readback = { + spark.read + .option("header", true) + .schema(schema) + .csv(path.getAbsolutePath) + } + + benchmark.addCase(s"w/o filters", numIters) { _ => + readback.noop() + } + + def withFilter(configEnabled: Boolean): Unit = { + withSQLConf(SQLConf.CSV_FILTER_PUSHDOWN_ENABLED.key -> configEnabled.toString()) { + readback.filter($"key" === 0).noop() + } + } + + benchmark.addCase(s"pushdown disabled", numIters) { _ => + withFilter(configEnabled = false) + } + + benchmark.addCase(s"w/ filters", numIters) { _ => + withFilter(configEnabled = true) + } + + benchmark.run() + } + } + override def 
runBenchmarkSuite(mainArgs: Array[String]): Unit = { runBenchmark("Benchmark to measure CSV read/write performance") { val numIters = 3 @@ -302,6 +345,7 @@ object CSVBenchmark extends SqlBasedBenchmark { multiColumnsBenchmark(rowsNum = 1000 * 1000, numIters) countBenchmark(rowsNum = 10 * 1000 * 1000, numIters) datetimeBenchmark(rowsNum = 10 * 1000 * 1000, numIters) + filtersPushdownBenchmark(rowsNum = 100 * 1000, numIters) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 5afd019c11a16..0be0e1e3da3dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -32,17 +32,15 @@ import com.univocity.parsers.common.TextParsingException import org.apache.commons.lang3.time.FastDateFormat import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.GzipCodec -import org.apache.log4j.{AppenderSkeleton, LogManager} -import org.apache.log4j.spi.LoggingEvent -import org.apache.spark.{SparkException, TestUtils} -import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.{SparkConf, SparkException, TestUtils} +import org.apache.spark.sql.{AnalysisException, Column, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ -class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { +abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { import testImplicits._ private val carsFile = "test-data/cars.csv" @@ -50,6 +48,8 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { private val carsFile8859 = 
"test-data/cars_iso-8859-1.csv" private val carsTsvFile = "test-data/cars.tsv" private val carsAltFile = "test-data/cars-alternative.csv" + private val carsMultiCharDelimitedFile = "test-data/cars-multichar-delim.csv" + private val carsMultiCharCrazyDelimitedFile = "test-data/cars-multichar-delim-crazy.csv" private val carsUnbalancedQuotesFile = "test-data/cars-unbalanced-quotes.csv" private val carsNullFile = "test-data/cars-null.csv" private val carsEmptyValueFile = "test-data/cars-empty-value.csv" @@ -66,6 +66,7 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { private val unescapedQuotesFile = "test-data/unescaped-quotes.csv" private val valueMalformedFile = "test-data/value-malformed.csv" private val badAfterGoodFile = "test-data/bad_after_good.csv" + private val malformedRowFile = "test-data/malformedRow.csv" /** Verifies data and schema. */ private def verifyCars( @@ -187,6 +188,49 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { verifyCars(cars, withHeader = true) } + test("test with tab delimiter and double quote") { + val cars = spark.read + .options(Map("quote" -> "\"", "delimiter" -> """\t""", "header" -> "true")) + .csv(testFile(carsTsvFile)) + + verifyCars(cars, numFields = 6, withHeader = true, checkHeader = false) + } + + test("SPARK-24540: test with multiple character delimiter (comma space)") { + val cars = spark.read + .options(Map("quote" -> "\'", "delimiter" -> ", ", "header" -> "true")) + .csv(testFile(carsMultiCharDelimitedFile)) + + verifyCars(cars, withHeader = true) + } + + test("SPARK-24540: test with multiple (crazy) character delimiter") { + val cars = spark.read + .options(Map("quote" -> "\'", "delimiter" -> """_/-\\_""", "header" -> "true")) + .csv(testFile(carsMultiCharCrazyDelimitedFile)) + + verifyCars(cars, withHeader = true) + + // check all the other columns, besides year (which is covered by verifyCars) + val otherCols = cars.select("make", "model", "comment", 
"blank").collect() + val expectedOtherColVals = Seq( + ("Tesla", "S", "No comment", null), + ("Ford", "E350", "Go get one now they are going fast", null), + ("Chevy", "Volt", null, null) + ) + + expectedOtherColVals.zipWithIndex.foreach { case (values, index) => + val actualRow = otherCols(index) + values match { + case (make, model, comment, blank) => + assert(make == actualRow.getString(0)) + assert(model == actualRow.getString(1)) + assert(comment == actualRow.getString(2)) + assert(blank == actualRow.getString(3)) + } + } + } + test("parse unescaped quotes with maxCharsPerColumn") { val rows = spark.read .format("csv") @@ -819,8 +863,8 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { .load(testFile(simpleSparseFile)) assert( - df.schema.fields.map(field => field.dataType).deep == - Array(IntegerType, IntegerType, IntegerType, IntegerType).deep) + df.schema.fields.map(field => field.dataType).sameElements( + Array(IntegerType, IntegerType, IntegerType, IntegerType))) } test("old csv data source name works") { @@ -1138,7 +1182,7 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { .schema(schemaWithCorrField1) .csv(testFile(valueMalformedFile)) checkAnswer(df2, - Row(0, null, "0,2013-111-11 12:13:14") :: + Row(0, null, "0,2013-111_11 12:13:14") :: Row(1, java.sql.Date.valueOf("1983-08-04"), null) :: Nil) @@ -1155,7 +1199,7 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { .schema(schemaWithCorrField2) .csv(testFile(valueMalformedFile)) checkAnswer(df3, - Row(0, "0,2013-111-11 12:13:14", null) :: + Row(0, "0,2013-111_11 12:13:14", null) :: Row(1, null, java.sql.Date.valueOf("1983-08-04")) :: Nil) @@ -1391,7 +1435,7 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { assert(df.filter($"_corrupt_record".isNull).count() == 1) checkAnswer( df.select(columnNameOfCorruptRecord), - Row("0,2013-111-11 12:13:14") :: Row(null) :: Nil + Row("0,2013-111_11 
12:13:14") :: Row(null) :: Nil ) } @@ -1717,24 +1761,17 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { } test("SPARK-23786: warning should be printed if CSV header doesn't conform to schema") { - class TestAppender extends AppenderSkeleton { - var events = new java.util.ArrayList[LoggingEvent] - override def close(): Unit = {} - override def requiresLayout: Boolean = false - protected def append(event: LoggingEvent): Unit = events.add(event) - } - - val testAppender1 = new TestAppender + val testAppender1 = new LogAppender("CSV header matches to schema") withLogAppender(testAppender1) { val ds = Seq("columnA,columnB", "1.0,1000.0").toDS() val ischema = new StructType().add("columnB", DoubleType).add("columnA", DoubleType) spark.read.schema(ischema).option("header", true).option("enforceSchema", true).csv(ds) } - assert(testAppender1.events.asScala + assert(testAppender1.loggingEvents .exists(msg => msg.getRenderedMessage.contains("CSV header does not conform to the schema"))) - val testAppender2 = new TestAppender + val testAppender2 = new LogAppender("CSV header matches to schema w/ enforceSchema") withLogAppender(testAppender2) { withTempPath { path => val oschema = new StructType().add("f1", DoubleType).add("f2", DoubleType) @@ -1749,7 +1786,7 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { .collect() } } - assert(testAppender2.events.asScala + assert(testAppender2.loggingEvents .exists(msg => msg.getRenderedMessage.contains("CSV header does not conform to the schema"))) } @@ -2027,15 +2064,6 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { } } - test("do not produce empty files for empty partitions") { - withTempPath { dir => - val path = dir.getCanonicalPath - spark.emptyDataset[String].write.csv(path) - val files = new File(path).listFiles() - assert(!files.exists(_.getName.endsWith("csv"))) - } - } - test("Do not reuse last good value for bad input field") { val 
schema = StructType( StructField("col1", StringType) :: @@ -2065,7 +2093,7 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { Seq("csv", "").foreach { reader => withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> reader) { withTempPath { path => - val df = Seq(("0", "2013-111-11")).toDF("a", "b") + val df = Seq(("0", "2013-111_11")).toDF("a", "b") df.write .option("header", "true") .csv(path.getAbsolutePath) @@ -2081,7 +2109,7 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { .option("columnNameOfCorruptRecord", columnNameOfCorruptRecord) .schema(schemaWithCorrField) .csv(path.getAbsoluteFile.toString) - checkAnswer(readDF, Row(0, null, "0,2013-111-11") :: Nil) + checkAnswer(readDF, Row(0, null, "0,2013-111_11") :: Nil) } } } @@ -2109,4 +2137,189 @@ class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { "expect the TextParsingException truncate the error content to be 1000 length.") } } + + test("SPARK-29101 test count with DROPMALFORMED mode") { + Seq((true, 4), (false, 3)).foreach { case (csvColumnPruning, expectedCount) => + withSQLConf(SQLConf.CSV_PARSER_COLUMN_PRUNING.key -> csvColumnPruning.toString) { + val count = spark.read + .option("header", "true") + .option("mode", "DROPMALFORMED") + .csv(testFile(malformedRowFile)) + .count() + assert(expectedCount == count) + } + } + } + + test("parse timestamp in microsecond precision") { + withTempPath { path => + val t = "2019-11-14 20:35:30.123456" + Seq(t).toDF("t").write.text(path.getAbsolutePath) + val readback = spark.read + .schema("t timestamp") + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss.SSSSSS") + .csv(path.getAbsolutePath) + checkAnswer(readback, Row(Timestamp.valueOf(t))) + } + } + + test("Roundtrip in reading and writing timestamps in microsecond precision") { + withTempPath { path => + val timestamp = Timestamp.valueOf("2019-11-18 11:56:00.123456") + Seq(timestamp).toDF("t") + .write + .option("timestampFormat", "yyyy-MM-dd 
HH:mm:ss.SSSSSS") + .csv(path.getAbsolutePath) + val readback = spark.read + .schema("t timestamp") + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss.SSSSSS") + .csv(path.getAbsolutePath) + checkAnswer(readback, Row(timestamp)) + } + } + + test("return correct results when data columns overlap with partition columns") { + withTempPath { path => + val tablePath = new File(s"${path.getCanonicalPath}/cOl3=c/cOl1=a/cOl5=e") + + val inputDF = Seq((1, 2, 3, 4, 5)).toDF("cOl1", "cOl2", "cOl3", "cOl4", "cOl5") + inputDF.write + .option("header", "true") + .csv(tablePath.getCanonicalPath) + + val resultDF = spark.read + .option("header", "true") + .option("inferSchema", "true") + .csv(path.getCanonicalPath) + .select("CoL1", "Col2", "CoL5", "CoL3") + checkAnswer(resultDF, Row("a", 2, "e", "c")) + } + } + + test("filters push down") { + Seq(true, false).foreach { filterPushdown => + Seq(true, false).foreach { columnPruning => + withSQLConf( + SQLConf.CSV_FILTER_PUSHDOWN_ENABLED.key -> filterPushdown.toString, + SQLConf.CSV_PARSER_COLUMN_PRUNING.key -> columnPruning.toString) { + + withTempPath { path => + val t = "2019-12-17 00:01:02" + Seq( + "c0,c1,c2", + "abc,1,2019-11-14 20:35:30", + s"def,2,$t").toDF("data") + .repartition(1) + .write.text(path.getAbsolutePath) + Seq(true, false).foreach { multiLine => + Seq("PERMISSIVE", "DROPMALFORMED", "FAILFAST").foreach { mode => + val readback = spark.read + .option("mode", mode) + .option("header", true) + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss") + .option("multiLine", multiLine) + .schema("c0 string, c1 integer, c2 timestamp") + .csv(path.getAbsolutePath) + .where($"c1" === 2) + .select($"c2") + // count() pushes empty schema. This checks handling of a filter + // which refers to not existed field. 
+ assert(readback.count() === 1) + checkAnswer(readback, Row(Timestamp.valueOf(t))) + } + } + } + } + } + } + } + + test("filters push down - malformed input in PERMISSIVE mode") { + val invalidTs = "2019-123_14 20:35:30" + val invalidRow = s"0,$invalidTs,999" + val validTs = "2019-12-14 20:35:30" + Seq(true, false).foreach { filterPushdown => + withSQLConf(SQLConf.CSV_FILTER_PUSHDOWN_ENABLED.key -> filterPushdown.toString) { + withTempPath { path => + Seq( + "c0,c1,c2", + invalidRow, + s"1,$validTs,999").toDF("data") + .repartition(1) + .write.text(path.getAbsolutePath) + def checkReadback(condition: Column, expected: Seq[Row]): Unit = { + val readback = spark.read + .option("mode", "PERMISSIVE") + .option("columnNameOfCorruptRecord", "c3") + .option("header", true) + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss") + .schema("c0 integer, c1 timestamp, c2 integer, c3 string") + .csv(path.getAbsolutePath) + .where(condition) + .select($"c0", $"c1", $"c3") + checkAnswer(readback, expected) + } + + checkReadback( + condition = $"c2" === 999, + expected = Seq(Row(0, null, invalidRow), Row(1, Timestamp.valueOf(validTs), null))) + checkReadback( + condition = $"c2" === 999 && $"c1" > "1970-01-01 00:00:00", + expected = Seq(Row(1, Timestamp.valueOf(validTs), null))) + } + } + } + } + + test("SPARK-30530: apply filters to malformed rows") { + withSQLConf(SQLConf.CSV_FILTER_PUSHDOWN_ENABLED.key -> "true") { + withTempPath { path => + Seq( + "100.0,1.0,", + "200.0,,", + "300.0,3.0,", + "1.0,4.0,", + ",4.0,", + "500.0,,", + ",6.0,", + "-500.0,50.5").toDF("data") + .repartition(1) + .write.text(path.getAbsolutePath) + val schema = new StructType().add("floats", FloatType).add("more_floats", FloatType) + val readback = spark.read + .schema(schema) + .csv(path.getAbsolutePath) + .filter("floats is null") + checkAnswer(readback, Seq(Row(null, 4.0), Row(null, 6.0))) + } + } + } + + test("SPARK-30810: parses and convert a CSV Dataset having different column from 'value'") { + val 
ds = spark.range(2).selectExpr("concat('a,b,', id) AS `a.text`").as[String] + val csv = spark.read.option("header", true).option("inferSchema", true).csv(ds) + assert(csv.schema.fieldNames === Seq("a", "b", "0")) + checkAnswer(csv, Row("a", "b", 1)) + } +} + +class CSVv1Suite extends CSVSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "csv") +} + +class CSVv2Suite extends CSVSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") +} + +class CSVLegacyTimeParserSuite extends CSVSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.LEGACY_TIME_PARSER_ENABLED, true) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala index f486e603e2552..bcecaccc8cc89 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonBenchmark.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.types._ * }}} */ -object JSONBenchmark extends SqlBasedBenchmark { +object JsonBenchmark extends SqlBasedBenchmark { import spark.implicits._ private def prepareDataInfo(benchmark: Benchmark): Unit = { @@ -48,10 +48,6 @@ object JSONBenchmark extends SqlBasedBenchmark { // scalastyle:on println } - private def run(ds: Dataset[_]): Unit = { - ds.write.format("noop").save() - } - def schemaInferring(rowsNum: Int, numIters: Int): Unit = { val benchmark = new Benchmark("JSON schema inferring", rowsNum, output = output) @@ -219,11 +215,11 @@ object JSONBenchmark extends SqlBasedBenchmark { benchmark.addCase(s"Select $colsNum columns", numIters) { _ => val ds = in.select("*") - run(ds) + ds.noop() } benchmark.addCase(s"Select 1 column", numIters) { _ => val ds = 
in.select($"col1") - run(ds) + ds.noop() } benchmark.run() @@ -244,7 +240,7 @@ object JSONBenchmark extends SqlBasedBenchmark { benchmark.addCase("Short column without encoding", numIters) { _ => val ds = spark.read.schema(shortSchema).json(shortColumnPath) - run(ds) + ds.noop() } benchmark.addCase("Short column with UTF-8", numIters) { _ => @@ -252,12 +248,12 @@ object JSONBenchmark extends SqlBasedBenchmark { .option("encoding", "UTF-8") .schema(shortSchema) .json(shortColumnPath) - run(ds) + ds.noop() } benchmark.addCase("Wide column without encoding", numIters) { _ => val ds = spark.read.schema(wideSchema).json(wideColumnPath) - run(ds) + ds.noop() } benchmark.addCase("Wide column with UTF-8", numIters) { _ => @@ -265,7 +261,7 @@ object JSONBenchmark extends SqlBasedBenchmark { .option("encoding", "UTF-8") .schema(wideSchema) .json(wideColumnPath) - run(ds) + ds.noop() } benchmark.run() @@ -280,23 +276,23 @@ object JSONBenchmark extends SqlBasedBenchmark { val in = spark.range(0, rows, 1, 1).map(_ => """{"a":1}""") benchmark.addCase("Text read", iters) { _ => - run(in) + in.noop() } benchmark.addCase("from_json", iters) { _ => val schema = new StructType().add("a", IntegerType) val from_json_ds = in.select(from_json('value, schema)) - run(from_json_ds) + from_json_ds.noop() } benchmark.addCase("json_tuple", iters) { _ => val json_tuple_ds = in.select(json_tuple($"value", "a")) - run(json_tuple_ds) + json_tuple_ds.noop() } benchmark.addCase("get_json_object", iters) { _ => val get_json_object_ds = in.select(get_json_object($"value", "$.a")) - run(get_json_object_ds) + get_json_object_ds.noop() } benchmark.run() @@ -310,7 +306,7 @@ object JSONBenchmark extends SqlBasedBenchmark { val in = spark.range(0, rows, 1, 1).map(_ => """{"a":1}""") benchmark.addCase("Text read", iters) { _ => - run(in) + in.noop() } benchmark.addCase("schema inferring", iters) { _ => @@ -322,7 +318,7 @@ object JSONBenchmark extends SqlBasedBenchmark { val ds = spark.read .schema(schema) 
.json(in) - run(ds) + ds.noop() } benchmark.run() @@ -343,7 +339,7 @@ object JSONBenchmark extends SqlBasedBenchmark { val ds = spark.read .format("text") .load(path.getAbsolutePath) - run(ds) + ds.noop() } benchmark.addCase("Schema inferring", iters) { _ => @@ -360,7 +356,7 @@ object JSONBenchmark extends SqlBasedBenchmark { .schema(schema) .option("multiLine", false) .json(path.getAbsolutePath) - run(ds) + ds.noop() } benchmark.addCase("Parsing with UTF-8", iters) { _ => @@ -370,7 +366,7 @@ object JSONBenchmark extends SqlBasedBenchmark { .option("charset", "UTF-8") .json(path.getAbsolutePath) - run(ds) + ds.noop() } benchmark.run() @@ -397,11 +393,11 @@ object JSONBenchmark extends SqlBasedBenchmark { val writeBench = new Benchmark("Write dates and timestamps", rowsNum, output = output) writeBench.addCase(s"Create a dataset of timestamps", numIters) { _ => - run(timestamps) + timestamps.noop() } writeBench.addCase("to_json(timestamp)", numIters) { _ => - run(timestamps.select(to_json(struct($"timestamp")))) + timestamps.select(to_json(struct($"timestamp"))).noop() } writeBench.addCase("write timestamps to files", numIters) { _ => @@ -409,11 +405,11 @@ object JSONBenchmark extends SqlBasedBenchmark { } writeBench.addCase("Create a dataset of dates", numIters) { _ => - run(dates) + dates.noop() } writeBench.addCase("to_json(date)", numIters) { _ => - run(dates.select(to_json(struct($"date")))) + dates.select(to_json(struct($"date"))).noop() } writeBench.addCase("write dates to files", numIters) { _ => @@ -426,25 +422,25 @@ object JSONBenchmark extends SqlBasedBenchmark { val tsSchema = new StructType().add("timestamp", TimestampType) readBench.addCase("read timestamp text from files", numIters) { _ => - run(spark.read.text(timestampDir)) + spark.read.text(timestampDir).noop() } readBench.addCase("read timestamps from files", numIters) { _ => - run(spark.read.schema(tsSchema).json(timestampDir)) + spark.read.schema(tsSchema).json(timestampDir).noop() } 
readBench.addCase("infer timestamps from files", numIters) { _ => - run(spark.read.json(timestampDir)) + spark.read.json(timestampDir).noop() } val dateSchema = new StructType().add("date", DateType) readBench.addCase("read date text from files", numIters) { _ => - run(spark.read.text(dateDir)) + spark.read.text(dateDir).noop() } readBench.addCase("read date from files", numIters) { _ => - run(spark.read.schema(dateSchema).json(dateDir)) + spark.read.schema(dateSchema).json(dateDir).noop() } def timestampStr: Dataset[String] = { @@ -454,15 +450,15 @@ object JSONBenchmark extends SqlBasedBenchmark { } readBench.addCase("timestamp strings", numIters) { _ => - run(timestampStr) + timestampStr.noop() } readBench.addCase("parse timestamps from Dataset[String]", numIters) { _ => - run(spark.read.schema(tsSchema).json(timestampStr)) + spark.read.schema(tsSchema).json(timestampStr).noop() } readBench.addCase("infer timestamps from Dataset[String]", numIters) { _ => - run(spark.read.json(timestampStr)) + spark.read.json(timestampStr).noop() } def dateStr: Dataset[String] = { @@ -472,7 +468,7 @@ object JSONBenchmark extends SqlBasedBenchmark { } readBench.addCase("date strings", numIters) { _ => - run(dateStr) + dateStr.noop() } readBench.addCase("parse dates from Dataset[String]", numIters) { _ => @@ -480,17 +476,17 @@ object JSONBenchmark extends SqlBasedBenchmark { .option("header", false) .schema(dateSchema) .json(dateStr) - run(ds) + ds.noop() } readBench.addCase("from_json(timestamp)", numIters) { _ => val ds = timestampStr.select(from_json($"timestamp", tsSchema, Map.empty[String, String])) - run(ds) + ds.noop() } readBench.addCase("from_json(date)", numIters) { _ => val ds = dateStr.select(from_json($"date", dateSchema, Map.empty[String, String])) - run(ds) + ds.noop() } readBench.run() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala index bafb6769af69c..7592809d7c85b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala @@ -103,7 +103,7 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSparkSession { } // The following two tests are not really working - need to look into Jackson's - // JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS. + // JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS. ignore("allowNonNumericNumbers off") { val str = """{"age": NaN}""" val df = spark.read.json(Seq(str).toDS()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 2998e673bd45c..7abe818a29d9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.GzipCodec -import org.apache.spark.{SparkException, TestUtils} +import org.apache.spark.{SparkConf, SparkException, TestUtils} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{functions => F, _} import org.apache.spark.sql.catalyst.json._ @@ -45,11 +45,11 @@ class TestFileFilter extends PathFilter { override def accept(path: Path): Boolean = path.getParent.getName != "p=2" } -class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { +abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { import testImplicits._ test("Type promotion") { - def checkTypePromotion(expected: Any, actual: Any) { + def 
checkTypePromotion(expected: Any, actual: Any): Unit = { assert(expected.getClass == actual.getClass, s"Failed to promote ${actual.getClass} to ${expected.getClass}.") assert(expected == actual, @@ -92,7 +92,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { checkTypePromotion( Decimal(longNumber), enforceCorrectType(longNumber, DecimalType.SYSTEM_DEFAULT)) - val doubleNumber: Double = 1.7976931348623157E308d + val doubleNumber: Double = 1.7976931348623157d checkTypePromotion(doubleNumber.toDouble, enforceCorrectType(doubleNumber, DoubleType)) checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new Timestamp(intNumber * 1000L)), @@ -128,7 +128,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { } test("Get compatible type") { - def checkDataType(t1: DataType, t2: DataType, expected: DataType) { + def checkDataType(t1: DataType, t2: DataType, expected: DataType): Unit = { var actual = JsonInferSchema.compatibleType(t1, t2) assert(actual == expected, s"Expected $expected as the most general data type for $t1 and $t2, found $actual") @@ -284,7 +284,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from jsonTable"), Row(new java.math.BigDecimal("92233720368547758070"), true, - 1.7976931348623157E308, + 1.7976931348623157, 10, 21474836470L, null, @@ -624,7 +624,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from jsonTable"), Row(new java.math.BigDecimal("92233720368547758070"), true, - 1.7976931348623157E308, + 1.7976931348623157, 10, 21474836470L, null, @@ -656,7 +656,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from jsonTable"), Row("92233720368547758070", "true", - "1.7976931348623157E308", + "1.7976931348623157", "10", "21474836470", null, @@ -768,7 +768,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { val expectedSchema = 
StructType( StructField("bigInteger", DecimalType(20, 0), true) :: StructField("boolean", BooleanType, true) :: - StructField("double", DecimalType(17, -292), true) :: + StructField("double", DecimalType(17, 16), true) :: StructField("integer", LongType, true) :: StructField("long", LongType, true) :: StructField("null", StringType, true) :: @@ -782,7 +782,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from jsonTable"), Row(BigDecimal("92233720368547758070"), true, - BigDecimal("1.7976931348623157E308"), + BigDecimal("1.7976931348623157"), 10, 21474836470L, null, @@ -875,7 +875,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from jsonTableSQL"), Row(new java.math.BigDecimal("92233720368547758070"), true, - 1.7976931348623157E308, + 1.7976931348623157, 10, 21474836470L, null, @@ -908,7 +908,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from jsonTable1"), Row(new java.math.BigDecimal("92233720368547758070"), true, - 1.7976931348623157E308, + 1.7976931348623157, 10, 21474836470L, null, @@ -925,7 +925,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from jsonTable2"), Row(new java.math.BigDecimal("92233720368547758070"), true, - 1.7976931348623157E308, + 1.7976931348623157, 10, 21474836470L, null, @@ -1274,7 +1274,7 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { sql("select * from primitiveTable"), Row(new java.math.BigDecimal("92233720368547758070"), true, - 1.7976931348623157E308, + 1.7976931348623157, 10, 21474836470L, "this is a simple string.") @@ -2436,23 +2436,24 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { } } - test("SPARK-25040: empty strings should be disallowed") { - def failedOnEmptyString(dataType: DataType): Unit = { - val df = spark.read.schema(s"a ${dataType.catalogString}") - 
.option("mode", "FAILFAST").json(Seq("""{"a":""}""").toDS) - val errMessage = intercept[SparkException] { - df.collect() - }.getMessage - assert(errMessage.contains( - s"Failed to parse an empty string for data type ${dataType.catalogString}")) - } - def emptyString(dataType: DataType, expected: Any): Unit = { - val df = spark.read.schema(s"a ${dataType.catalogString}") - .option("mode", "FAILFAST").json(Seq("""{"a":""}""").toDS) - checkAnswer(df, Row(expected) :: Nil) - } + private def failedOnEmptyString(dataType: DataType): Unit = { + val df = spark.read.schema(s"a ${dataType.catalogString}") + .option("mode", "FAILFAST").json(Seq("""{"a":""}""").toDS) + val errMessage = intercept[SparkException] { + df.collect() + }.getMessage + assert(errMessage.contains( + s"Failed to parse an empty string for data type ${dataType.catalogString}")) + } + private def emptyString(dataType: DataType, expected: Any): Unit = { + val df = spark.read.schema(s"a ${dataType.catalogString}") + .option("mode", "FAILFAST").json(Seq("""{"a":""}""").toDS) + checkAnswer(df, Row(expected) :: Nil) + } + + test("SPARK-25040: empty strings should be disallowed") { failedOnEmptyString(BooleanType) failedOnEmptyString(ByteType) failedOnEmptyString(ShortType) @@ -2471,12 +2472,33 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { emptyString(BinaryType, "".getBytes(StandardCharsets.UTF_8)) } - test("do not produce empty files for empty partitions") { - withTempPath { dir => - val path = dir.getCanonicalPath - spark.emptyDataset[String].write.json(path) - val files = new File(path).listFiles() - assert(!files.exists(_.getName.endsWith("json"))) + test("SPARK-25040: allowing empty strings when legacy config is enabled") { + def emptyStringAsNull(dataType: DataType): Unit = { + val df = spark.read.schema(s"a ${dataType.catalogString}") + .option("mode", "FAILFAST").json(Seq("""{"a":""}""").toDS) + checkAnswer(df, Row(null) :: Nil) + } + + // Legacy mode prior to Spark 
3.0.0 + withSQLConf(SQLConf.LEGACY_ALLOW_EMPTY_STRING_IN_JSON.key -> "true") { + emptyStringAsNull(BooleanType) + emptyStringAsNull(ByteType) + emptyStringAsNull(ShortType) + emptyStringAsNull(IntegerType) + emptyStringAsNull(LongType) + + failedOnEmptyString(FloatType) + failedOnEmptyString(DoubleType) + failedOnEmptyString(TimestampType) + failedOnEmptyString(DateType) + + emptyStringAsNull(DecimalType.SYSTEM_DEFAULT) + emptyStringAsNull(ArrayType(IntegerType)) + emptyStringAsNull(MapType(StringType, IntegerType, true)) + emptyStringAsNull(StructType(StructField("f1", IntegerType, true) :: Nil)) + + emptyString(StringType, "") + emptyString(BinaryType, "".getBytes(StandardCharsets.UTF_8)) } } @@ -2536,3 +2558,24 @@ class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { } } } + +class JsonV1Suite extends JsonSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "json") +} + +class JsonV2Suite extends JsonSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") +} + +class JsonLegacyTimeParserSuite extends JsonSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.LEGACY_TIME_PARSER_ENABLED, true) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala index 17503330bfd5c..5c35ee03fb271 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala @@ -28,7 +28,7 @@ private[json] trait TestJsonData { "integer":10, "long":21474836470, "bigInteger":92233720368547758070, - "double":1.7976931348623157E308, + "double":1.7976931348623157, "boolean":true, "null":null }""" :: Nil))(Encoders.STRING) @@ -87,7 +87,7 @@ 
private[json] trait TestJsonData { "arrayOfInteger":[1, 2147483647, -2147483648], "arrayOfLong":[21474836470, 9223372036854775807, -9223372036854775808], "arrayOfBigInteger":[922337203685477580700, -922337203685477580800], - "arrayOfDouble":[1.2, 1.7976931348623157E308, 4.9E-324, 2.2250738585072014E-308], + "arrayOfDouble":[1.2, 1.7976931348623157, 4.9E-324, 2.2250738585072014E-308], "arrayOfBoolean":[true, false, true], "arrayOfNull":[null, null, null, null], "arrayOfStruct":[{"field1": true, "field2": "str1"}, {"field1": false}, {"field3": null}], diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala index c5a03cb8ef6d3..b4073bedf5597 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/noop/NoopSuite.scala @@ -32,6 +32,7 @@ class NoopSuite extends SharedSparkSession { } .write .format("noop") + .mode("append") .save() assert(accum.value == numElems) } @@ -54,7 +55,7 @@ class NoopSuite extends SharedSparkSession { accum.add(1) x } - .write.format("noop").save() + .write.mode("append").format("noop").save() assert(accum.value == numElems) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala index 5d21ee698f4e6..ea839b8e1ef10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcPartitionDiscoverySuite.scala @@ -169,6 +169,8 @@ abstract class OrcPartitionDiscoveryTest extends OrcTest { } class OrcPartitionDiscoverySuite extends OrcPartitionDiscoveryTest with SharedSparkSession { + override protected 
def sparkConf: SparkConf = super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "") + test("read partitioned table - partition key included in orc file") { withTempDir { base => for { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index edc1822887f9f..b8bf4b16fe53c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -194,7 +194,9 @@ abstract class OrcQueryTest extends OrcTest { val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) val conf = OrcFile.readerOptions(new Configuration()) - assert("ZLIB" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) + Utils.tryWithResource(OrcFile.createReader(orcFilePath, conf)) { reader => + assert("ZLIB" === reader.getCompressionKind.name) + } } // `compression` overrides `orc.compress`. 
@@ -209,7 +211,9 @@ abstract class OrcQueryTest extends OrcTest { val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) val conf = OrcFile.readerOptions(new Configuration()) - assert("ZLIB" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) + Utils.tryWithResource(OrcFile.createReader(orcFilePath, conf)) { reader => + assert("ZLIB" === reader.getCompressionKind.name) + } } } @@ -225,7 +229,9 @@ abstract class OrcQueryTest extends OrcTest { val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) val conf = OrcFile.readerOptions(new Configuration()) - assert("ZLIB" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) + Utils.tryWithResource(OrcFile.createReader(orcFilePath, conf)) { reader => + assert("ZLIB" === reader.getCompressionKind.name) + } } withTempPath { file => @@ -238,7 +244,9 @@ abstract class OrcQueryTest extends OrcTest { val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) val conf = OrcFile.readerOptions(new Configuration()) - assert("SNAPPY" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) + Utils.tryWithResource(OrcFile.createReader(orcFilePath, conf)) { reader => + assert("SNAPPY" === reader.getCompressionKind.name) + } } withTempPath { file => @@ -251,7 +259,9 @@ abstract class OrcQueryTest extends OrcTest { val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) val conf = OrcFile.readerOptions(new Configuration()) - assert("NONE" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) + Utils.tryWithResource(OrcFile.createReader(orcFilePath, conf)) { reader => + assert("NONE" === reader.getCompressionKind.name) + } } } @@ -635,7 +645,9 @@ class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { val orcFilePath = new Path(maybeOrcFile.get.getAbsolutePath) val conf = OrcFile.readerOptions(new Configuration()) - assert("LZO" === OrcFile.createReader(orcFilePath, conf).getCompressionKind.name) + 
Utils.tryWithResource(OrcFile.createReader(orcFilePath, conf)) { reader => + assert("LZO" === reader.getCompressionKind.name) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 0d904a09c07e8..1e27593584786 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -60,7 +60,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { .createOrReplaceTempView("orc_temp_table") } - protected def testBloomFilterCreation(bloomFilterKind: Kind) { + protected def testBloomFilterCreation(bloomFilterKind: Kind): Unit = { val tableName = "bloomFilter" withTempDir { dir => @@ -120,7 +120,8 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } - protected def testSelectiveDictionaryEncoding(isSelective: Boolean, isHive23: Boolean = false) { + protected def testSelectiveDictionaryEncoding(isSelective: Boolean, + isHive23: Boolean = false): Unit = { val tableName = "orcTable" withTempDir { dir => @@ -345,7 +346,9 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } - test("SPARK-23340 Empty float/double array columns raise EOFException") { + // SPARK-28885 String value is not allowed to be stored as numeric type with + // ANSI store assignment policy. 
+ ignore("SPARK-23340 Empty float/double array columns raise EOFException") { Seq(Seq(Array.empty[Float]).toDF(), Seq(Array.empty[Double]).toDF()).foreach { df => withTempPath { path => df.write.format("orc").save(path.getCanonicalPath) @@ -372,9 +375,10 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { val orcFilePath = new Path(partFiles.head.getAbsolutePath) val readerOptions = OrcFile.readerOptions(new Configuration()) - val reader = OrcFile.createReader(orcFilePath, readerOptions) - val version = UTF_8.decode(reader.getMetadataValue(SPARK_VERSION_METADATA_KEY)).toString - assert(version === SPARK_VERSION_SHORT) + Utils.tryWithResource(OrcFile.createReader(orcFilePath, readerOptions)) { reader => + val version = UTF_8.decode(reader.getMetadataValue(SPARK_VERSION_METADATA_KEY)).toString + assert(version === SPARK_VERSION_SHORT) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index adbd93dcb4fe8..388744bd0fd6e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -27,9 +27,9 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.{Attribute, Predicate} import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, FileBasedDataSourceTest} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable +import org.apache.spark.sql.execution.datasources.FileBasedDataSourceTest +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation +import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION @@ -119,17 +119,14 @@ abstract class OrcTest extends QueryTest with FileBasedDataSourceTest with Befor query.queryExecution.optimizedPlan match { case PhysicalOperation(_, filters, - DataSourceV2Relation(orcTable: OrcTable, _, options)) => + DataSourceV2ScanRelation(_, o: OrcScan, _)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") - val scanBuilder = orcTable.newScanBuilder(options) - scanBuilder.pushFilters(filters.flatMap(DataSourceStrategy.translateFilter).toArray) - val pushedFilters = scanBuilder.pushedFilters() if (noneSupported) { - assert(pushedFilters.isEmpty, "Unsupported filters should not show in pushed filters") + assert(o.pushedFilters.isEmpty, "Unsupported filters should not show in pushed filters") } else { - assert(pushedFilters.nonEmpty, "No filter is pushed down") - val maybeFilter = OrcFilters.createFilter(query.schema, pushedFilters) - assert(maybeFilter.isEmpty, s"Couldn't generate filter predicate for $pushedFilters") + assert(o.pushedFilters.nonEmpty, "No filter is pushed down") + val maybeFilter = OrcFilters.createFilter(query.schema, o.pushedFilters) + assert(maybeFilter.isEmpty, s"Couldn't generate filter predicate for ${o.pushedFilters}") } case _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala index b626edf5dc28e..6c9bd32913178 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala @@ -17,14 +17,15 @@ package org.apache.spark.sql.execution.datasources.orc import org.apache.spark.SparkConf -import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.{DataFrame, Row} import 
org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.SchemaPruningSuite import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.internal.SQLConf -class OrcV2SchemaPruningSuite extends SchemaPruningSuite { +class OrcV2SchemaPruningSuite extends SchemaPruningSuite with AdaptiveSparkPlanHelper { override protected val dataSourceName: String = "orc" override protected val vectorizedReaderEnabledKey: String = SQLConf.ORC_VECTORIZED_READER_ENABLED.key @@ -36,7 +37,7 @@ class OrcV2SchemaPruningSuite extends SchemaPruningSuite { override def checkScanSchemata(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { val fileSourceScanSchemata = - df.queryExecution.executedPlan.collect { + collect(df.queryExecution.executedPlan) { case BatchScanExec(_, scan: OrcScan) => scan.readDataSchema } assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 9671866fe1535..4e0c1c2dbe601 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -33,9 +33,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} -import org.apache.spark.sql.execution.datasources.orc.OrcFilters -import 
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation +import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType @@ -1391,6 +1390,27 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } } + + test("SPARK-30826: case insensitivity of StringStartsWith attribute") { + import testImplicits._ + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + withTable("t1") { + withTempPath { dir => + val path = dir.toURI.toString + Seq("42").toDF("COL").write.parquet(path) + spark.sql( + s""" + |CREATE TABLE t1 (col STRING) + |USING parquet + |OPTIONS (path '$path') + """.stripMargin) + checkAnswer( + spark.sql("SELECT * FROM t1 WHERE col LIKE '4%'"), + Row("42")) + } + } + } + } } class ParquetV1FilterSuite extends ParquetFilterSuite { @@ -1484,12 +1504,10 @@ class ParquetV2FilterSuite extends ParquetFilterSuite { query.queryExecution.optimizedPlan.collectFirst { case PhysicalOperation(_, filters, - DataSourceV2Relation(parquetTable: ParquetTable, _, options)) => + DataSourceV2ScanRelation(_, scan: ParquetScan, _)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") - val scanBuilder = parquetTable.newScanBuilder(options) val sourceFilters = filters.flatMap(DataSourceStrategy.translateFilter).toArray - scanBuilder.pushFilters(sourceFilters) - val pushedFilters = scanBuilder.pushedFilters() + val pushedFilters = scan.pushedFilters assert(pushedFilters.nonEmpty, "No filter is pushed down") val schema = new SparkToParquetSchemaConverter(conf).convert(df.schema) val parquetFilters = createParquetFilters(schema) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 026ba5deffdfd..1550b3bbb6242 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -66,7 +66,7 @@ private[parquet] class TestGroupWriteSupport(schema: MessageType) extends WriteS new WriteContext(schema, new java.util.HashMap[String, String]()) } - override def write(record: Group) { + override def write(record: Group): Unit = { groupWriter.write(record) } } @@ -204,6 +204,42 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } + testStandardAndLegacyModes("array of struct") { + val data = (1 to 4).map { i => + Tuple1( + Seq( + Tuple1(s"1st_val_$i"), + Tuple1(s"2nd_val_$i") + ) + ) + } + withParquetDataFrame(data) { df => + // Structs are converted to `Row`s + checkAnswer(df, data.map { case Tuple1(array) => + Row(array.map(struct => Row(struct.productIterator.toSeq: _*))) + }) + } + } + + testStandardAndLegacyModes("array of nested struct") { + val data = (1 to 4).map { i => + Tuple1( + Seq( + Tuple1( + Tuple1(s"1st_val_$i")), + Tuple1( + Tuple1(s"2nd_val_$i")) + ) + ) + } + withParquetDataFrame(data) { df => + // Structs are converted to `Row`s + checkAnswer(df, data.map { case Tuple1(array) => + Row(array.map { case Tuple1(Tuple1(str)) => Row(Row(str))}) + }) + } + } + testStandardAndLegacyModes("nested struct with array of array as field") { val data = (1 to 4).map(i => Tuple1((i, Seq(Seq(s"val_$i"))))) withParquetDataFrame(data) { df => @@ -214,9 +250,34 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } + testStandardAndLegacyModes("nested map with struct as key type") { + val data = (1 to 4).map { i => + Tuple1( + Map( + (i, s"kA_$i") 
-> s"vA_$i", + (i, s"kB_$i") -> s"vB_$i" + ) + ) + } + withParquetDataFrame(data) { df => + // Structs are converted to `Row`s + checkAnswer(df, data.map { case Tuple1(m) => + Row(m.map { case (k, v) => Row(k.productIterator.toSeq: _*) -> v }) + }) + } + } + testStandardAndLegacyModes("nested map with struct as value type") { - val data = (1 to 4).map(i => Tuple1(Map(i -> ((i, s"val_$i"))))) + val data = (1 to 4).map { i => + Tuple1( + Map( + s"kA_$i" -> ((i, s"vA_$i")), + s"kB_$i" -> ((i, s"vB_$i")) + ) + ) + } withParquetDataFrame(data) { df => + // Structs are converted to `Row`s checkAnswer(df, data.map { case Tuple1(m) => Row(m.mapValues(struct => Row(struct.productIterator.toSeq: _*))) }) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 1ded34f24e436..649a46f190580 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.datasources.parquet import java.io.File +import java.time.ZoneOffset import org.apache.commons.io.FileUtils import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} @@ -145,8 +146,8 @@ class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedS impalaFileData.map { ts => DateTimeUtils.toJavaTimestamp(DateTimeUtils.convertTz( DateTimeUtils.fromJavaTimestamp(ts), - DateTimeUtils.TimeZoneUTC, - DateTimeUtils.getTimeZone(conf.sessionLocalTimeZone))) + ZoneOffset.UTC, + DateTimeUtils.getZoneId(conf.sessionLocalTimeZone))) } } val fullExpectations = (ts ++ impalaExpectations).map(_.toString).sorted.toArray diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 0a85e3cdeaf1d..e63929470ce5f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -58,7 +58,7 @@ abstract class ParquetPartitionDiscoverySuite val defaultPartitionName = ExternalCatalogUtils.DEFAULT_PARTITION_NAME val timeZoneId = ZoneId.systemDefault() - val df = DateFormatter() + val df = DateFormatter(timeZoneId) val tf = TimestampFormatter(timestampPartitionPattern, timeZoneId) protected override def beforeAll(): Unit = { @@ -215,14 +215,14 @@ abstract class ParquetPartitionDiscoverySuite check("file://path/a=10", Some { PartitionValues( - ArrayBuffer("a"), - ArrayBuffer(Literal.create(10, IntegerType))) + Seq("a"), + Seq(Literal.create(10, IntegerType))) }) check("file://path/a=10/b=hello/c=1.5", Some { PartitionValues( - ArrayBuffer("a", "b", "c"), - ArrayBuffer( + Seq("a", "b", "c"), + Seq( Literal.create(10, IntegerType), Literal.create("hello", StringType), Literal.create(1.5, DoubleType))) @@ -230,8 +230,8 @@ abstract class ParquetPartitionDiscoverySuite check("file://path/a=10/b_hello/c=1.5", Some { PartitionValues( - ArrayBuffer("c"), - ArrayBuffer(Literal.create(1.5, DoubleType))) + Seq("c"), + Seq(Literal.create(1.5, DoubleType))) }) check("file:///", None) @@ -272,8 +272,8 @@ abstract class ParquetPartitionDiscoverySuite assert(partitionSpec2 == Option(PartitionValues( - ArrayBuffer("a"), - ArrayBuffer(Literal.create(10, IntegerType))))) + Seq("a"), + Seq(Literal.create(10, IntegerType))))) } test("parse partitions") { @@ -1281,7 +1281,7 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { (1 to 
10).map(i => (i, i.toString)).toDF("a", "b").write.parquet(dir.getCanonicalPath) val queryExecution = spark.read.parquet(dir.getCanonicalPath).queryExecution queryExecution.analyzed.collectFirst { - case DataSourceV2Relation(fileTable: FileTable, _, _) => + case DataSourceV2Relation(fileTable: FileTable, _, _, _, _) => assert(fileTable.fileIndex.partitionSpec() === PartitionSpec.emptySpec) }.getOrElse { fail(s"Expecting a matching DataSourceV2Relation, but got:\n$queryExecution") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 88b94281d88ee..917aaba2669ce 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -141,30 +141,12 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS } } - test("SPARK-10634 timestamp written and read as INT64 - TIMESTAMP_MILLIS") { - val data = (1 to 10).map(i => Row(i, new java.sql.Timestamp(i))) - val schema = StructType(List(StructField("d", IntegerType, false), - StructField("time", TimestampType, false)).toArray) - withSQLConf(SQLConf.PARQUET_INT64_AS_TIMESTAMP_MILLIS.key -> "true") { - withTempPath { file => - val df = spark.createDataFrame(sparkContext.parallelize(data), schema) - df.write.parquet(file.getCanonicalPath) - ("true" :: "false" :: Nil).foreach { vectorized => - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized) { - val df2 = spark.read.parquet(file.getCanonicalPath) - checkAnswer(df2, df.collect().toSeq) - } - } - } - } - } - test("SPARK-10634 timestamp written and read as INT64 - truncation") { withTable("ts") { sql("create table ts (c1 int, c2 timestamp) using parquet") - sql("insert into ts values (1, '2016-01-01 10:11:12.123456')") + 
sql("insert into ts values (1, timestamp'2016-01-01 10:11:12.123456')") sql("insert into ts values (2, null)") - sql("insert into ts values (3, '1965-01-01 10:11:12.123456')") + sql("insert into ts values (3, timestamp'1965-01-01 10:11:12.123456')") val expected = Seq( (1, "2016-01-01 10:11:12.123456"), (2, null), @@ -172,45 +154,6 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS .toDS().select('_1, $"_2".cast("timestamp")) checkAnswer(sql("select * from ts"), expected) } - - // The microsecond portion is truncated when written as TIMESTAMP_MILLIS. - withTable("ts") { - withSQLConf(SQLConf.PARQUET_INT64_AS_TIMESTAMP_MILLIS.key -> "true") { - sql("create table ts (c1 int, c2 timestamp) using parquet") - sql("insert into ts values (1, '2016-01-01 10:11:12.123456')") - sql("insert into ts values (2, null)") - sql("insert into ts values (3, '1965-01-01 10:11:12.125456')") - sql("insert into ts values (4, '1965-01-01 10:11:12.125')") - sql("insert into ts values (5, '1965-01-01 10:11:12.1')") - sql("insert into ts values (6, '1965-01-01 10:11:12.123456789')") - sql("insert into ts values (7, '0001-01-01 00:00:00.000000')") - val expected = Seq( - (1, "2016-01-01 10:11:12.123"), - (2, null), - (3, "1965-01-01 10:11:12.125"), - (4, "1965-01-01 10:11:12.125"), - (5, "1965-01-01 10:11:12.1"), - (6, "1965-01-01 10:11:12.123"), - (7, "0001-01-01 00:00:00.000")) - .toDS().select('_1, $"_2".cast("timestamp")) - checkAnswer(sql("select * from ts"), expected) - - // Read timestamps that were encoded as TIMESTAMP_MILLIS annotated as INT64 - // with PARQUET_INT64_AS_TIMESTAMP_MILLIS set to false. 
- withSQLConf(SQLConf.PARQUET_INT64_AS_TIMESTAMP_MILLIS.key -> "false") { - val expected = Seq( - (1, "2016-01-01 10:11:12.123"), - (2, null), - (3, "1965-01-01 10:11:12.125"), - (4, "1965-01-01 10:11:12.125"), - (5, "1965-01-01 10:11:12.1"), - (6, "1965-01-01 10:11:12.123"), - (7, "0001-01-01 00:00:00.000")) - .toDS().select('_1, $"_2".cast("timestamp")) - checkAnswer(sql("select * from ts"), expected) - } - } - } } test("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") { @@ -391,7 +334,7 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS withTempPath { dir => val basePath = dir.getCanonicalPath val schema = StructType(Array(StructField("name", DecimalType(10, 5), false))) - val rowRDD = sparkContext.parallelize(Array(Row(Decimal("67123.45")))) + val rowRDD = sparkContext.parallelize(Seq(Row(Decimal("67123.45")))) val df = spark.createDataFrame(rowRDD, schema) df.write.parquet(basePath) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index 309507d4ddd84..c64e95078e916 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -20,12 +20,13 @@ package org.apache.spark.sql.execution.datasources.parquet import org.apache.spark.SparkConf import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.SchemaPruningSuite import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.internal.SQLConf -abstract 
class ParquetSchemaPruningSuite extends SchemaPruningSuite { +abstract class ParquetSchemaPruningSuite extends SchemaPruningSuite with AdaptiveSparkPlanHelper { override protected val dataSourceName: String = "parquet" override protected val vectorizedReaderEnabledKey: String = SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key @@ -48,7 +49,7 @@ class ParquetV2SchemaPruningSuite extends ParquetSchemaPruningSuite { override def checkScanSchemata(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { val fileSourceScanSchemata = - df.queryExecution.executedPlan.collect { + collect(df.queryExecution.executedPlan) { case scan: BatchScanExec => scan.scan.asInstanceOf[ParquetScan].readDataSchema } assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 62a779528cec1..539ff0d0e905c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -233,13 +233,4 @@ class TextSuite extends QueryTest with SharedSparkSession { assert(data(3) == Row("\"doh\"")) assert(data.length == 4) } - - test("do not produce empty files for empty partitions") { - withTempPath { dir => - val path = dir.getCanonicalPath - spark.emptyDataset[String].write.text(path) - val files = new File(path).listFiles() - assert(!files.exists(_.getName.endsWith("txt"))) - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala index ad0dfadacca15..8f001e0e4d668 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala @@ -21,10 +21,10 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.FileStatus import org.apache.spark.sql.{QueryTest, SparkSession} +import org.apache.spark.sql.connector.read.ScanBuilder +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.text.TextFileFormat -import org.apache.spark.sql.sources.v2.reader.ScanBuilder -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -44,7 +44,7 @@ class DummyFileTable( override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = null - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = null + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = null override def supportsDataType(dataType: DataType): Boolean = dataType == StringType diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index 275bc339b3b5b..c399a011f9073 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -24,17 +24,15 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalog.v2.{Catalogs, Identifier, NamespaceChange, TableChange} import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, 
NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, NamespaceChange, SupportsNamespaces, TableChange} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{DoubleType, IntegerType, LongType, StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap -class V2SessionCatalogBaseSuite extends SparkFunSuite with SharedSparkSession with BeforeAndAfter { +abstract class V2SessionCatalogBaseSuite extends SharedSparkSession with BeforeAndAfter { val emptyProps: util.Map[String, String] = Collections.emptyMap[String, String] val schema: StructType = new StructType() @@ -46,7 +44,7 @@ class V2SessionCatalogBaseSuite extends SparkFunSuite with SharedSparkSession wi val testIdent: Identifier = Identifier.of(testNs, "test_table") def newCatalog(): V2SessionCatalog = { - val newCatalog = new V2SessionCatalog(spark.sessionState) + val newCatalog = new V2SessionCatalog(spark.sessionState.catalog, spark.sessionState.conf) newCatalog.initialize("test", CaseInsensitiveStringMap.empty()) newCatalog } @@ -54,11 +52,10 @@ class V2SessionCatalogBaseSuite extends SparkFunSuite with SharedSparkSession wi class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ override protected def beforeAll(): Unit = { super.beforeAll() - // TODO: when there is a public API for v2 catalogs, use that instead val catalog = newCatalog() catalog.createNamespace(Array("db"), emptyProps) catalog.createNamespace(Array("db2"), emptyProps) @@ -82,16 +79,6 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { private val testIdentNew = Identifier.of(testNs, "test_table_new") - test("Catalogs can 
load the catalog") { - val catalog = newCatalog() - - val conf = new SQLConf - conf.setConfString("spark.sql.catalog.test", catalog.getClass.getName) - - val loaded = Catalogs.load("test", conf) - assert(loaded.getClass == catalog.getClass) - } - test("listTables") { val catalog = newCatalog() val ident1 = Identifier.of(Array("ns"), "test_table_1") @@ -404,7 +391,7 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { assert(updated.schema == expectedSchema) } - test("alterTable: update column data type and nullability") { + test("alterTable: update column nullability") { val catalog = newCatalog() val originalSchema = new StructType() @@ -415,27 +402,12 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { assert(table.schema == originalSchema) val updated = catalog.alterTable(testIdent, - TableChange.updateColumnType(Array("id"), LongType, true)) + TableChange.updateColumnNullability(Array("id"), true)) - val expectedSchema = new StructType().add("id", LongType).add("data", StringType) + val expectedSchema = new StructType().add("id", IntegerType).add("data", StringType) assert(updated.schema == expectedSchema) } - test("alterTable: update optional column to required fails") { - val catalog = newCatalog() - - val table = catalog.createTable(testIdent, schema, Array.empty, emptyProps) - - assert(table.schema == schema) - - val exc = intercept[IllegalArgumentException] { - catalog.alterTable(testIdent, TableChange.updateColumnType(Array("id"), LongType, false)) - } - - assert(exc.getMessage.contains("Cannot change optional column to required")) - assert(exc.getMessage.contains("id")) - } - test("alterTable: update missing column fails") { val catalog = newCatalog() @@ -763,13 +735,14 @@ class V2SessionCatalogTableSuite extends V2SessionCatalogBaseSuite { class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ + import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ def checkMetadata( expected: scala.collection.Map[String, String], actual: scala.collection.Map[String, String]): Unit = { // remove location and comment that are automatically added by HMS unless they are expected - val toRemove = V2SessionCatalog.RESERVED_PROPERTIES.filter(expected.contains) + val toRemove = + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filter(expected.contains) assert(expected -- toRemove === actual) } @@ -1022,31 +995,18 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { assert(exc.getMessage.contains(testNs.quoted)) } - test("alterNamespace: fail to remove location") { + test("alterNamespace: fail to remove reserved properties") { val catalog = newCatalog() catalog.createNamespace(testNs, emptyProps) - val exc = intercept[UnsupportedOperationException] { - catalog.alterNamespace(testNs, NamespaceChange.removeProperty("location")) - } + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.foreach { p => + val exc = intercept[UnsupportedOperationException] { + catalog.alterNamespace(testNs, NamespaceChange.removeProperty(p)) + } + assert(exc.getMessage.contains(s"Cannot remove reserved property: $p")) - assert(exc.getMessage.contains("Cannot remove reserved property: location")) - - catalog.dropNamespace(testNs) - } - - test("alterNamespace: fail to remove comment") { - val catalog = newCatalog() - - catalog.createNamespace(testNs, Map("comment" -> "test db").asJava) - - val exc = intercept[UnsupportedOperationException] { - catalog.alterNamespace(testNs, NamespaceChange.removeProperty("comment")) } - - assert(exc.getMessage.contains("Cannot remove reserved property: comment")) - catalog.dropNamespace(testNs) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala index 7a8da7e7669a4..4cb845b2487d6 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala @@ -19,12 +19,33 @@ package org.apache.spark.sql.execution.debug import java.io.ByteArrayOutputStream +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.execution.{CodegenSupport, LeafExecNode, WholeStageCodegenExec} import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.test.SQLTestData.TestData +import org.apache.spark.sql.types.StructType class DebuggingSuite extends SharedSparkSession { + + var originalValue: String = _ + // With on AQE, the WholeStageCodegenExec is added when running QueryStageExec. + override def beforeAll(): Unit = { + super.beforeAll() + originalValue = spark.conf.get(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key) + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + } + + override def afterAll(): Unit = { + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, originalValue) + super.afterAll() + } + test("DataFrame.debug()") { testData.debug() } @@ -46,7 +67,7 @@ class DebuggingSuite extends SharedSparkSession { val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count() .queryExecution.executedPlan) assert(res.length == 2) - assert(res.forall{ case (subtree, code) => + assert(res.forall{ case (subtree, code, _) => subtree.contains("Range") && code.contains("Object[]")}) } @@ -65,7 +86,7 @@ class DebuggingSuite extends SharedSparkSession { """== BroadcastExchange HashedRelationBroadcastMode(List(input[0, bigint, false])), [id=#x] == |Tuples output: 0 | id LongType: {} - |== WholeStageCodegen == + |== WholeStageCodegen (1) == |Tuples output: 10 | id 
LongType: {java.lang.Long} |== Range (0, 10, step=1, splits=2) == @@ -90,4 +111,41 @@ class DebuggingSuite extends SharedSparkSession { | id LongType: {} |""".stripMargin)) } + + case class DummyCodeGeneratorPlan(useInnerClass: Boolean) + extends CodegenSupport with LeafExecNode { + override def output: Seq[Attribute] = StructType.fromDDL("d int").toAttributes + override def inputRDDs(): Seq[RDD[InternalRow]] = Seq(spark.sparkContext.emptyRDD[InternalRow]) + override protected def doExecute(): RDD[InternalRow] = sys.error("Not used") + override protected def doProduce(ctx: CodegenContext): String = { + if (useInnerClass) { + val innerClassName = ctx.freshName("innerClass") + ctx.addInnerClass( + s""" + |public class $innerClassName { + | public $innerClassName() {} + |} + """.stripMargin) + } + "" + } + } + + test("Prints bytecode statistics in debugCodegen") { + Seq(true, false).foreach { useInnerClass => + val plan = WholeStageCodegenExec(DummyCodeGeneratorPlan(useInnerClass))(codegenStageId = 0) + + val genCodes = codegenStringSeq(plan) + assert(genCodes.length == 1) + val (_, _, codeStats) = genCodes.head + val expectedNumInnerClasses = if (useInnerClass) 1 else 0 + assert(codeStats.maxMethodCodeSize > 0 && codeStats.maxConstPoolSize > 0 && + codeStats.numInnerClasses == expectedNumInnerClasses) + + val debugCodegenStr = codegenString(plan) + assert(debugCodegenStr.contains("maxMethodCodeSize:")) + assert(debugCodegenStr.contains("maxConstantPoolSize:")) + assert(debugCodegenStr.contains("numInnerClasses:")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilderSuite.scala new file mode 100644 index 0000000000000..5f3d750e8f271 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/history/SQLEventFilterBuilderSuite.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.history + +import java.util.Properties + +import org.apache.spark.SparkFunSuite +import org.apache.spark.scheduler._ +import org.apache.spark.sql.execution.{SparkPlanInfo, SQLExecution} +import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart} +import org.apache.spark.status.ListenerEventsTestHelper + +class SQLEventFilterBuilderSuite extends SparkFunSuite { + import ListenerEventsTestHelper._ + + override protected def beforeEach(): Unit = { + ListenerEventsTestHelper.reset() + } + + test("track live SQL executions") { + var time = 0L + + val listener = new SQLEventFilterBuilder + + listener.onOtherEvent(SparkListenerLogStart("TestSparkVersion")) + + // Start the application. + time += 1 + listener.onApplicationStart(SparkListenerApplicationStart( + "name", + Some("id"), + time, + "user", + Some("attempt"), + None)) + + // Start a couple of executors. 
+ time += 1 + val execIds = Array("1", "2") + execIds.foreach { id => + listener.onExecutorAdded(createExecutorAddedEvent(id, time)) + } + + // Start SQL Execution + listener.onOtherEvent(SparkListenerSQLExecutionStart(1, "desc1", "details1", "plan", + new SparkPlanInfo("node", "str", Seq.empty, Map.empty, Seq.empty), time)) + + time += 1 + + // job 1, 2: coupled with SQL execution 1, finished + val jobProp = createJobProps() + val jobPropWithSqlExecution = new Properties(jobProp) + jobPropWithSqlExecution.setProperty(SQLExecution.EXECUTION_ID_KEY, "1") + val jobInfoForJob1 = pushJobEventsWithoutJobEnd(listener, 1, jobPropWithSqlExecution, + execIds, time) + listener.onJobEnd(SparkListenerJobEnd(1, time, JobSucceeded)) + + val jobInfoForJob2 = pushJobEventsWithoutJobEnd(listener, 2, jobPropWithSqlExecution, + execIds, time) + listener.onJobEnd(SparkListenerJobEnd(2, time, JobSucceeded)) + + // job 3: not coupled with SQL execution 1, finished + pushJobEventsWithoutJobEnd(listener, 3, jobProp, execIds, time) + listener.onJobEnd(SparkListenerJobEnd(3, time, JobSucceeded)) + + // job 4: not coupled with SQL execution 1, not finished + pushJobEventsWithoutJobEnd(listener, 4, jobProp, execIds, time) + listener.onJobEnd(SparkListenerJobEnd(4, time, JobSucceeded)) + + assert(listener.liveSQLExecutions === Set(1)) + + // only SQL executions related jobs are tracked + assert(listener.liveJobs === Set(1, 2)) + assert(listener.liveStages === + (jobInfoForJob1.stageIds ++ jobInfoForJob2.stageIds).toSet) + assert(listener.liveTasks === + (jobInfoForJob1.stageToTaskIds.values.flatten ++ + jobInfoForJob2.stageToTaskIds.values.flatten).toSet) + assert(listener.liveRDDs === + (jobInfoForJob1.stageToRddIds.values.flatten ++ + jobInfoForJob2.stageToRddIds.values.flatten).toSet) + + // End SQL execution + listener.onOtherEvent(SparkListenerSQLExecutionEnd(1, 0)) + + assert(listener.liveSQLExecutions.isEmpty) + assert(listener.liveJobs.isEmpty) + assert(listener.liveStages.isEmpty) + 
assert(listener.liveTasks.isEmpty) + assert(listener.liveRDDs.isEmpty) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/history/SQLLiveEntitiesEventFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/history/SQLLiveEntitiesEventFilterSuite.scala new file mode 100644 index 0000000000000..46fdaba413c6e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/history/SQLLiveEntitiesEventFilterSuite.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.history + +import org.apache.spark.{SparkFunSuite, Success, TaskState} +import org.apache.spark.executor.ExecutorMetrics +import org.apache.spark.scheduler._ +import org.apache.spark.sql.execution.ui.{SparkListenerDriverAccumUpdates, SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart} +import org.apache.spark.status.ListenerEventsTestHelper.{createRddsWithId, createStage, createTasks} + +class SQLLiveEntitiesEventFilterSuite extends SparkFunSuite { + test("filter in events for jobs related to live SQL execution") { + // assume finished job 1 with stage 1, task (1, 2), rdds (1, 2) and finished sql execution id 1 + // live job 2 with stages 2, tasks (3, 4), rdds (3, 4) and job 2 belongs to the live + // sql execution id 2 + + val liveSQLExecutions = Set(2L) + val liveJobs = Set(2) + val liveStages = Set(2, 3) + val liveTasks = Set(3L, 4L, 5L, 6L) + val liveRDDs = Set(3, 4, 5, 6) + val liveExecutors: Set[String] = Set("1", "2") + + val filter = new SQLLiveEntitiesEventFilter(liveSQLExecutions, liveJobs, liveStages, liveTasks, + liveRDDs) + val acceptFn = filter.acceptFn().lift + + // Verifying with finished SQL execution 1 + assert(Some(false) === acceptFn(SparkListenerSQLExecutionStart(1, "description1", "details1", + "plan", null, 0))) + assert(Some(false) === acceptFn(SparkListenerSQLExecutionEnd(1, 0))) + assert(Some(false) === acceptFn(SparkListenerSQLAdaptiveExecutionUpdate(1, "plan", null))) + assert(Some(false) === acceptFn(SparkListenerDriverAccumUpdates(1, Seq.empty))) + + // Verifying with finished job 1 + val rddsForStage1 = createRddsWithId(1 to 2) + val stage1 = createStage(1, rddsForStage1, Nil) + val tasksForStage1 = createTasks(Seq(1L, 2L), liveExecutors.toArray, 0) + tasksForStage1.foreach { task => task.markFinished(TaskState.FINISHED, 5) } + + val jobStartEventForJob1 = SparkListenerJobStart(1, 0, Seq(stage1)) + val jobEndEventForJob1 = 
SparkListenerJobEnd(1, 0, JobSucceeded) + val stageSubmittedEventsForJob1 = SparkListenerStageSubmitted(stage1) + val stageCompletedEventsForJob1 = SparkListenerStageCompleted(stage1) + val unpersistRDDEventsForJob1 = (1 to 2).map(SparkListenerUnpersistRDD) + + // job events for finished job should be considered as "don't know" + assert(None === acceptFn(jobStartEventForJob1)) + assert(None === acceptFn(jobEndEventForJob1)) + + // stage events for finished job should be considered as "don't know" + assert(None === acceptFn(stageSubmittedEventsForJob1)) + assert(None === acceptFn(stageCompletedEventsForJob1)) + unpersistRDDEventsForJob1.foreach { event => + assert(None === acceptFn(event)) + } + + val taskSpeculativeTaskSubmittedEvent = SparkListenerSpeculativeTaskSubmitted(stage1.stageId, + stageAttemptId = 1) + assert(None === acceptFn(taskSpeculativeTaskSubmittedEvent)) + + // task events for finished job should be considered as "don't know" + tasksForStage1.foreach { task => + val taskStartEvent = SparkListenerTaskStart(stage1.stageId, 0, task) + assert(None === acceptFn(taskStartEvent)) + + val taskGettingResultEvent = SparkListenerTaskGettingResult(task) + assert(None === acceptFn(taskGettingResultEvent)) + + val taskEndEvent = SparkListenerTaskEnd(stage1.stageId, 0, "taskType", + Success, task, new ExecutorMetrics, null) + assert(None === acceptFn(taskEndEvent)) + } + + // Verifying with live SQL execution 2 + assert(Some(true) === acceptFn(SparkListenerSQLExecutionStart(2, "description2", "details2", + "plan", null, 0))) + assert(Some(true) === acceptFn(SparkListenerSQLExecutionEnd(2, 0))) + assert(Some(true) === acceptFn(SparkListenerSQLAdaptiveExecutionUpdate(2, "plan", null))) + assert(Some(true) === acceptFn(SparkListenerDriverAccumUpdates(2, Seq.empty))) + + // Verifying with live job 2 + val rddsForStage2 = createRddsWithId(3 to 4) + val stage2 = createStage(2, rddsForStage2, Nil) + val tasksForStage2 = createTasks(Seq(3L, 4L), liveExecutors.toArray, 
0) + tasksForStage2.foreach { task => task.markFinished(TaskState.FINISHED, 5) } + + val jobStartEventForJob2 = SparkListenerJobStart(2, 0, Seq(stage2)) + val stageSubmittedEventsForJob2 = SparkListenerStageSubmitted(stage2) + val stageCompletedEventsForJob2 = SparkListenerStageCompleted(stage2) + val unpersistRDDEventsForJob2 = rddsForStage2.map { rdd => SparkListenerUnpersistRDD(rdd.id) } + + // job events for live job should be accepted + assert(Some(true) === acceptFn(jobStartEventForJob2)) + + // stage events for live job should be accepted + assert(Some(true) === acceptFn(stageSubmittedEventsForJob2)) + assert(Some(true) === acceptFn(stageCompletedEventsForJob2)) + unpersistRDDEventsForJob2.foreach { event => + assert(Some(true) === acceptFn(event)) + } + + val taskSpeculativeTaskSubmittedEvent2 = SparkListenerSpeculativeTaskSubmitted(stage2.stageId, + stageAttemptId = 1) + assert(Some(true) === acceptFn(taskSpeculativeTaskSubmittedEvent2)) + + // task events for live job should be accepted + tasksForStage2.foreach { task => + val taskStartEvent = SparkListenerTaskStart(stage2.stageId, 0, task) + assert(Some(true) === acceptFn(taskStartEvent)) + + val taskGettingResultEvent = SparkListenerTaskGettingResult(task) + assert(Some(true) === acceptFn(taskGettingResultEvent)) + + val taskEndEvent = SparkListenerTaskEnd(stage2.stageId, 0, "taskType", + Success, task, new ExecutorMetrics, null) + assert(Some(true) === acceptFn(taskEndEvent)) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index 91cb919479bfa..5ce758e1e4eb8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession} import 
org.apache.spark.sql.catalyst.expressions.{BitwiseAnd, BitwiseOr, Cast, Literal, ShiftLeft} import org.apache.spark.sql.catalyst.plans.logical.BROADCAST import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.exchange.EnsureRequirements import org.apache.spark.sql.functions._ @@ -38,7 +39,7 @@ import org.apache.spark.sql.types.{LongType, ShortType} * unsafe map in [[org.apache.spark.sql.execution.joins.UnsafeHashedRelation]] is not triggered * without serializing the hashed relation, which does not happen in local mode. */ -class BroadcastJoinSuite extends QueryTest with SQLTestUtils { +class BroadcastJoinSuite extends QueryTest with SQLTestUtils with AdaptiveSparkPlanHelper { import testImplicits._ protected var spark: SparkSession = null @@ -122,7 +123,7 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils { val df2 = Seq((1, "1"), (2, "2")).toDF("key", "value") df2.cache() val df3 = df1.join(broadcast(df2), Seq("key"), "inner") - val numBroadCastHashJoin = df3.queryExecution.executedPlan.collect { + val numBroadCastHashJoin = collect(df3.queryExecution.executedPlan) { case b: BroadcastHashJoinExec => b }.size assert(numBroadCastHashJoin === 1) @@ -140,13 +141,13 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils { broadcast(df2).cache() val df3 = df1.join(df2, Seq("key"), "inner") - val numCachedPlan = df3.queryExecution.executedPlan.collect { + val numCachedPlan = collect(df3.queryExecution.executedPlan) { case i: InMemoryTableScanExec => i }.size // df2 should be cached. assert(numCachedPlan === 1) - val numBroadCastHashJoin = df3.queryExecution.executedPlan.collect { + val numBroadCastHashJoin = collect(df3.queryExecution.executedPlan) { case b: BroadcastHashJoinExec => b }.size // df2 should not be broadcasted. 
@@ -272,7 +273,6 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils { } test("Shouldn't change broadcast join buildSide if user clearly specified") { - withTempView("t1", "t2") { Seq((1, "4"), (2, "2")).toDF("key", "value").createTempView("t1") Seq((1, "1"), (2, "12.3"), (2, "123")).toDF("key", "value").createTempView("t2") @@ -378,7 +378,7 @@ class BroadcastJoinSuite extends QueryTest with SQLTestUtils { private val bl = BroadcastNestedLoopJoinExec.toString private def assertJoinBuildSide(sqlStr: String, joinMethod: String, buildSide: BuildSide): Any = { - val executedPlan = sql(sqlStr).queryExecution.executedPlan + val executedPlan = stripAQEPlan(sql(sqlStr).queryExecution.executedPlan) executedPlan match { case b: BroadcastNestedLoopJoinExec => assert(b.getClass.getSimpleName === joinMethod) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index e7f1c42d7d7c5..7d09577075d5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Final, Partial} import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.execution.{FilterExec, RangeExec, SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -35,6 +36,19 @@ import org.apache.spark.util.{AccumulatorContext, JsonProtocol} class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { import testImplicits._ + var originalValue: String = _ + // With AQE on/off, the metric 
info is different. + override def beforeAll(): Unit = { + super.beforeAll() + originalValue = spark.conf.get(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key) + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + } + + override def afterAll(): Unit = { + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, originalValue) + super.afterAll() + } + /** * Generates a `DataFrame` by filling randomly generated bytes for hash collision. */ @@ -83,9 +97,10 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { // TODO: update metrics in generated operators val ds = spark.range(10).filter('id < 5) testSparkPlanMetricsWithPredicates(ds.toDF(), 1, Map( - 0L -> (("WholeStageCodegen", Map( - "duration total (min, med, max)" -> {_.toString.matches(timingMetricPattern)}))) - ), true) + 0L -> (("WholeStageCodegen (1)", Map( + "duration total (min, med, max (stageId (attemptId): taskId))" -> { + _.toString.matches(timingMetricPattern) + })))), true) } test("Aggregate metrics") { @@ -95,9 +110,11 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { val df = testData2.groupBy().count() // 2 partitions val expected1 = Seq( Map("number of output rows" -> 2L, - "avg hash probe bucket list iters (min, med, max)" -> "\n(1, 1, 1)"), + "avg hash probe bucket list iters (min, med, max (stageId (attemptId): taskId))" -> + aggregateMetricsPattern), Map("number of output rows" -> 1L, - "avg hash probe bucket list iters (min, med, max)" -> "\n(1, 1, 1)")) + "avg hash probe bucket list iters (min, med, max (stageId (attemptId): taskId))" -> + aggregateMetricsPattern)) val shuffleExpected1 = Map( "records read" -> 2L, "local blocks read" -> 2L, @@ -113,9 +130,12 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { val df2 = testData2.groupBy('a).count() val expected2 = Seq( Map("number of output rows" -> 4L, - "avg hash probe bucket list iters (min, med, max)" -> "\n(1, 1, 1)"), + "avg hash probe bucket list iters 
(min, med, max (stageId (attemptId): taskId))" -> + aggregateMetricsPattern), Map("number of output rows" -> 3L, - "avg hash probe bucket list iters (min, med, max)" -> "\n(1, 1, 1)")) + "avg hash probe bucket list iters (min, med, max (stageId (attemptId): taskId))" -> + aggregateMetricsPattern)) + val shuffleExpected2 = Map( "records read" -> 4L, "local blocks read" -> 4L, @@ -161,9 +181,12 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { } val metrics = getSparkPlanMetrics(df, 1, nodeIds, enableWholeStage).get nodeIds.foreach { nodeId => - val probes = metrics(nodeId)._2("avg hash probe bucket list iters (min, med, max)") - probes.toString.stripPrefix("\n(").stripSuffix(")").split(", ").foreach { probe => - assert(probe.toDouble > 1.0) + val probes = metrics(nodeId)._2("avg hash probe bucket list iters (min, med, max (stageId" + + " (attemptId): taskId))") + // Extract min, med, max from the string and strip off everthing else. + val index = probes.toString.stripPrefix("\n(").stripSuffix(")").indexOf(" (", 0) + probes.toString.stripPrefix("\n(").stripSuffix(")").slice(0, index).split(", ").foreach { + probe => assert(probe.toDouble > 1.0) } } } @@ -208,9 +231,15 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { val df = Seq(1, 3, 2).toDF("id").sort('id) testSparkPlanMetricsWithPredicates(df, 2, Map( 0L -> (("Sort", Map( - "sort time total (min, med, max)" -> {_.toString.matches(timingMetricPattern)}, - "peak memory total (min, med, max)" -> {_.toString.matches(sizeMetricPattern)}, - "spill size total (min, med, max)" -> {_.toString.matches(sizeMetricPattern)}))) + "sort time total (min, med, max (stageId (attemptId): taskId))" -> { + _.toString.matches(timingMetricPattern) + }, + "peak memory total (min, med, max (stageId (attemptId): taskId))" -> { + _.toString.matches(sizeMetricPattern) + }, + "spill size total (min, med, max (stageId (attemptId): taskId))" -> { + _.toString.matches(sizeMetricPattern) 
+ }))) )) } @@ -388,7 +417,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { // Assume the execution plan is // PhysicalRDD(nodeId = 0) data.write.format("json").save(file.getAbsolutePath) - sparkContext.listenerBus.waitUntilEmpty(10000) + sparkContext.listenerBus.waitUntilEmpty() val executionIds = currentExecutionIds().diff(previousExecutionIds) assert(executionIds.size === 1) val executionId = executionIds.head @@ -598,4 +627,29 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { Map(1L -> (("InMemoryTableScan", Map.empty))) ) } + + test("SPARK-28332: SQLMetric merge should handle -1 properly") { + def checkSparkPlanMetrics(plan: SparkPlan, expected: Map[String, Long]): Unit = { + expected.foreach { case (metricName: String, metricValue: Long) => + assert(plan.metrics.contains(metricName), s"The query plan should have metric $metricName") + val actualMetric = plan.metrics.get(metricName).get + assert(actualMetric.value == metricValue, + s"The query plan metric $metricName did not match, " + + s"expected:$metricValue, actual:${actualMetric.value}") + } + } + + val df = testData.join(testData2.filter('b === 0), $"key" === $"a", "left_outer") + df.collect() + val plan = df.queryExecution.executedPlan + + val exchanges = plan.collect { + case s: ShuffleExchangeExec => s + } + + assert(exchanges.size == 2, "The query plan should have two shuffle exchanges") + + checkSparkPlanMetrics(exchanges(0), Map("dataSize" -> 3200, "shuffleRecordsWritten" -> 100)) + checkSparkPlanMetrics(exchanges(1), Map("dataSize" -> 0, "shuffleRecordsWritten" -> 0)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala index 8f26c04307adc..0c1148f7b82e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsTestUtils.scala @@ -41,16 +41,28 @@ trait SQLMetricsTestUtils extends SQLTestUtils { protected def statusStore: SQLAppStatusStore = spark.sharedState.statusStore - // Pattern of size SQLMetric value, e.g. "\n96.2 MiB (32.1 MiB, 32.1 MiB, 32.1 MiB)" + // Pattern of size SQLMetric value, e.g. "\n96.2 MiB (32.1 MiB, 32.1 MiB, 32.1 MiB (stage 0 + // (attempt 0): task 4))" OR "\n96.2 MiB (32.1 MiB, 32.1 MiB, 32.1 MiB)" protected val sizeMetricPattern = { val bytes = "([0-9]+(\\.[0-9]+)?) (EiB|PiB|TiB|GiB|MiB|KiB|B)" - s"\\n$bytes \\($bytes, $bytes, $bytes\\)" + val maxMetrics = "\\(stage ([0-9])+ \\(attempt ([0-9])+\\)\\: task ([0-9])+\\)" + s"\\n$bytes \\($bytes, $bytes, $bytes( $maxMetrics)?\\)" } - // Pattern of timing SQLMetric value, e.g. "\n2.0 ms (1.0 ms, 1.0 ms, 1.0 ms)" + // Pattern of timing SQLMetric value, e.g. "\n2.0 ms (1.0 ms, 1.0 ms, 1.0 ms (stage 3 (attempt + // 0): task 217))" OR "\n2.0 ms (1.0 ms, 1.0 ms, 1.0 ms)" protected val timingMetricPattern = { val duration = "([0-9]+(\\.[0-9]+)?) (ms|s|m|h)" - s"\\n$duration \\($duration, $duration, $duration\\)" + val maxMetrics = "\\(stage ([0-9])+ \\(attempt ([0-9])+\\)\\: task ([0-9])+\\)" + s"\\n$duration \\($duration, $duration, $duration( $maxMetrics)?\\)" + } + + // Pattern of size SQLMetric value for Aggregate tests. 
+ // e.g "\n(1, 1, 0.9 (stage 1 (attempt 0): task 8)) OR "\n(1, 1, 0.9 )" + protected val aggregateMetricsPattern = { + val iters = "([0-9]+(\\.[0-9]+)?)" + val maxMetrics = "\\(stage ([0-9])+ \\(attempt ([0-9])+\\)\\: task ([0-9])+\\)" + s"\\n\\($iters, $iters, $iters( $maxMetrics)?\\)" } /** @@ -86,7 +98,7 @@ trait SQLMetricsTestUtils extends SQLTestUtils { } val totalNumBytesMetric = executedNode.metrics.find( - _.name == "written output total (min, med, max)").get + _.name == "written output total (min, med, max (stageId (attemptId): taskId))").get val totalNumBytes = metrics(totalNumBytesMetric.accumulatorId).replaceAll(",", "") .split(" ").head.trim.toDouble assert(totalNumBytes > 0) @@ -115,29 +127,31 @@ trait SQLMetricsTestUtils extends SQLTestUtils { provider: String, dataFormat: String, tableName: String): Unit = { - withTempPath { dir => - spark.sql( - s""" - |CREATE TABLE $tableName(a int, b int) - |USING $provider - |PARTITIONED BY(a) - |LOCATION '${dir.toURI}' - """.stripMargin) - val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) - assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) - - val df = spark.range(start = 0, end = 40, step = 1, numPartitions = 1) - .selectExpr("id a", "id b") - - // 40 files, 80 rows, 40 dynamic partitions. - verifyWriteDataMetrics(Seq(40, 40, 80)) { - df.union(df).repartition(2, $"a") - .write - .format(dataFormat) - .mode("overwrite") - .insertInto(tableName) + withTable(tableName) { + withTempPath { dir => + spark.sql( + s""" + |CREATE TABLE $tableName(a int, b int) + |USING $provider + |PARTITIONED BY(a) + |LOCATION '${dir.toURI}' + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + assert(table.location == makeQualifiedPath(dir.getAbsolutePath)) + + val df = spark.range(start = 0, end = 40, step = 1, numPartitions = 1) + .selectExpr("id a", "id b") + + // 40 files, 80 rows, 40 dynamic partitions. 
+ verifyWriteDataMetrics(Seq(40, 40, 80)) { + df.union(df).repartition(2, $"a") + .write + .format(dataFormat) + .mode("overwrite") + .insertInto(tableName) + } + assert(TestUtils.recursiveList(dir).count(_.getName.startsWith("part-")) == 40) } - assert(TestUtils.recursiveList(dir).count(_.getName.startsWith("part-")) == 40) } } @@ -203,7 +217,9 @@ trait SQLMetricsTestUtils extends SQLTestUtils { expectedMetrics: Map[Long, (String, Map[String, Any])]): Unit = { val expectedMetricsPredicates = expectedMetrics.mapValues { case (nodeName, nodeMetrics) => (nodeName, nodeMetrics.mapValues(expectedMetricValue => - (actualMetricValue: Any) => expectedMetricValue.toString === actualMetricValue)) + (actualMetricValue: Any) => { + actualMetricValue.toString.matches(expectedMetricValue.toString) + })) } testSparkPlanMetricsWithPredicates(df, expectedNumOfJobs, expectedMetricsPredicates) } @@ -230,7 +246,8 @@ trait SQLMetricsTestUtils extends SQLTestUtils { val (actualNodeName, actualMetricsMap) = actualMetrics(nodeId) assert(expectedNodeName === actualNodeName) for ((metricName, metricPredicate) <- expectedMetricsPredicatesMap) { - assert(metricPredicate(actualMetricsMap(metricName))) + assert(metricPredicate(actualMetricsMap(metricName)), + s"$nodeId / '$metricName' (= ${actualMetricsMap(metricName)}) did not match predicate.") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala index d26989b00a651..5fe3d6a71167e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala @@ -24,10 +24,13 @@ import org.apache.spark.api.python.{PythonEvalType, PythonFunction} import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.expressions.{And, 
AttributeReference, GreaterThan, In} import org.apache.spark.sql.execution.{FilterExec, InputAdapter, SparkPlanTest, WholeStageCodegenExec} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, DoubleType} -class BatchEvalPythonExecSuite extends SparkPlanTest with SharedSparkSession { +class BatchEvalPythonExecSuite extends SparkPlanTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits.newProductEncoder import testImplicits.localSeqToDatasetHolder @@ -95,7 +98,7 @@ class BatchEvalPythonExecSuite extends SparkPlanTest with SharedSparkSession { val df = Seq(("Hello", 4)).toDF("a", "b") val df2 = Seq(("Hello", 4)).toDF("c", "d") val joinDF = df.crossJoin(df2).where("dummyPythonUDF(a, c) == dummyPythonUDF(d, c)") - val qualifiedPlanNodes = joinDF.queryExecution.executedPlan.collect { + val qualifiedPlanNodes = collect(joinDF.queryExecution.executedPlan) { case b: BatchEvalPythonExec => b } assert(qualifiedPlanNodes.size == 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala index d02014c0dee54..61c9782bd175d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonForeachWriterSuite.scala @@ -21,8 +21,8 @@ import scala.collection.mutable.ArrayBuffer import org.mockito.Mockito.when import org.scalatest.concurrent.Eventually -import org.scalatest.mockito.MockitoSugar import org.scalatest.time.SpanSugar._ +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.memory.{TaskMemoryManager, TestMemoryManager} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala index 1ec9986328429..06077c94b66fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/RowQueueSuite.scala @@ -33,7 +33,7 @@ class RowQueueSuite extends SparkFunSuite with EncryptionFunSuite { test("in-memory queue") { val page = MemoryBlock.fromLongArray(new Array[Long](1<<10)) val queue = new InMemoryRowQueue(page, 1) { - override def close() {} + override def close(): Unit = {} } val row = new UnsafeRow(1) row.pointTo(new Array[Byte](16), 16) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala index ef88598fcb11b..6440e69e2ec23 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamProviderSuite.scala @@ -24,12 +24,12 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.v2.StreamingDataSourceV2Relation import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset, SparkDataStream} import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.ManualClock diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala index e1284ea03267e..5c66fc52592b3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketStreamSuite.scala @@ -29,12 +29,12 @@ import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.v2.StreamingDataSourceV2Relation import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset, SparkDataStream} import org.apache.spark.sql.streaming.{StreamingQueryException, StreamTest} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -42,7 +42,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap class TextSocketStreamSuite extends StreamTest with SharedSparkSession { - override def afterEach() { + override def afterEach(): Unit = { sqlContext.streams.active.foreach(_.stop()) if (serverThread != null) { serverThread.interrupt() @@ -194,13 +194,12 @@ class TextSocketStreamSuite extends StreamTest with SharedSparkSession { } test("user-specified schema given") { - val provider = new TextSocketSourceProvider val userSpecifiedSchema = StructType( StructField("name", StringType) :: StructField("area", StringType) :: Nil) val params = Map("host" -> "localhost", "port" -> "1234") val exception = intercept[UnsupportedOperationException] { - provider.getTable(new CaseInsensitiveStringMap(params.asJava), userSpecifiedSchema) + 
spark.readStream.schema(userSpecifiedSchema).format("socket").options(params).load() } assert(exception.getMessage.contains( "TextSocketSourceProvider source does not support user-specified schema")) @@ -318,7 +317,7 @@ class TextSocketStreamSuite extends StreamTest with SharedSparkSession { for (i <- 0 until numRecords / 2) { r.next() offsets.append(r.getOffset().asInstanceOf[ContinuousRecordPartitionOffset].offset) - data.append(r.get().get(0, DataTypes.StringType).asInstanceOf[String].toInt) + data.append(r.get().getString(0).toInt) // commit the offsets in the middle and validate if processing continues if (i == 2) { commitOffset(t.partitionId, i + 1) @@ -381,7 +380,10 @@ class TextSocketStreamSuite extends StreamTest with SharedSparkSession { val r = readerFactory.createReader(t).asInstanceOf[TextSocketContinuousPartitionReader] for (_ <- 0 until numRecords / 2) { r.next() - assert(r.get().get(0, TextSocketReader.SCHEMA_TIMESTAMP).isInstanceOf[(_, _)]) + assert(r.get().numFields === 2) + // just try to read columns one by one - it would throw error if the row is corrupted + r.get().getString(0) + r.get().getLong(1) } case _ => throw new IllegalStateException("Unexpected task type") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index a84d107f2cbc0..488879938339d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -406,7 +406,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] var latestStoreVersion = 0 - def generateStoreVersions() { + def generateStoreVersions(): Unit = { for (i <- 1 to 20) { val store = StateStore.get(storeProviderId, keySchema, valueSchema, None, latestStoreVersion, storeConf, hadoopConf) @@ -586,7 
+586,8 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] query.processAllAvailable() require(query.lastProgress != null) // at least one batch processed after start val loadedProvidersMethod = - PrivateMethod[mutable.HashMap[StateStoreProviderId, StateStoreProvider]]('loadedProviders) + PrivateMethod[mutable.HashMap[StateStoreProviderId, StateStoreProvider]]( + Symbol("loadedProviders")) val loadedProvidersMap = StateStore invokePrivate loadedProvidersMethod() val loadedProviders = loadedProvidersMap.synchronized { loadedProvidersMap.values.toSeq } query.stop() @@ -781,7 +782,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] provider: HDFSBackedStateStoreProvider, version: Long, isSnapshot: Boolean): Boolean = { - val method = PrivateMethod[Path]('baseDir) + val method = PrivateMethod[Path](Symbol("baseDir")) val basePath = provider invokePrivate method() val fileName = if (isSnapshot) s"$version.snapshot" else s"$version.delta" val filePath = new File(basePath.toString, fileName) @@ -789,7 +790,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] } def deleteFilesEarlierThanVersion(provider: HDFSBackedStateStoreProvider, version: Long): Unit = { - val method = PrivateMethod[Path]('baseDir) + val method = PrivateMethod[Path](Symbol("baseDir")) val basePath = provider invokePrivate method() for (version <- 0 until version.toInt) { for (isSnapshot <- Seq(false, true)) { @@ -804,7 +805,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] provider: HDFSBackedStateStoreProvider, version: Long, isSnapshot: Boolean): Unit = { - val method = PrivateMethod[Path]('baseDir) + val method = PrivateMethod[Path](Symbol("baseDir")) val basePath = provider invokePrivate method() val fileName = if (isSnapshot) s"$version.snapshot" else s"$version.delta" val filePath = new File(basePath.toString, fileName) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala index c0216a2ef3e61..ce1eabeb932fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala @@ -38,9 +38,14 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter spark.streams.stateStoreCoordinator // initialize the lazy coordinator } + SymmetricHashJoinStateManager.supportedVersions.foreach { version => + test(s"StreamingJoinStateManager V${version} - all operations") { + testAllOperations(version) + } + } - test("SymmetricHashJoinStateManager - all operations") { - withJoinStateManager(inputValueAttribs, joinKeyExprs) { manager => + private def testAllOperations(stateFormatVersion: Int): Unit = { + withJoinStateManager(inputValueAttribs, joinKeyExprs, stateFormatVersion) { manager => implicit val mgr = manager assert(get(20) === Seq.empty) // initially empty @@ -123,7 +128,8 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter def toValueInt(inputValueRow: UnsafeRow): Int = inputValueRow.getInt(0) def append(key: Int, value: Int)(implicit manager: SymmetricHashJoinStateManager): Unit = { - manager.append(toJoinKeyRow(key), toInputValue(value)) + // we only put matched = false for simplicity - StreamingJoinSuite will test the functionality + manager.append(toJoinKeyRow(key), toInputValue(value), matched = false) } def get(key: Int)(implicit manager: SymmetricHashJoinStateManager): Seq[Int] = { @@ -156,13 +162,15 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter def withJoinStateManager( inputValueAttribs: Seq[Attribute], - joinKeyExprs: Seq[Expression])(f: 
SymmetricHashJoinStateManager => Unit): Unit = { + joinKeyExprs: Seq[Expression], + stateFormatVersion: Int)(f: SymmetricHashJoinStateManager => Unit): Unit = { withTempDir { file => val storeConf = new StateStoreConf() val stateInfo = StatefulOperatorStateInfo(file.getAbsolutePath, UUID.randomUUID, 0, 0, 5) val manager = new SymmetricHashJoinStateManager( - LeftSide, inputValueAttribs, joinKeyExprs, Some(stateInfo), storeConf, new Configuration) + LeftSide, inputValueAttribs, joinKeyExprs, Some(stateInfo), storeConf, new Configuration, + partitionId = 0, stateFormatVersion) try { f(manager) } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala index 9e42056c19a0c..298afa880c930 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/AllExecutionsPageSuite.scala @@ -73,7 +73,7 @@ class AllExecutionsPageSuite extends SharedSparkSession with BeforeAndAfter { map.put("failed.sort", Array("duration")) when(request.getParameterMap()).thenReturn(map) val html = renderSQLPage(request, tab, statusStore).toString().toLowerCase(Locale.ROOT) - assert(!html.contains("IllegalArgumentException")) + assert(!html.contains("illegalargumentexception")) assert(html.contains("duration")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/MetricsAggregationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/MetricsAggregationBenchmark.scala new file mode 100644 index 0000000000000..c09ff51ecaff2 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/MetricsAggregationBenchmark.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.ui + +import java.util.Properties +import java.util.concurrent.atomic.AtomicInteger + +import scala.collection.mutable +import scala.concurrent.duration._ + +import org.apache.spark.{SparkConf, TaskState} +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.executor.ExecutorMetrics +import org.apache.spark.internal.config.Status._ +import org.apache.spark.resource.ResourceProfile +import org.apache.spark.scheduler._ +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.metric.SQLMetricInfo +import org.apache.spark.status.ElementTrackingStore +import org.apache.spark.util.{AccumulatorMetadata, LongAccumulator, Utils} +import org.apache.spark.util.kvstore.InMemoryStore + +/** + * Benchmark for metrics aggregation in the SQL listener. + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class --jars + * 2. build/sbt "core/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/test:runMain " + * Results will be written to "benchmarks/MetricsAggregationBenchmark-results.txt". 
+ * }}} + */ +object MetricsAggregationBenchmark extends BenchmarkBase { + + private def metricTrackingBenchmark( + timer: Benchmark.Timer, + numMetrics: Int, + numTasks: Int, + numStages: Int): Measurements = { + val conf = new SparkConf() + .set(LIVE_ENTITY_UPDATE_PERIOD, 0L) + .set(ASYNC_TRACKING_ENABLED, false) + val kvstore = new ElementTrackingStore(new InMemoryStore(), conf) + val listener = new SQLAppStatusListener(conf, kvstore, live = true) + val store = new SQLAppStatusStore(kvstore, Some(listener)) + + val metrics = (0 until numMetrics).map { i => + new SQLMetricInfo(s"metric$i", i.toLong, "average") + } + + val planInfo = new SparkPlanInfo( + getClass().getName(), + getClass().getName(), + Nil, + Map.empty, + metrics) + + val idgen = new AtomicInteger() + val executionId = idgen.incrementAndGet() + val executionStart = SparkListenerSQLExecutionStart( + executionId, + getClass().getName(), + getClass().getName(), + getClass().getName(), + planInfo, + System.currentTimeMillis()) + + val executionEnd = SparkListenerSQLExecutionEnd(executionId, System.currentTimeMillis()) + + val properties = new Properties() + properties.setProperty(SQLExecution.EXECUTION_ID_KEY, executionId.toString) + + timer.startTiming() + listener.onOtherEvent(executionStart) + + val taskEventsTime = (0 until numStages).map { _ => + val stageInfo = new StageInfo(idgen.incrementAndGet(), 0, getClass().getName(), + numTasks, Nil, Nil, getClass().getName(), + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + + val jobId = idgen.incrementAndGet() + val jobStart = SparkListenerJobStart( + jobId = jobId, + time = System.currentTimeMillis(), + stageInfos = Seq(stageInfo), + properties) + + val stageStart = SparkListenerStageSubmitted(stageInfo) + + val taskOffset = idgen.incrementAndGet().toLong + val taskEvents = (0 until numTasks).map { i => + val info = new TaskInfo( + taskId = taskOffset + i.toLong, + index = i, + attemptNumber = 0, + // The following fields are not 
used. + launchTime = 0, + executorId = "", + host = "", + taskLocality = null, + speculative = false) + info.markFinished(TaskState.FINISHED, 1L) + + val accumulables = (0 until numMetrics).map { mid => + val acc = new LongAccumulator + acc.metadata = AccumulatorMetadata(mid, None, false) + acc.toInfo(Some(i.toLong), None) + } + + info.setAccumulables(accumulables) + + val start = SparkListenerTaskStart(stageInfo.stageId, stageInfo.attemptNumber, info) + val end = SparkListenerTaskEnd(stageInfo.stageId, stageInfo.attemptNumber, + taskType = "", + reason = null, + info, + new ExecutorMetrics(), + null) + + (start, end) + } + + val jobEnd = SparkListenerJobEnd( + jobId = jobId, + time = System.currentTimeMillis(), + JobSucceeded) + + listener.onJobStart(jobStart) + listener.onStageSubmitted(stageStart) + + val (_, _taskEventsTime) = Utils.timeTakenMs { + taskEvents.foreach { case (start, end) => + listener.onTaskStart(start) + listener.onTaskEnd(end) + } + } + + listener.onJobEnd(jobEnd) + _taskEventsTime + } + + val (_, aggTime) = Utils.timeTakenMs { + listener.onOtherEvent(executionEnd) + val metrics = store.executionMetrics(executionId) + assert(metrics.size == numMetrics, s"${metrics.size} != $numMetrics") + } + + timer.stopTiming() + kvstore.close() + + Measurements(taskEventsTime, aggTime) + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val metricCount = 50 + val taskCount = 100000 + val stageCounts = Seq(1, 2, 3) + + val benchmark = new Benchmark( + s"metrics aggregation ($metricCount metrics, $taskCount tasks per stage)", 1, + warmupTime = 0.seconds, output = output) + + // Run this outside the measurement code so that classes are loaded and JIT is triggered, + // otherwise the first run tends to be much slower than others. Also because this benchmark is a + // bit weird and doesn't really map to what the Benchmark class expects, so it's a bit harder + // to use warmupTime and friends effectively. 
+ stageCounts.foreach { count => + metricTrackingBenchmark(new Benchmark.Timer(-1), metricCount, taskCount, count) + } + + val measurements = mutable.HashMap[Int, Seq[Measurements]]() + + stageCounts.foreach { count => + benchmark.addTimerCase(s"$count stage(s)") { timer => + val m = metricTrackingBenchmark(timer, metricCount, taskCount, count) + val all = measurements.getOrElse(count, Nil) + measurements(count) = all ++ Seq(m) + } + } + + benchmark.run() + + benchmark.out.printf("Stage Count Stage Proc. Time Aggreg. Time\n") + stageCounts.foreach { count => + val data = measurements(count) + val eventsTimes = data.flatMap(_.taskEventsTimes) + val aggTimes = data.map(_.aggregationTime) + + val msg = " %d %d %d\n".format( + count, + eventsTimes.sum / eventsTimes.size, + aggTimes.sum / aggTimes.size) + benchmark.out.printf(msg) + } + } + + /** + * Finer-grained measurements of how long it takes to run some parts of the benchmark. This is + * collected by the benchmark method, so this collection slightly affects the overall benchmark + * results, but this data helps with seeing where the time is going, since this benchmark is + * triggering a whole lot of code in the listener class. 
+ */ + case class Measurements( + taskEventsTimes: Seq[Long], + aggregationTime: Long) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index 90966d2efec23..d18a35c3110f9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.internal.config import org.apache.spark.internal.config.Status._ import org.apache.spark.rdd.RDD +import org.apache.spark.resource.ResourceProfile import org.apache.spark.scheduler._ import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.InternalRow @@ -38,6 +39,8 @@ import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.execution.{LeafExecNode, QueryExecution, SparkPlanInfo, SQLExecution} import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.functions.count +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.UI_RETAINED_EXECUTIONS import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.status.ElementTrackingStore @@ -79,12 +82,13 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils private def createStageInfo(stageId: Int, attemptId: Int): StageInfo = { new StageInfo(stageId = stageId, attemptId = attemptId, + numTasks = 8, // The following fields are not used in tests name = "", - numTasks = 0, rddInfos = Nil, parentIds = Nil, - details = "") + details = "", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) } private def createTaskInfo( @@ -94,8 +98,8 @@ class SQLAppStatusListenerSuite 
extends SharedSparkSession with JsonTestUtils val info = new TaskInfo( taskId = taskId, attemptNumber = attemptNumber, + index = taskId.toInt, // The following fields are not used in tests - index = 0, launchTime = 0, executorId = "", host = "", @@ -190,6 +194,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils ), createProperties(executionId))) listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(0, 0))) + listener.onTaskStart(SparkListenerTaskStart(0, 0, createTaskInfo(0, 0))) + listener.onTaskStart(SparkListenerTaskStart(0, 0, createTaskInfo(1, 0))) assert(statusStore.executionMetrics(executionId).isEmpty) @@ -217,6 +223,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils // Retrying a stage should reset the metrics listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(0, 1))) + listener.onTaskStart(SparkListenerTaskStart(0, 1, createTaskInfo(0, 0))) + listener.onTaskStart(SparkListenerTaskStart(0, 1, createTaskInfo(1, 0))) listener.onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate("", Seq( // (task id, stage id, stage attempt, accum updates) @@ -260,6 +268,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils // Summit a new stage listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(1, 0))) + listener.onTaskStart(SparkListenerTaskStart(1, 0, createTaskInfo(0, 0))) + listener.onTaskStart(SparkListenerTaskStart(1, 0, createTaskInfo(1, 0))) listener.onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate("", Seq( // (task id, stage id, stage attempt, accum updates) @@ -480,7 +490,7 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils // At the beginning of this test case, there should be no live data in the listener. 
assert(listener.noLiveData()) spark.sparkContext.parallelize(1 to 10).foreach(i => ()) - spark.sparkContext.listenerBus.waitUntilEmpty(10000) + spark.sparkContext.listenerBus.waitUntilEmpty() // Listener should ignore the non-SQL stages, as the stage data are only removed when SQL // execution ends, which will not be triggered for non-SQL jobs. assert(listener.noLiveData()) @@ -490,15 +500,15 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils val statusStore = spark.sharedState.statusStore val oldCount = statusStore.executionsList().size - val expectedAccumValue = 12345 - val expectedAccumValue2 = 54321 + val expectedAccumValue = 12345L + val expectedAccumValue2 = 54321L val physicalPlan = MyPlan(sqlContext.sparkContext, expectedAccumValue, expectedAccumValue2) val dummyQueryExecution = new QueryExecution(spark, LocalRelation()) { override lazy val sparkPlan = physicalPlan override lazy val executedPlan = physicalPlan } - SQLExecution.withNewExecutionId(spark, dummyQueryExecution) { + SQLExecution.withNewExecutionId(dummyQueryExecution) { physicalPlan.execute().collect() } @@ -517,8 +527,10 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils val metrics = statusStore.executionMetrics(execId) val driverMetric = physicalPlan.metrics("dummy") val driverMetric2 = physicalPlan.metrics("dummy2") - val expectedValue = SQLMetrics.stringValue(driverMetric.metricType, Seq(expectedAccumValue)) - val expectedValue2 = SQLMetrics.stringValue(driverMetric2.metricType, Seq(expectedAccumValue2)) + val expectedValue = SQLMetrics.stringValue(driverMetric.metricType, + Array(expectedAccumValue), Array.empty[Long]) + val expectedValue2 = SQLMetrics.stringValue(driverMetric2.metricType, + Array(expectedAccumValue2), Array.empty[Long]) assert(metrics.contains(driverMetric.id)) assert(metrics(driverMetric.id) === expectedValue) @@ -609,6 +621,15 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils 
assert(statusStore.executionsCount === 2) assert(statusStore.execution(2) === None) } + + test("SPARK-29894 test Codegen Stage Id in SparkPlanInfo") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + // with AQE on, the WholeStageCodegen rule is applied when running QueryStageExec. + val df = createTestDataFrame.select(count("*")) + val sparkPlanInfo = SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan) + assert(sparkPlanInfo.nodeName === "WholeStageCodegen (2)") + } + } } @@ -673,7 +694,7 @@ class SQLAppStatusListenerMemoryLeakSuite extends SparkFunSuite { case e: SparkException => // This is expected for a failed job } } - sc.listenerBus.waitUntilEmpty(10000) + sc.listenerBus.waitUntilEmpty() val statusStore = spark.sharedState.statusStore assert(statusStore.executionsCount() <= 50) assert(statusStore.planGraphCount() <= 50) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala index 758780c80b284..37d028d6a713f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala @@ -636,30 +636,24 @@ class ColumnarBatchSuite extends SparkFunSuite { assert(column.arrayData().elementsAppended == 0) } - testVector("CalendarInterval APIs", 4, CalendarIntervalType) { + testVector("CalendarInterval APIs", 5, CalendarIntervalType) { column => val reference = mutable.ArrayBuffer.empty[CalendarInterval] val months = column.getChild(0) - val microseconds = column.getChild(1) + val days = column.getChild(1) + val microseconds = column.getChild(2) assert(months.dataType() == IntegerType) + assert(days.dataType() == IntegerType) assert(microseconds.dataType() == LongType) - months.putInt(0, 1) - microseconds.putLong(0, 100) - reference += new CalendarInterval(1, 100) - - 
months.putInt(1, 0) - microseconds.putLong(1, 2000) - reference += new CalendarInterval(0, 2000) - - column.putNull(2) - assert(column.getInterval(2) == null) - reference += null - - months.putInt(3, 20) - microseconds.putLong(3, 0) - reference += new CalendarInterval(20, 0) + Seq(new CalendarInterval(1, 10, 100), + new CalendarInterval(0, 0, 2000), + new CalendarInterval(20, 0, 0), + new CalendarInterval(0, 200, 0)).zipWithIndex.foreach { case (v, i) => + column.putInterval(i, v) + reference += v + } reference.zipWithIndex.foreach { case (v, i) => val errMsg = "VectorType=" + column.getClass.getSimpleName @@ -1067,7 +1061,8 @@ class ColumnarBatchSuite extends SparkFunSuite { } } - private def compareStruct(fields: Seq[StructField], r1: InternalRow, r2: Row, seed: Long) { + private def compareStruct(fields: Seq[StructField], r1: InternalRow, r2: Row, + seed: Long): Unit = { fields.zipWithIndex.foreach { case (field: StructField, ordinal: Int) => assert(r1.isNullAt(ordinal) == r2.isNullAt(ordinal), "Seed = " + seed) if (!r1.isNullAt(ordinal)) { @@ -1159,7 +1154,7 @@ class ColumnarBatchSuite extends SparkFunSuite { * This test generates a random schema data, serializes it to column batches and verifies the * results. */ - def testRandomRows(flatSchema: Boolean, numFields: Int) { + def testRandomRows(flatSchema: Boolean, numFields: Int): Unit = { // TODO: Figure out why StringType doesn't work on jenkins. 
val types = Array( BooleanType, ByteType, FloatType, DoubleType, IntegerType, LongType, ShortType, @@ -1310,7 +1305,7 @@ class ColumnarBatchSuite extends SparkFunSuite { Decimal("1234.23456"), DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")), DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123")), - new CalendarInterval(1, 0), + new CalendarInterval(1, 0, 0), new GenericArrayData(Array(1, 2, 3, 4, null)), new GenericInternalRow(Array[Any](5.asInstanceOf[Any], 10)), mapBuilder.build() @@ -1331,7 +1326,7 @@ class ColumnarBatchSuite extends SparkFunSuite { Decimal("0.01000"), DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("1875-12-12")), DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("1880-01-05 12:45:21.321")), - new CalendarInterval(-10, -100), + new CalendarInterval(-10, -50, -100), new GenericArrayData(Array(5, 10, -100)), new GenericInternalRow(Array[Any](20.asInstanceOf[Any], null)), mapBuilder.build() @@ -1423,8 +1418,8 @@ class ColumnarBatchSuite extends SparkFunSuite { assert(columns(10).isNullAt(2)) assert(columns(11).dataType() == CalendarIntervalType) - assert(columns(11).getInterval(0) == new CalendarInterval(1, 0)) - assert(columns(11).getInterval(1) == new CalendarInterval(-10, -100)) + assert(columns(11).getInterval(0) == new CalendarInterval(1, 0, 0)) + assert(columns(11).getInterval(1) == new CalendarInterval(-10, -50, -100)) assert(columns(11).isNullAt(2)) assert(columns(12).dataType() == ArrayType(IntegerType)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala index d885348f3774a..46d0c64592a00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala @@ -17,8 +17,17 @@ package org.apache.spark.sql.internal -import 
org.apache.spark.SparkFunSuite -import org.apache.spark.sql.{AnalysisException, SparkSession} +import java.util.UUID + +import org.scalatest.Assertions._ + +import org.apache.spark.{SparkException, SparkFunSuite, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.execution.{LeafExecNode, QueryExecution, SparkPlan} import org.apache.spark.sql.execution.debug.codegenStringSeq import org.apache.spark.sql.functions.col import org.apache.spark.sql.test.SQLTestUtils @@ -91,15 +100,86 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { test("SPARK-22219: refactor to control to generate comment") { Seq(true, false).foreach { flag => - withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> flag.toString) { + withSQLConf(StaticSQLConf.CODEGEN_COMMENTS.key -> flag.toString, + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + // with AQE on, the WholeStageCodegen rule is applied when running QueryStageExec. 
val res = codegenStringSeq(spark.range(10).groupBy(col("id") * 2).count() .queryExecution.executedPlan) assert(res.length == 2) - assert(res.forall { case (_, code) => + assert(res.forall { case (_, code, _) => (code.contains("* Codegend pipeline") == flag) && (code.contains("// input[") == flag) }) } } } + + test("SPARK-28939: propagate SQLConf also in conversions to RDD") { + val confs = Seq("spark.sql.a" -> "x", "spark.sql.b" -> "y") + val physicalPlan = SQLConfAssertPlan(confs) + val dummyQueryExecution = FakeQueryExecution(spark, physicalPlan) + withSQLConf(confs: _*) { + // Force RDD evaluation to trigger asserts + dummyQueryExecution.toRdd.collect() + } + val dummyQueryExecution1 = FakeQueryExecution(spark, physicalPlan) + // Without setting the configs assertions fail + val e = intercept[SparkException](dummyQueryExecution1.toRdd.collect()) + assert(e.getCause.isInstanceOf[NoSuchElementException]) + } + + test("SPARK-30556 propagate local properties to subquery execution thread") { + withSQLConf(StaticSQLConf.SUBQUERY_MAX_THREAD_THRESHOLD.key -> "1") { + withTempView("l", "m", "n") { + Seq(true).toDF().createOrReplaceTempView("l") + val confKey = "spark.sql.y" + + def createDataframe(confKey: String, confValue: String): Dataset[Boolean] = { + Seq(true) + .toDF() + .mapPartitions { _ => + TaskContext.get.getLocalProperty(confKey) == confValue match { + case true => Iterator(true) + case false => Iterator.empty + } + } + } + + // set local configuration and assert + val confValue1 = UUID.randomUUID().toString() + createDataframe(confKey, confValue1).createOrReplaceTempView("m") + spark.sparkContext.setLocalProperty(confKey, confValue1) + assert(sql("SELECT * FROM l WHERE EXISTS (SELECT * FROM m)").collect().length == 1) + + // change the conf value and assert again + val confValue2 = UUID.randomUUID().toString() + createDataframe(confKey, confValue2).createOrReplaceTempView("n") + spark.sparkContext.setLocalProperty(confKey, confValue2) + assert(sql("SELECT * 
FROM l WHERE EXISTS (SELECT * FROM n)").collect().length == 1) + } + } + } +} + +case class SQLConfAssertPlan(confToCheck: Seq[(String, String)]) extends LeafExecNode { + override protected def doExecute(): RDD[InternalRow] = { + sqlContext + .sparkContext + .parallelize(0 until 2, 2) + .mapPartitions { it => + val confs = SQLConf.get + confToCheck.foreach { case (key, expectedValue) => + assert(confs.getConfString(key) == expectedValue) + } + it.map(i => InternalRow.fromSeq(Seq(i))) + } + } + + override def output: Seq[Attribute] = Seq.empty +} + +case class FakeQueryExecution(spark: SparkSession, physicalPlan: SparkPlan) + extends QueryExecution(spark, LocalRelation()) { + override lazy val sparkPlan: SparkPlan = physicalPlan + override lazy val executedPlan: SparkPlan = physicalPlan } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 1dfbca64f5778..61be3672f3ebe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -17,7 +17,10 @@ package org.apache.spark.sql.internal +import scala.language.reflectiveCalls + import org.apache.hadoop.fs.Path +import org.apache.log4j.Level import org.apache.spark.sql._ import org.apache.spark.sql.internal.StaticSQLConf._ @@ -25,7 +28,6 @@ import org.apache.spark.sql.test.{SharedSparkSession, TestSQLContext} import org.apache.spark.util.Utils class SQLConfSuite extends QueryTest with SharedSparkSession { - import testImplicits._ private val testKey = "test.key.0" private val testVal = "test.val.0" @@ -259,12 +261,6 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { assert(spark.sessionState.conf.parquetOutputTimestampType == SQLConf.ParquetOutputTimestampType.TIMESTAMP_MICROS) - // PARQUET_INT64_AS_TIMESTAMP_MILLIS should be respected. 
- spark.sessionState.conf.setConf(SQLConf.PARQUET_INT64_AS_TIMESTAMP_MILLIS, true) - assert(spark.sessionState.conf.parquetOutputTimestampType == - SQLConf.ParquetOutputTimestampType.TIMESTAMP_MILLIS) - - // PARQUET_OUTPUT_TIMESTAMP_TYPE has higher priority over PARQUET_INT64_AS_TIMESTAMP_MILLIS spark.sessionState.conf.setConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE, "timestamp_micros") assert(spark.sessionState.conf.parquetOutputTimestampType == SQLConf.ParquetOutputTimestampType.TIMESTAMP_MICROS) @@ -320,4 +316,36 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { assert(e2.getMessage.contains("spark.sql.shuffle.partitions")) } + test("set removed config to non-default value") { + val config = "spark.sql.fromJsonForceNullableSchema" + val defaultValue = true + + spark.conf.set(config, defaultValue) + + val e = intercept[AnalysisException] { + spark.conf.set(config, !defaultValue) + } + assert(e.getMessage.contains(config)) + } + + test("log deprecation warnings") { + val logAppender = new LogAppender("deprecated SQL configs") + def check(config: String): Unit = { + assert(logAppender.loggingEvents.exists( + e => e.getLevel == Level.WARN && + e.getRenderedMessage.contains(config))) + } + + val config1 = SQLConf.HIVE_VERIFY_PARTITION_PATH.key + withLogAppender(logAppender) { + spark.conf.set(config1, true) + } + check(config1) + + val config2 = SQLConf.ARROW_EXECUTION_ENABLED.key + withLogAppender(logAppender) { + spark.conf.unset(config2) + } + check(config2) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 7fe00aef56e16..9cba95f7d7df2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier import 
org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeTestUtils} -import org.apache.spark.sql.execution.DataSourceScanExec +import org.apache.spark.sql.execution.{DataSourceScanExec, ExtendedMode} import org.apache.spark.sql.execution.command.{ExplainCommand, ShowCreateTableCommand} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRDD, JDBCRelation, JdbcUtils} @@ -51,7 +51,7 @@ class JDBCSuite extends QueryTest val testBytes = Array[Byte](99.toByte, 134.toByte, 135.toByte, 200.toByte, 205.toByte) val testH2Dialect = new JdbcDialect { - override def canHandle(url: String) : Boolean = url.startsWith("jdbc:h2") + override def canHandle(url: String): Boolean = url.startsWith("jdbc:h2") override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = Some(StringType) @@ -450,15 +450,6 @@ class JDBCSuite extends QueryTest urlWithUserAndPass, "TEST.PEOPLE", new Properties()).collect().length === 3) } - test("Basic API with illegal fetchsize") { - val properties = new Properties() - properties.setProperty(JDBCOptions.JDBC_BATCH_FETCH_SIZE, "-1") - val e = intercept[IllegalArgumentException] { - spark.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", properties).collect() - }.getMessage - assert(e.contains("Invalid value `-1` for parameter `fetchsize`")) - } - test("Missing partition columns") { withView("tempPeople") { val e = intercept[IllegalArgumentException] { @@ -743,7 +734,7 @@ class JDBCSuite extends QueryTest } test("compile filters") { - val compileFilter = PrivateMethod[Option[String]]('compileFilter) + val compileFilter = PrivateMethod[Option[String]](Symbol("compileFilter")) def doCompileFilter(f: Filter): String = JDBCRDD invokePrivate compileFilter(f, JdbcDialects.get("jdbc:")) getOrElse("") assert(doCompileFilter(EqualTo("col0", 3)) === 
""""col0" = 3""") @@ -893,17 +884,37 @@ class JDBCSuite extends QueryTest "BIT") assert(msSqlServerDialect.getJDBCType(BinaryType).map(_.databaseTypeDefinition).get == "VARBINARY(MAX)") - assert(msSqlServerDialect.getJDBCType(ShortType).map(_.databaseTypeDefinition).get == - "SMALLINT") + Seq(true, false).foreach { flag => + withSQLConf(SQLConf.LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED.key -> s"$flag") { + if (SQLConf.get.legacyMsSqlServerNumericMappingEnabled) { + assert(msSqlServerDialect.getJDBCType(ShortType).map(_.databaseTypeDefinition).isEmpty) + } else { + assert(msSqlServerDialect.getJDBCType(ShortType).map(_.databaseTypeDefinition).get == + "SMALLINT") + } + } + } } test("SPARK-28152 MsSqlServerDialect catalyst type mapping") { val msSqlServerDialect = JdbcDialects.get("jdbc:sqlserver") val metadata = new MetadataBuilder().putLong("scale", 1) - assert(msSqlServerDialect.getCatalystType(java.sql.Types.SMALLINT, "SMALLINT", 1, - metadata).get == ShortType) - assert(msSqlServerDialect.getCatalystType(java.sql.Types.REAL, "REAL", 1, - metadata).get == FloatType) + + Seq(true, false).foreach { flag => + withSQLConf(SQLConf.LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED.key -> s"$flag") { + if (SQLConf.get.legacyMsSqlServerNumericMappingEnabled) { + assert(msSqlServerDialect.getCatalystType(java.sql.Types.SMALLINT, "SMALLINT", 1, + metadata).isEmpty) + assert(msSqlServerDialect.getCatalystType(java.sql.Types.REAL, "REAL", 1, + metadata).isEmpty) + } else { + assert(msSqlServerDialect.getCatalystType(java.sql.Types.SMALLINT, "SMALLINT", 1, + metadata).get == ShortType) + assert(msSqlServerDialect.getCatalystType(java.sql.Types.REAL, "REAL", 1, + metadata).get == FloatType) + } + } + } } test("table exists query by jdbc dialect") { @@ -983,7 +994,7 @@ class JDBCSuite extends QueryTest test("test credentials in the properties are not in plan output") { val df = sql("SELECT * FROM parts") - val explain = ExplainCommand(df.queryExecution.logical, extended = true) + val 
explain = ExplainCommand(df.queryExecution.logical, ExtendedMode) spark.sessionState.executePlan(explain).executedPlan.executeCollect().foreach { r => assert(!List("testPass", "testUser").exists(r.toString.contains)) } @@ -996,7 +1007,7 @@ class JDBCSuite extends QueryTest test("test credentials in the connection url are not in the plan output") { val df = spark.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties()) - val explain = ExplainCommand(df.queryExecution.logical, extended = true) + val explain = ExplainCommand(df.queryExecution.logical, ExtendedMode) spark.sessionState.executePlan(explain).executedPlan.executeCollect().foreach { r => assert(!List("testPass", "testUser").exists(r.toString.contains)) } @@ -1018,7 +1029,7 @@ class JDBCSuite extends QueryTest | password '$password') """.stripMargin) - val explain = ExplainCommand(df.queryExecution.logical, extended = true) + val explain = ExplainCommand(df.queryExecution.logical, ExtendedMode) spark.sessionState.executePlan(explain).executedPlan.executeCollect().foreach { r => assert(!r.toString.contains(password)) } @@ -1658,4 +1669,34 @@ class JDBCSuite extends QueryTest } } } + + test("Add exception when isolationLevel is Illegal") { + val e = intercept[IllegalArgumentException] { + spark.read.format("jdbc") + .option("Url", urlWithUserAndPass) + .option("dbTable", "test.people") + .option("isolationLevel", "test") + .load() + }.getMessage + assert(e.contains( + "Invalid value `test` for parameter `isolationLevel`. 
This can be " + + "`NONE`, `READ_UNCOMMITTED`, `READ_COMMITTED`, `REPEATABLE_READ` or `SERIALIZABLE`.")) + } + + test("SPARK-28552: Case-insensitive database URLs in JdbcDialect") { + assert(JdbcDialects.get("jdbc:mysql://localhost/db") === MySQLDialect) + assert(JdbcDialects.get("jdbc:MySQL://localhost/db") === MySQLDialect) + assert(JdbcDialects.get("jdbc:postgresql://localhost/db") === PostgresDialect) + assert(JdbcDialects.get("jdbc:postGresql://localhost/db") === PostgresDialect) + assert(JdbcDialects.get("jdbc:db2://localhost/db") === DB2Dialect) + assert(JdbcDialects.get("jdbc:DB2://localhost/db") === DB2Dialect) + assert(JdbcDialects.get("jdbc:sqlserver://localhost/db") === MsSqlServerDialect) + assert(JdbcDialects.get("jdbc:sqlServer://localhost/db") === MsSqlServerDialect) + assert(JdbcDialects.get("jdbc:derby://localhost/db") === DerbyDialect) + assert(JdbcDialects.get("jdbc:derBy://localhost/db") === DerbyDialect) + assert(JdbcDialects.get("jdbc:oracle://localhost/db") === OracleDialect) + assert(JdbcDialects.get("jdbc:Oracle://localhost/db") === OracleDialect) + assert(JdbcDialects.get("jdbc:teradata://localhost/db") === TeradataDialect) + assert(JdbcDialects.get("jdbc:Teradata://localhost/db") === TeradataDialect) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index b28c6531d42b2..8021ef1a17a18 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -21,10 +21,12 @@ import java.sql.DriverManager import java.util.Properties import scala.collection.JavaConverters.propertiesAsScalaMapConverter +import scala.collection.mutable.ArrayBuffer import org.scalatest.BeforeAndAfter import org.apache.spark.SparkException +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import org.apache.spark.sql.{AnalysisException, 
DataFrame, Row, SaveMode} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} @@ -543,4 +545,57 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { }.getMessage assert(errMsg.contains("Statement was canceled or the session timed out")) } + + test("metrics") { + val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2) + val df2 = spark.createDataFrame(sparkContext.parallelize(arr1x2), schema2) + + runAndVerifyRecordsWritten(2) { + df.write.mode(SaveMode.Append).jdbc(url, "TEST.BASICCREATETEST", new Properties()) + } + + runAndVerifyRecordsWritten(1) { + df2.write.mode(SaveMode.Overwrite).jdbc(url, "TEST.BASICCREATETEST", new Properties()) + } + + runAndVerifyRecordsWritten(1) { + df2.write.mode(SaveMode.Overwrite).option("truncate", true) + .jdbc(url, "TEST.BASICCREATETEST", new Properties()) + } + + runAndVerifyRecordsWritten(0) { + intercept[AnalysisException] { + df2.write.mode(SaveMode.ErrorIfExists).jdbc(url, "TEST.BASICCREATETEST", new Properties()) + } + } + + runAndVerifyRecordsWritten(0) { + df.write.mode(SaveMode.Ignore).jdbc(url, "TEST.BASICCREATETEST", new Properties()) + } + } + + private def runAndVerifyRecordsWritten(expected: Long)(job: => Unit): Unit = { + assert(expected === runAndReturnMetrics(job, _.taskMetrics.outputMetrics.recordsWritten)) + } + + private def runAndReturnMetrics(job: => Unit, collector: (SparkListenerTaskEnd) => Long): Long = { + val taskMetrics = new ArrayBuffer[Long]() + + // Avoid receiving earlier taskEnd events + sparkContext.listenerBus.waitUntilEmpty() + + val listener = new SparkListener() { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + taskMetrics += collector(taskEnd) + } + } + sparkContext.addSparkListener(listener) + + job + + sparkContext.listenerBus.waitUntilEmpty() + + sparkContext.removeSparkListener(listener) + taskMetrics.sum + } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 7043b6d396977..c7266c886128c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.{DataSourceScanExec, SortExec} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec import org.apache.spark.sql.execution.datasources.BucketingUtils import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.joins.SortMergeJoinExec @@ -382,8 +383,16 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { joined.sort("bucketed_table1.k", "bucketed_table2.k"), df1.join(df2, joinCondition(df1, df2), joinType).sort("df1.k", "df2.k")) - assert(joined.queryExecution.executedPlan.isInstanceOf[SortMergeJoinExec]) - val joinOperator = joined.queryExecution.executedPlan.asInstanceOf[SortMergeJoinExec] + val joinOperator = if (joined.sqlContext.conf.adaptiveExecutionEnabled) { + val executedPlan = + joined.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec].executedPlan + assert(executedPlan.isInstanceOf[SortMergeJoinExec]) + executedPlan.asInstanceOf[SortMergeJoinExec] + } else { + val executedPlan = joined.queryExecution.executedPlan + assert(executedPlan.isInstanceOf[SortMergeJoinExec]) + executedPlan.asInstanceOf[SortMergeJoinExec] + } // check existence of shuffle assert( @@ -595,6 +604,20 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { } } + test("bucket join should work with SubqueryAlias plan") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0") { + 
withTable("t") { + withView("v") { + spark.range(20).selectExpr("id as i").write.bucketBy(8, "i").saveAsTable("t") + sql("CREATE VIEW v AS SELECT * FROM t").collect() + + val plan = sql("SELECT * FROM t a JOIN v b ON a.i = b.i").queryExecution.executedPlan + assert(plan.collect { case exchange: ShuffleExchangeExec => exchange }.isEmpty) + } + } + } + } + test("avoid shuffle when grouping keys are a super-set of bucket keys") { withTable("bucketed_table") { df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table") @@ -795,4 +818,22 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { } } + test("SPARK-29655 Read bucketed tables obeys spark.sql.shuffle.partitions") { + withSQLConf( + SQLConf.SHUFFLE_PARTITIONS.key -> "5", + SQLConf.SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS.key -> "7") { + val bucketSpec = Some(BucketSpec(6, Seq("i", "j"), Nil)) + Seq(false, true).foreach { enableAdaptive => + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> s"$enableAdaptive") { + val bucketedTableTestSpecLeft = BucketedTableTestSpec(bucketSpec, expectedShuffle = false) + val bucketedTableTestSpecRight = BucketedTableTestSpec(None, expectedShuffle = true) + testBucketing( + bucketedTableTestSpecLeft = bucketedTableTestSpecLeft, + bucketedTableTestSpecRight = bucketedTableTestSpecRight, + joinCondition = joinCondition(Seq("i", "j")) + ) + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 08f0865c1e128..983209051c8ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -183,7 +183,7 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { }.getMessage assert(error.contains("Operation not allowed") && - error.contains("CREATE EXTERNAL 
TABLE ... USING")) + error.contains("CREATE EXTERNAL TABLE ...")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala index 1ece98aa7eb3a..7c10f9950f8eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala @@ -26,7 +26,8 @@ import org.apache.spark.unsafe.types.UTF8String private[sql] abstract class DataSourceTest extends QueryTest { - protected def sqlTest(sqlString: String, expectedAnswer: Seq[Row], enableRegex: Boolean = false) { + protected def sqlTest(sqlString: String, expectedAnswer: Seq[Row], + enableRegex: Boolean = false): Unit = { test(sqlString) { withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> enableRegex.toString) { checkAnswer(spark.sql(sqlString), expectedAnswer) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala new file mode 100644 index 0000000000000..55fb3eb8ade35 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/ExternalCommandRunnerSuite.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.connector.ExternalCommandRunner +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class ExternalCommandRunnerSuite extends QueryTest with SharedSparkSession { + test("execute command") { + try { + System.setProperty("command", "hello") + assert(System.getProperty("command") === "hello") + + val options = Map("one" -> "1", "two" -> "2") + val df = spark.executeCommand(classOf[FakeCommandRunner].getName, "world", options) + // executeCommand should execute the command eagerly + assert(System.getProperty("command") === "world") + checkAnswer(df, Seq(Row("one"), Row("two"))) + } finally { + System.clearProperty("command") + } + } +} + +class FakeCommandRunner extends ExternalCommandRunner { + + override def executeCommand(command: String, options: CaseInsensitiveStringMap): Array[String] = { + System.setProperty("command", command) + options.keySet().iterator().asScala.toSeq.sorted.toArray + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 0d236a43ece6b..bcff30a51c3f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.sources import java.io.File import java.sql.Date +import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataOutputStream, Path, RawLocalFileSystem} + import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier @@ -268,6 +270,55 @@ class InsertSuite extends DataSourceTest with 
SharedSparkSession { "INSERT OVERWRITE to a table while querying it should not be allowed.") } + test("SPARK-30112: it is allowed to write to a table while querying it for " + + "dynamic partition overwrite.") { + Seq(PartitionOverwriteMode.DYNAMIC.toString, + PartitionOverwriteMode.STATIC.toString).foreach { mode => + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> mode) { + withTable("insertTable") { + sql( + """ + |CREATE TABLE insertTable(i int, part1 int, part2 int) USING PARQUET + |PARTITIONED BY (part1, part2) + """.stripMargin) + + sql("INSERT INTO TABLE insertTable PARTITION(part1=1, part2=1) SELECT 1") + checkAnswer(spark.table("insertTable"), Row(1, 1, 1)) + sql("INSERT OVERWRITE TABLE insertTable PARTITION(part1=1, part2=2) SELECT 2") + checkAnswer(spark.table("insertTable"), Row(1, 1, 1) :: Row(2, 1, 2) :: Nil) + + if (mode == PartitionOverwriteMode.DYNAMIC.toString) { + sql( + """ + |INSERT OVERWRITE TABLE insertTable PARTITION(part1=1, part2) + |SELECT i + 1, part2 FROM insertTable + """.stripMargin) + checkAnswer(spark.table("insertTable"), Row(2, 1, 1) :: Row(3, 1, 2) :: Nil) + + sql( + """ + |INSERT OVERWRITE TABLE insertTable PARTITION(part1=1, part2) + |SELECT i + 1, part2 + 1 FROM insertTable + """.stripMargin) + checkAnswer(spark.table("insertTable"), + Row(2, 1, 1) :: Row(3, 1, 2) :: Row(4, 1, 3) :: Nil) + } else { + val message = intercept[AnalysisException] { + sql( + """ + |INSERT OVERWRITE TABLE insertTable PARTITION(part1=1, part2) + |SELECT i + 1, part2 FROM insertTable + """.stripMargin) + }.getMessage + assert( + message.contains("Cannot overwrite a path that is also being read from."), + "INSERT OVERWRITE to a table while querying it should not be allowed.") + } + } + } + } + } + test("Caching") { // write something to the jsonTable sql( @@ -470,6 +521,20 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { } } + test("new partitions should be added to catalog after writing to catalog table") { + val table = 
"partitioned_catalog_table" + val numParts = 210 + withTable(table) { + val df = (1 to numParts).map(i => (i, i)).toDF("part", "col1") + val tempTable = "partitioned_catalog_temp_table" + df.createOrReplaceTempView(tempTable) + sql(s"CREATE TABLE $table (part Int, col1 Int) USING parquet PARTITIONED BY (part)") + sql(s"INSERT INTO TABLE $table SELECT * from $tempTable") + val partitions = spark.sessionState.catalog.listPartitionNames(TableIdentifier(table)) + assert(partitions.size == numParts) + } + } + test("SPARK-20236: dynamic partition overwrite without catalog table") { withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString) { withTempPath { path => @@ -634,6 +699,60 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { } } + test("Throw exceptions on inserting out-of-range int value with ANSI casting policy") { + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.ANSI.toString) { + withTable("t") { + sql("create table t(b int) using parquet") + val outOfRangeValue1 = (Int.MaxValue + 1L).toString + var msg = intercept[SparkException] { + sql(s"insert into t values($outOfRangeValue1)") + }.getCause.getMessage + assert(msg.contains(s"Casting $outOfRangeValue1 to int causes overflow")) + + val outOfRangeValue2 = (Int.MinValue - 1L).toString + msg = intercept[SparkException] { + sql(s"insert into t values($outOfRangeValue2)") + }.getCause.getMessage + assert(msg.contains(s"Casting $outOfRangeValue2 to int causes overflow")) + } + } + } + + test("Throw exceptions on inserting out-of-range long value with ANSI casting policy") { + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.ANSI.toString) { + withTable("t") { + sql("create table t(b long) using parquet") + val outOfRangeValue1 = Math.nextUp(Long.MaxValue) + var msg = intercept[SparkException] { + sql(s"insert into t values(${outOfRangeValue1}D)") + }.getCause.getMessage + 
assert(msg.contains(s"Casting $outOfRangeValue1 to long causes overflow")) + + val outOfRangeValue2 = Math.nextDown(Long.MinValue) + msg = intercept[SparkException] { + sql(s"insert into t values(${outOfRangeValue2}D)") + }.getCause.getMessage + assert(msg.contains(s"Casting $outOfRangeValue2 to long causes overflow")) + } + } + } + + test("Throw exceptions on inserting out-of-range decimal value with ANSI casting policy") { + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.ANSI.toString) { + withTable("t") { + sql("create table t(b decimal(3,2)) using parquet") + val outOfRangeValue = "123.45" + val msg = intercept[SparkException] { + sql(s"insert into t values(${outOfRangeValue})") + }.getCause.getMessage + assert(msg.contains("cannot be represented as Decimal(3, 2)")) + } + } + } + test("SPARK-24860: dynamic partition overwrite specified per source without catalog table") { withTempPath { path => Seq((1, 1), (2, 2)).toDF("i", "part") @@ -675,7 +794,41 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { spark.sessionState.catalog.createTable(newTable, false) sql("INSERT INTO TABLE test_table SELECT 1, 'a'") - sql("INSERT INTO TABLE test_table SELECT 2, null") + val msg = intercept[AnalysisException] { + sql("INSERT INTO TABLE test_table SELECT 2, null") + }.getMessage + assert(msg.contains("Cannot write nullable values to non-null column 's'")) } } + + test("Stop task set if FileAlreadyExistsException was thrown") { + withSQLConf("fs.file.impl" -> classOf[FileExistingTestFileSystem].getName, + "fs.file.impl.disable.cache" -> "true") { + withTable("t") { + sql( + """ + |CREATE TABLE t(i INT, part1 INT) USING PARQUET + |PARTITIONED BY (part1) + """.stripMargin) + + val df = Seq((1, 1)).toDF("i", "part1") + val err = intercept[SparkException] { + df.write.mode("overwrite").format("parquet").insertInto("t") + } + assert(err.getCause.getMessage.contains("can not write to output file: " + + 
"org.apache.hadoop.fs.FileAlreadyExistsException")) + } + } + } +} + +class FileExistingTestFileSystem extends RawLocalFileSystem { + override def create( + f: Path, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long): FSDataOutputStream = { + throw new FileAlreadyExistsException(s"${f.toString} already exists") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala index 87dce376a09dd..9b26a5659df49 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala @@ -123,7 +123,8 @@ class PathOptionSuite extends DataSourceTest with SharedSparkSession { |USING ${classOf[TestOptionsSource].getCanonicalName} |OPTIONS (PATH '/tmp/path')""".stripMargin) sql("ALTER TABLE src SET LOCATION '/tmp/path2'") - assert(getPathOption("src").map(makeQualifiedPath) == Some(makeQualifiedPath("/tmp/path2"))) + assert(getPathOption("src") == + Some(CatalogUtils.URIToString(makeQualifiedPath("/tmp/path2")))) } withTable("src", "src2") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala index d99c605b2e478..f242f75f39f20 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala @@ -115,6 +115,10 @@ class PrunedScanSuite extends DataSourceTest with SharedSparkSession { testPruning("SELECT b, b FROM oneToTenPruned", "b") testPruning("SELECT a FROM oneToTenPruned", "a") testPruning("SELECT b FROM oneToTenPruned", "b") + testPruning("SELECT a, rand() FROM oneToTenPruned WHERE a > 5", "a") + testPruning("SELECT a FROM oneToTenPruned WHERE rand() > 0.5", "a") + testPruning("SELECT a, rand() FROM oneToTenPruned WHERE rand() > 
0.5", "a") + testPruning("SELECT a, rand() FROM oneToTenPruned WHERE b > 5", "a", "b") def testPruning(sqlString: String, expectedColumns: String*): Unit = { test(s"Columns output ${expectedColumns.mkString(",")}: $sqlString") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index d4e117953942e..9a95bf770772e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -358,7 +358,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { // Make sure we do throw correct exception when users use a relation provider that // only implements the RelationProvider or the SchemaRelationProvider. Seq("TEMPORARY VIEW", "TABLE").foreach { tableType => - val schemaNotAllowed = intercept[Exception] { + val schemaNotMatch = intercept[Exception] { sql( s""" |CREATE $tableType relationProvierWithSchema (i int) @@ -369,7 +369,8 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { |) """.stripMargin) } - assert(schemaNotAllowed.getMessage.contains("does not allow user-specified schemas")) + assert(schemaNotMatch.getMessage.contains( + "The user-specified schema doesn't match the actual schema")) val schemaNeeded = intercept[Exception] { sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2SQLSuite.scala deleted file mode 100644 index b6e7bc5d1a4dc..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2SQLSuite.scala +++ /dev/null @@ -1,962 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.sources.v2 - -import scala.collection.JavaConverters._ - -import org.apache.spark.SparkException -import org.apache.spark.sql._ -import org.apache.spark.sql.catalog.v2.{CatalogPlugin, Identifier, TableCatalog} -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchDatabaseException, NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog, StagingInMemoryTableCatalog} -import org.apache.spark.sql.execution.datasources.v2.V2SessionCatalog -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG -import org.apache.spark.sql.sources.v2.internal.V1Table -import org.apache.spark.sql.types.{ArrayType, BooleanType, DoubleType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType} -import org.apache.spark.sql.util.CaseInsensitiveStringMap - -class DataSourceV2SQLSuite - extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = true) - with AlterTableTests { - - import org.apache.spark.sql.catalog.v2.CatalogV2Implicits._ - - private val v2Source = classOf[FakeV2Provider].getName - override protected val v2Format = v2Source - override protected val catalogAndNamespace = "testcat.ns1.ns2." 
- - private def catalog(name: String): CatalogPlugin = { - spark.sessionState.catalogManager.catalog(name) - } - - protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { - val tmpView = "tmp_view" - withTempView(tmpView) { - insert.createOrReplaceTempView(tmpView) - val overwrite = if (mode == SaveMode.Overwrite) "OVERWRITE" else "INTO" - sql(s"INSERT $overwrite TABLE $tableName SELECT * FROM $tmpView") - } - } - - override def verifyTable(tableName: String, expected: DataFrame): Unit = { - checkAnswer(spark.table(tableName), expected) - } - - override def getTableMetadata(tableName: String): Table = { - val nameParts = spark.sessionState.sqlParser.parseMultipartIdentifier(tableName) - val v2Catalog = catalog(nameParts.head).asTableCatalog - val namespace = nameParts.drop(1).init.toArray - v2Catalog.loadTable(Identifier.of(namespace, nameParts.last)) - } - - before { - spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) - spark.conf.set( - "spark.sql.catalog.testcat_atomic", classOf[StagingInMemoryTableCatalog].getName) - spark.conf.set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName) - spark.conf.set(V2_SESSION_CATALOG.key, classOf[InMemoryTableSessionCatalog].getName) - - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") - df2.createOrReplaceTempView("source2") - } - - after { - spark.sessionState.catalog.reset() - spark.sessionState.catalogManager.reset() - spark.sessionState.conf.clear() - } - - test("CreateTable: use v2 plan because catalog is set") { - spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") - - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == "testcat.table_name") - 
assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) - - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) - } - - test("DescribeTable using v2 catalog") { - spark.sql("CREATE TABLE testcat.table_name (id bigint, data string)" + - " USING foo" + - " PARTITIONED BY (id)") - val descriptionDf = spark.sql("DESCRIBE TABLE testcat.table_name") - assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === - Seq( - ("col_name", StringType), - ("data_type", StringType), - ("comment", StringType))) - val description = descriptionDf.collect() - assert(description === Seq( - Row("id", "bigint", ""), - Row("data", "string", ""))) - } - - test("DescribeTable with v2 catalog when table does not exist.") { - intercept[AnalysisException] { - spark.sql("DESCRIBE TABLE testcat.table_name") - } - } - - test("DescribeTable extended using v2 catalog") { - spark.sql("CREATE TABLE testcat.table_name (id bigint, data string)" + - " USING foo" + - " PARTITIONED BY (id)" + - " TBLPROPERTIES ('bar'='baz')") - val descriptionDf = spark.sql("DESCRIBE TABLE EXTENDED testcat.table_name") - assert(descriptionDf.schema.map(field => (field.name, field.dataType)) - === Seq( - ("col_name", StringType), - ("data_type", StringType), - ("comment", StringType))) - assert(descriptionDf.collect() - .map(_.toSeq) - .map(_.toArray.map(_.toString.trim)) === Array( - Array("id", "bigint", ""), - Array("data", "string", ""), - Array("", "", ""), - Array("Partitioning", "", ""), - Array("--------------", "", ""), - Array("Part 0", "id", ""), - Array("", "", ""), - Array("Table Property", "Value", ""), - Array("----------------", "-------", ""), - Array("bar", "baz", ""), - Array("provider", "foo", ""))) - - } - - test("CreateTable: use v2 plan and session catalog 
when provider is v2") { - spark.sql(s"CREATE TABLE table_name (id bigint, data string) USING $v2Source") - - val testCatalog = catalog("session").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == "default.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> v2Source).asJava) - assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) - - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) - } - - test("CreateTable: fail if table exists") { - spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") - - val testCatalog = catalog("testcat").asTableCatalog - - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table.name == "testcat.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) - - // run a second create query that should fail - val exc = intercept[TableAlreadyExistsException] { - spark.sql("CREATE TABLE testcat.table_name (id bigint, data string, id2 bigint) USING bar") - } - - assert(exc.getMessage.contains("table_name")) - - // table should not have changed - val table2 = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table2.name == "testcat.table_name") - assert(table2.partitioning.isEmpty) - assert(table2.properties == Map("provider" -> "foo").asJava) - assert(table2.schema == new StructType().add("id", LongType).add("data", StringType)) - - // check that the table is still empty - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) - } - - test("CreateTable: if not exists") { - spark.sql( - "CREATE 
TABLE IF NOT EXISTS testcat.table_name (id bigint, data string) USING foo") - - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == "testcat.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) - - spark.sql("CREATE TABLE IF NOT EXISTS testcat.table_name (id bigint, data string) USING bar") - - // table should not have changed - val table2 = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table2.name == "testcat.table_name") - assert(table2.partitioning.isEmpty) - assert(table2.properties == Map("provider" -> "foo").asJava) - assert(table2.schema == new StructType().add("id", LongType).add("data", StringType)) - - // check that the table is still empty - val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), Seq.empty) - } - - test("CreateTable: use default catalog for v2 sources when default catalog is set") { - spark.conf.set("spark.sql.default.catalog", "testcat") - spark.sql(s"CREATE TABLE table_name (id bigint, data string) USING foo") - - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == "testcat.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType().add("id", LongType).add("data", StringType)) - - // check that the table is empty - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) - } - - test("CreateTableAsSelect: use v2 plan because catalog is set") { - val basicCatalog = catalog("testcat").asTableCatalog 
- val atomicCatalog = catalog("testcat_atomic").asTableCatalog - val basicIdentifier = "testcat.table_name" - val atomicIdentifier = "testcat_atomic.table_name" - - Seq((basicCatalog, basicIdentifier), (atomicCatalog, atomicIdentifier)).foreach { - case (catalog, identifier) => - spark.sql(s"CREATE TABLE $identifier USING foo AS SELECT id, data FROM source") - - val table = catalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == identifier) - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType() - .add("id", LongType) - .add("data", StringType)) - - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) - } - } - - test("ReplaceTableAsSelect: basic v2 implementation.") { - val basicCatalog = catalog("testcat").asTableCatalog - val atomicCatalog = catalog("testcat_atomic").asTableCatalog - val basicIdentifier = "testcat.table_name" - val atomicIdentifier = "testcat_atomic.table_name" - - Seq((basicCatalog, basicIdentifier), (atomicCatalog, atomicIdentifier)).foreach { - case (catalog, identifier) => - spark.sql(s"CREATE TABLE $identifier USING foo AS SELECT id, data FROM source") - val originalTable = catalog.loadTable(Identifier.of(Array(), "table_name")) - - spark.sql(s"REPLACE TABLE $identifier USING foo AS SELECT id FROM source") - val replacedTable = catalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(replacedTable != originalTable, "Table should have been replaced.") - assert(replacedTable.name == identifier) - assert(replacedTable.partitioning.isEmpty) - assert(replacedTable.properties == Map("provider" -> "foo").asJava) - assert(replacedTable.schema == new StructType().add("id", LongType)) - - val rdd = spark.sparkContext.parallelize(replacedTable.asInstanceOf[InMemoryTable].rows) - checkAnswer( - 
spark.internalCreateDataFrame(rdd, replacedTable.schema), - spark.table("source").select("id")) - } - } - - test("ReplaceTableAsSelect: Non-atomic catalog drops the table if the write fails.") { - spark.sql("CREATE TABLE testcat.table_name USING foo AS SELECT id, data FROM source") - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table.asInstanceOf[InMemoryTable].rows.nonEmpty) - - intercept[Exception] { - spark.sql("REPLACE TABLE testcat.table_name" + - s" USING foo OPTIONS (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}`=true)" + - s" AS SELECT id FROM source") - } - - assert(!testCatalog.tableExists(Identifier.of(Array(), "table_name")), - "Table should have been dropped as a result of the replace.") - } - - test("ReplaceTableAsSelect: Non-atomic catalog drops the table permanently if the" + - " subsequent table creation fails.") { - spark.sql("CREATE TABLE testcat.table_name USING foo AS SELECT id, data FROM source") - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table.asInstanceOf[InMemoryTable].rows.nonEmpty) - - intercept[Exception] { - spark.sql("REPLACE TABLE testcat.table_name" + - s" USING foo" + - s" TBLPROPERTIES (`${InMemoryTableCatalog.SIMULATE_FAILED_CREATE_PROPERTY}`=true)" + - s" AS SELECT id FROM source") - } - - assert(!testCatalog.tableExists(Identifier.of(Array(), "table_name")), - "Table should have been dropped and failed to be created.") - } - - test("ReplaceTableAsSelect: Atomic catalog does not drop the table when replace fails.") { - spark.sql("CREATE TABLE testcat_atomic.table_name USING foo AS SELECT id, data FROM source") - val testCatalog = catalog("testcat_atomic").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - intercept[Exception] { - spark.sql("REPLACE TABLE testcat_atomic.table_name" + - s" USING foo OPTIONS 
(`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}=true)" + - s" AS SELECT id FROM source") - } - - var maybeReplacedTable = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(maybeReplacedTable === table, "Table should not have changed.") - - intercept[Exception] { - spark.sql("REPLACE TABLE testcat_atomic.table_name" + - s" USING foo" + - s" TBLPROPERTIES (`${InMemoryTableCatalog.SIMULATE_FAILED_CREATE_PROPERTY}`=true)" + - s" AS SELECT id FROM source") - } - - maybeReplacedTable = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(maybeReplacedTable === table, "Table should not have changed.") - } - - test("ReplaceTable: Erases the table contents and changes the metadata.") { - spark.sql(s"CREATE TABLE testcat.table_name USING $v2Source AS SELECT id, data FROM source") - - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table.asInstanceOf[InMemoryTable].rows.nonEmpty) - - spark.sql("REPLACE TABLE testcat.table_name (id bigint) USING foo") - val replaced = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(replaced.asInstanceOf[InMemoryTable].rows.isEmpty, - "Replaced table should have no rows after committing.") - assert(replaced.schema().fields.length === 1, - "Replaced table should have new schema.") - assert(replaced.schema().fields(0).name === "id", - "Replaced table should have new schema.") - } - - test("ReplaceTableAsSelect: CREATE OR REPLACE new table has same behavior as CTAS.") { - Seq("testcat", "testcat_atomic").foreach { catalogName => - spark.sql( - s""" - |CREATE TABLE $catalogName.created USING $v2Source - |AS SELECT id, data FROM source - """.stripMargin) - spark.sql( - s""" - |CREATE OR REPLACE TABLE $catalogName.replaced USING $v2Source - |AS SELECT id, data FROM source - """.stripMargin) - - val testCatalog = catalog(catalogName).asTableCatalog - val createdTable = 
testCatalog.loadTable(Identifier.of(Array(), "created")) - val replacedTable = testCatalog.loadTable(Identifier.of(Array(), "replaced")) - - assert(createdTable.asInstanceOf[InMemoryTable].rows === - replacedTable.asInstanceOf[InMemoryTable].rows) - assert(createdTable.schema === replacedTable.schema) - } - } - - test("ReplaceTableAsSelect: REPLACE TABLE throws exception if table does not exist.") { - Seq("testcat", "testcat_atomic").foreach { catalog => - spark.sql(s"CREATE TABLE $catalog.created USING $v2Source AS SELECT id, data FROM source") - intercept[CannotReplaceMissingTableException] { - spark.sql(s"REPLACE TABLE $catalog.replaced USING $v2Source AS SELECT id, data FROM source") - } - } - } - - test("ReplaceTableAsSelect: REPLACE TABLE throws exception if table is dropped before commit.") { - import InMemoryTableCatalog._ - spark.sql(s"CREATE TABLE testcat_atomic.created USING $v2Source AS SELECT id, data FROM source") - intercept[CannotReplaceMissingTableException] { - spark.sql(s"REPLACE TABLE testcat_atomic.replaced" + - s" USING $v2Source" + - s" TBLPROPERTIES (`$SIMULATE_DROP_BEFORE_REPLACE_PROPERTY`=true)" + - s" AS SELECT id, data FROM source") - } - } - - test("CreateTableAsSelect: use v2 plan and session catalog when provider is v2") { - spark.sql(s"CREATE TABLE table_name USING $v2Source AS SELECT id, data FROM source") - - val testCatalog = catalog("session").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == "default.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> v2Source).asJava) - assert(table.schema == new StructType() - .add("id", LongType) - .add("data", StringType)) - - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) - } - - test("CreateTableAsSelect: fail if table exists") { - spark.sql("CREATE TABLE 
testcat.table_name USING foo AS SELECT id, data FROM source") - - val testCatalog = catalog("testcat").asTableCatalog - - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table.name == "testcat.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType() - .add("id", LongType) - .add("data", StringType)) - - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) - - // run a second CTAS query that should fail - val exc = intercept[TableAlreadyExistsException] { - spark.sql( - "CREATE TABLE testcat.table_name USING bar AS SELECT id, data, id as id2 FROM source2") - } - - assert(exc.getMessage.contains("table_name")) - - // table should not have changed - val table2 = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - assert(table2.name == "testcat.table_name") - assert(table2.partitioning.isEmpty) - assert(table2.properties == Map("provider" -> "foo").asJava) - assert(table2.schema == new StructType() - .add("id", LongType) - .add("data", StringType)) - - val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), spark.table("source")) - } - - test("CreateTableAsSelect: if not exists") { - spark.sql( - "CREATE TABLE IF NOT EXISTS testcat.table_name USING foo AS SELECT id, data FROM source") - - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == "testcat.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType() - .add("id", LongType) - .add("data", StringType)) - - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) 
- checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) - - spark.sql( - "CREATE TABLE IF NOT EXISTS testcat.table_name USING foo AS SELECT id, data FROM source2") - - // check that the table contains data from just the first CTAS - val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), spark.table("source")) - } - - test("CreateTableAsSelect: use default catalog for v2 sources when default catalog is set") { - spark.conf.set("spark.sql.default.catalog", "testcat") - - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - - // setting the default catalog breaks the reference to source because the default catalog is - // used and AsTableIdentifier no longer matches - spark.sql(s"CREATE TABLE table_name USING foo AS SELECT id, data FROM source") - - val testCatalog = catalog("testcat").asTableCatalog - val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == "testcat.table_name") - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType() - .add("id", LongType) - .add("data", StringType)) - - val rdd = sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), spark.table("source")) - } - - test("CreateTableAsSelect: v2 session catalog can load v1 source table") { - spark.conf.set(V2_SESSION_CATALOG.key, classOf[V2SessionCatalog].getName) - - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - - sql(s"CREATE TABLE table_name USING parquet AS SELECT id, data FROM source") - - checkAnswer(sql(s"TABLE default.table_name"), spark.table("source")) - // The fact that the following line doesn't throw an exception 
means, the session catalog - // can load the table. - val t = catalog("session").asTableCatalog - .loadTable(Identifier.of(Array.empty, "table_name")) - assert(t.isInstanceOf[V1Table], "V1 table wasn't returned as an unresolved table") - } - - test("CreateTableAsSelect: nullable schema") { - val basicCatalog = catalog("testcat").asTableCatalog - val atomicCatalog = catalog("testcat_atomic").asTableCatalog - val basicIdentifier = "testcat.table_name" - val atomicIdentifier = "testcat_atomic.table_name" - - Seq((basicCatalog, basicIdentifier), (atomicCatalog, atomicIdentifier)).foreach { - case (catalog, identifier) => - spark.sql(s"CREATE TABLE $identifier USING foo AS SELECT 1 i") - - val table = catalog.loadTable(Identifier.of(Array(), "table_name")) - - assert(table.name == identifier) - assert(table.partitioning.isEmpty) - assert(table.properties == Map("provider" -> "foo").asJava) - assert(table.schema == new StructType().add("i", "int")) - - val rdd = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Row(1)) - - sql(s"INSERT INTO $identifier SELECT CAST(null AS INT)") - val rdd2 = spark.sparkContext.parallelize(table.asInstanceOf[InMemoryTable].rows) - checkAnswer(spark.internalCreateDataFrame(rdd2, table.schema), Seq(Row(1), Row(null))) - } - } - - test("DropTable: basic") { - val tableName = "testcat.ns1.ns2.tbl" - val ident = Identifier.of(Array("ns1", "ns2"), "tbl") - sql(s"CREATE TABLE $tableName USING foo AS SELECT id, data FROM source") - assert(catalog("testcat").asTableCatalog.tableExists(ident) === true) - sql(s"DROP TABLE $tableName") - assert(catalog("testcat").asTableCatalog.tableExists(ident) === false) - } - - test("DropTable: if exists") { - intercept[NoSuchTableException] { - sql(s"DROP TABLE testcat.db.notbl") - } - sql(s"DROP TABLE IF EXISTS testcat.db.notbl") - } - - test("Relation: basic") { - val t1 = "testcat.ns1.ns2.tbl" - withTable(t1) { - sql(s"CREATE 
TABLE $t1 USING foo AS SELECT id, data FROM source") - checkAnswer(sql(s"TABLE $t1"), spark.table("source")) - checkAnswer(sql(s"SELECT * FROM $t1"), spark.table("source")) - } - } - - test("Relation: SparkSession.table()") { - val t1 = "testcat.ns1.ns2.tbl" - withTable(t1) { - sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") - checkAnswer(spark.table(s"$t1"), spark.table("source")) - } - } - - test("Relation: CTE") { - val t1 = "testcat.ns1.ns2.tbl" - withTable(t1) { - sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") - checkAnswer( - sql(s""" - |WITH cte AS (SELECT * FROM $t1) - |SELECT * FROM cte - """.stripMargin), - spark.table("source")) - } - } - - test("Relation: view text") { - val t1 = "testcat.ns1.ns2.tbl" - withTable(t1) { - withView("view1") { v1: String => - sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") - sql(s"CREATE VIEW $v1 AS SELECT * from $t1") - checkAnswer(sql(s"TABLE $v1"), spark.table("source")) - } - } - } - - test("Relation: join tables in 2 catalogs") { - val t1 = "testcat.ns1.ns2.tbl" - val t2 = "testcat2.v2tbl" - withTable(t1, t2) { - sql(s"CREATE TABLE $t1 USING foo AS SELECT id, data FROM source") - sql(s"CREATE TABLE $t2 USING foo AS SELECT id, data FROM source2") - val df1 = spark.table("source") - val df2 = spark.table("source2") - val df_joined = df1.join(df2).where(df1("id") + 1 === df2("id")) - checkAnswer( - sql(s""" - |SELECT * - |FROM $t1 t1, $t2 t2 - |WHERE t1.id + 1 = t2.id - """.stripMargin), - df_joined) - } - } - - test("InsertInto: append - across catalog") { - val t1 = "testcat.ns1.ns2.tbl" - val t2 = "testcat2.db.tbl" - withTable(t1, t2) { - sql(s"CREATE TABLE $t1 USING foo AS SELECT * FROM source") - sql(s"CREATE TABLE $t2 (id bigint, data string) USING foo") - sql(s"INSERT INTO $t2 SELECT * FROM $t1") - checkAnswer(spark.table(t2), spark.table("source")) - } - } - - test("ShowTables: using v2 catalog") { - spark.sql("CREATE TABLE testcat.db.table_name (id bigint, 
data string) USING foo") - spark.sql("CREATE TABLE testcat.n1.n2.db.table_name (id bigint, data string) USING foo") - - runShowTablesSql("SHOW TABLES FROM testcat.db", Seq(Row("db", "table_name"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.n1.n2.db", - Seq(Row("n1.n2.db", "table_name"))) - } - - test("ShowTables: using v2 catalog with a pattern") { - spark.sql("CREATE TABLE testcat.db.table (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db.table_name_1 (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db.table_name_2 (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db2.table_name_2 (id bigint, data string) USING foo") - - runShowTablesSql( - "SHOW TABLES FROM testcat.db", - Seq( - Row("db", "table"), - Row("db", "table_name_1"), - Row("db", "table_name_2"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.db LIKE '*name*'", - Seq(Row("db", "table_name_1"), Row("db", "table_name_2"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.db LIKE '*2'", - Seq(Row("db", "table_name_2"))) - } - - test("ShowTables: using v2 catalog, namespace doesn't exist") { - runShowTablesSql("SHOW TABLES FROM testcat.unknown", Seq()) - } - - test("ShowTables: using v1 catalog") { - runShowTablesSql( - "SHOW TABLES FROM default", - Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - } - - test("ShowTables: using v1 catalog, db doesn't exist ") { - // 'db' below resolves to a database name for v1 catalog because there is no catalog named - // 'db' and there is no default catalog set. 
- val exception = intercept[NoSuchDatabaseException] { - runShowTablesSql("SHOW TABLES FROM db", Seq(), expectV2Catalog = false) - } - - assert(exception.getMessage.contains("Database 'db' not found")) - } - - test("ShowTables: using v1 catalog, db name with multipartIdentifier ('a.b') is not allowed.") { - val exception = intercept[AnalysisException] { - runShowTablesSql("SHOW TABLES FROM a.b", Seq(), expectV2Catalog = false) - } - - assert(exception.getMessage.contains("The database name is not valid: a.b")) - } - - test("ShowTables: using v2 catalog with empty namespace") { - spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") - runShowTablesSql("SHOW TABLES FROM testcat", Seq(Row("", "table"))) - } - - test("ShowTables: namespace is not specified and default v2 catalog is set") { - spark.conf.set("spark.sql.default.catalog", "testcat") - spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") - - // v2 catalog is used where default namespace is empty for TestInMemoryTableCatalog. 
- runShowTablesSql("SHOW TABLES", Seq(Row("", "table"))) - } - - test("ShowTables: namespace not specified and default v2 catalog not set - fallback to v1") { - runShowTablesSql( - "SHOW TABLES", - Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - - runShowTablesSql( - "SHOW TABLES LIKE '*2'", - Seq(Row("", "source2", true)), - expectV2Catalog = false) - } - - private def runShowTablesSql( - sqlText: String, - expected: Seq[Row], - expectV2Catalog: Boolean = true): Unit = { - val schema = if (expectV2Catalog) { - new StructType() - .add("namespace", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - } else { - new StructType() - .add("database", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - .add("isTemporary", BooleanType, nullable = false) - } - - val df = spark.sql(sqlText) - assert(df.schema === schema) - assert(expected === df.collect()) - } - - test("tableCreation: partition column case insensitive resolution") { - val testCatalog = catalog("testcat").asTableCatalog - val sessionCatalog = catalog("session").asTableCatalog - - def checkPartitioning(cat: TableCatalog, partition: String): Unit = { - val table = cat.loadTable(Identifier.of(Array.empty, "tbl")) - val partitions = table.partitioning().map(_.references()) - assert(partitions.length === 1) - val fieldNames = partitions.flatMap(_.map(_.fieldNames())) - assert(fieldNames === Array(Array(partition))) - } - - sql(s"CREATE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") - checkPartitioning(sessionCatalog, "a") - sql(s"CREATE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") - checkPartitioning(testCatalog, "a") - sql(s"CREATE OR REPLACE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") - checkPartitioning(sessionCatalog, "b") - sql(s"CREATE OR REPLACE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") - 
checkPartitioning(testCatalog, "b") - } - - test("tableCreation: partition column case sensitive resolution") { - def checkFailure(statement: String): Unit = { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val e = intercept[AnalysisException] { - sql(statement) - } - assert(e.getMessage.contains("Couldn't find column")) - } - } - - checkFailure(s"CREATE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") - checkFailure(s"CREATE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (A)") - checkFailure( - s"CREATE OR REPLACE TABLE tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") - checkFailure( - s"CREATE OR REPLACE TABLE testcat.tbl (a int, b string) USING $v2Source PARTITIONED BY (B)") - } - - test("tableCreation: duplicate column names in the table definition") { - val errorMsg = "Found duplicate column(s) in the table definition of `t`" - Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - testCreateAnalysisError( - s"CREATE TABLE t ($c0 INT, $c1 INT) USING $v2Source", - errorMsg - ) - testCreateAnalysisError( - s"CREATE TABLE testcat.t ($c0 INT, $c1 INT) USING $v2Source", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE t ($c0 INT, $c1 INT) USING $v2Source", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE testcat.t ($c0 INT, $c1 INT) USING $v2Source", - errorMsg - ) - } - } - } - - test("tableCreation: duplicate nested column names in the table definition") { - val errorMsg = "Found duplicate column(s) in the table definition of `t`" - Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - testCreateAnalysisError( - s"CREATE TABLE t (d struct<$c0: INT, $c1: INT>) USING $v2Source", - errorMsg - ) - testCreateAnalysisError( - s"CREATE TABLE testcat.t (d 
struct<$c0: INT, $c1: INT>) USING $v2Source", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE t (d struct<$c0: INT, $c1: INT>) USING $v2Source", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE testcat.t (d struct<$c0: INT, $c1: INT>) USING $v2Source", - errorMsg - ) - } - } - } - - test("tableCreation: bucket column names not in table definition") { - val errorMsg = "Couldn't find column c in" - testCreateAnalysisError( - s"CREATE TABLE tbl (a int, b string) USING $v2Source CLUSTERED BY (c) INTO 4 BUCKETS", - errorMsg - ) - testCreateAnalysisError( - s"CREATE TABLE testcat.tbl (a int, b string) USING $v2Source CLUSTERED BY (c) INTO 4 BUCKETS", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE tbl (a int, b string) USING $v2Source " + - "CLUSTERED BY (c) INTO 4 BUCKETS", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE testcat.tbl (a int, b string) USING $v2Source " + - "CLUSTERED BY (c) INTO 4 BUCKETS", - errorMsg - ) - } - - test("tableCreation: column repeated in partition columns") { - val errorMsg = "Found duplicate column(s) in the partitioning" - Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - testCreateAnalysisError( - s"CREATE TABLE t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", - errorMsg - ) - testCreateAnalysisError( - s"CREATE TABLE testcat.t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE testcat.t ($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)", - errorMsg - ) - } - } - } - - test("tableCreation: column repeated in bucket columns") { - val errorMsg = "Found duplicate column(s) in the bucket definition" - Seq((true, ("a", "a")), (false, ("aA", 
"Aa"))).foreach { case (caseSensitive, (c0, c1)) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - testCreateAnalysisError( - s"CREATE TABLE t ($c0 INT) USING $v2Source " + - s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", - errorMsg - ) - testCreateAnalysisError( - s"CREATE TABLE testcat.t ($c0 INT) USING $v2Source " + - s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE t ($c0 INT) USING $v2Source " + - s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", - errorMsg - ) - testCreateAnalysisError( - s"CREATE OR REPLACE TABLE testcat.t ($c0 INT) USING $v2Source " + - s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS", - errorMsg - ) - } - } - } - - test("DeleteFrom: basic") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") - sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") - sql(s"DELETE FROM $t WHERE id = 2") - checkAnswer(spark.table(t), Seq( - Row(3, "c", 3))) - } - } - - test("DeleteFrom: alias") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") - sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") - sql(s"DELETE FROM $t tbl WHERE tbl.id = 2") - checkAnswer(spark.table(t), Seq( - Row(3, "c", 3))) - } - } - - test("DeleteFrom: fail if has subquery") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") - sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") - val exc = intercept[AnalysisException] { - sql(s"DELETE FROM $t WHERE id IN (SELECT id FROM $t)") - } - - assert(spark.table(t).count === 3) - assert(exc.getMessage.contains("Delete by condition with subquery is not supported")) - } - } - - private def testCreateAnalysisError(sqlStatement: String, expectedError: String): 
Unit = { - val errMsg = intercept[AnalysisException] { - sql(sqlStatement) - }.getMessage - assert(errMsg.contains(expectedError)) - } -} - - -/** Used as a V2 DataSource for V2SessionCatalog DDL */ -class FakeV2Provider extends TableProvider { - override def getTable(options: CaseInsensitiveStringMap): Table = { - throw new UnsupportedOperationException("Unnecessary for DDL tests") - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala new file mode 100644 index 0000000000000..99f7e32d4df72 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/DeprecatedStreamingAggregationSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming + +import org.scalatest.Assertions + +import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.execution.streaming.state.StreamingAggregationStateManager +import org.apache.spark.sql.expressions.scalalang.typed +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.OutputMode._ + +@deprecated("This test suite will be removed.", "3.0.0") +class DeprecatedStreamingAggregationSuite extends StateStoreMetricsTest with Assertions { + + import testImplicits._ + + def executeFuncWithStateVersionSQLConf( + stateVersion: Int, + confPairs: Seq[(String, String)], + func: => Any): Unit = { + withSQLConf(confPairs ++ + Seq(SQLConf.STREAMING_AGGREGATION_STATE_FORMAT_VERSION.key -> stateVersion.toString): _*) { + func + } + } + + def testWithAllStateVersions(name: String, confPairs: (String, String)*) + (func: => Any): Unit = { + for (version <- StreamingAggregationStateManager.supportedVersions) { + test(s"$name - state format version $version") { + executeFuncWithStateVersionSQLConf(version, confPairs, func) + } + } + } + + + testWithAllStateVersions("typed aggregators") { + val inputData = MemoryStream[(String, Int)] + val aggregated = inputData.toDS().groupByKey(_._1).agg(typed.sumLong(_._2)) + + testStream(aggregated, Update)( + AddData(inputData, ("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)), + CheckLastBatch(("a", 30), ("b", 3), ("c", 1)) + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala index 7d343bb58ea3f..877965100f018 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala @@ -22,10 +22,13 @@ import java.nio.file.Files import java.util.Locale import scala.collection.JavaConverters._ +import 
scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.JobContext import org.apache.spark.SparkConf +import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import org.apache.spark.sql.{AnalysisException, DataFrame} import org.apache.spark.sql.execution.DataSourceScanExec @@ -389,7 +392,7 @@ abstract class FileStreamSinkSuite extends StreamTest { var bytesWritten: Long = 0L try { spark.sparkContext.addSparkListener(new SparkListener() { - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { val outputMetrics = taskEnd.taskMetrics.outputMetrics recordsWritten += outputMetrics.recordsWritten bytesWritten += outputMetrics.bytesWritten @@ -473,6 +476,125 @@ abstract class FileStreamSinkSuite extends StreamTest { assert(outputFiles.toList.isEmpty, "Incomplete files should be cleaned up.") } } + + testQuietly("cleanup complete but invalid output for aborted job") { + withSQLConf(("spark.sql.streaming.commitProtocolClass", + classOf[PendingCommitFilesTrackingManifestFileCommitProtocol].getCanonicalName)) { + withTempDir { tempDir => + val checkpointDir = new File(tempDir, "chk") + val outputDir = new File(tempDir, "output @#output") + val inputData = MemoryStream[Int] + inputData.addData(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + val q = inputData.toDS() + .repartition(10) + .map { value => + // we intend task failure after some tasks succeeds + if (value == 5) { + // put some delay to let other task commits before this task fails + Thread.sleep(100) + value / 0 + } else { + value + } + } + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format("parquet") + .start(outputDir.getCanonicalPath) + + intercept[StreamingQueryException] { + try { + q.processAllAvailable() + } finally { + q.stop() + } + } + + import PendingCommitFilesTrackingManifestFileCommitProtocol._ 
+ val outputFileNames = Files.walk(outputDir.toPath).iterator().asScala + .filter(_.toString.endsWith(".parquet")) + .map(_.getFileName.toString) + .toSet + val trackingFileNames = tracking.map(new Path(_).getName).toSet + + // there would be possible to have race condition: + // - some tasks complete while abortJob is being called + // we can't delete complete files for these tasks (it's OK since this is a best effort) + assert(outputFileNames.intersect(trackingFileNames).isEmpty, + "abortJob should clean up files reported as successful.") + } + } + } + + test("Handle FileStreamSink metadata correctly for empty partition") { + Seq("parquet", "orc", "text", "json").foreach { format => + val inputData = MemoryStream[String] + val df = inputData.toDF() + + withTempDir { outputDir => + withTempDir { checkpointDir => + var query: StreamingQuery = null + try { + // repartition to more than the input to leave empty partitions + query = + df.repartition(10) + .writeStream + .option("checkpointLocation", checkpointDir.getCanonicalPath) + .format(format) + .start(outputDir.getCanonicalPath) + + inputData.addData("1", "2", "3") + inputData.addData("4", "5") + + failAfter(streamingTimeout) { + query.processAllAvailable() + } + } finally { + if (query != null) { + query.stop() + } + } + + val fs = new Path(outputDir.getCanonicalPath).getFileSystem( + spark.sessionState.newHadoopConf()) + val sinkLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, spark, + outputDir.getCanonicalPath) + + val allFiles = sinkLog.allFiles() + // only files from non-empty partition should be logged + assert(allFiles.length < 10) + assert(allFiles.forall(file => fs.exists(new Path(file.path)))) + + // the query should be able to read all rows correctly with metadata log + val outputDf = spark.read.format(format).load(outputDir.getCanonicalPath) + .selectExpr("CAST(value AS INT)").as[Int] + checkDatasetUnorderly(outputDf, 1, 2, 3, 4, 5) + } + } + } + } +} + +object 
PendingCommitFilesTrackingManifestFileCommitProtocol { + val tracking: ArrayBuffer[String] = new ArrayBuffer[String]() + + def cleanPendingCommitFiles(): Unit = tracking.clear() + def addPendingCommitFiles(paths: Seq[String]): Unit = tracking ++= paths +} + +class PendingCommitFilesTrackingManifestFileCommitProtocol(jobId: String, path: String) + extends ManifestFileCommitProtocol(jobId, path) { + import PendingCommitFilesTrackingManifestFileCommitProtocol._ + + override def setupJob(jobContext: JobContext): Unit = { + super.setupJob(jobContext) + cleanPendingCommitFiles() + } + + override def onTaskCommit(taskCommit: FileCommitProtocol.TaskCommitMessage): Unit = { + super.onTaskCommit(taskCommit) + addPendingCommitFiles(taskCommit.obj.asInstanceOf[Seq[SinkFileStatus]].map(_.path)) + } } class FileStreamSinkV1Suite extends FileStreamSinkSuite { @@ -535,7 +657,7 @@ class FileStreamSinkV2Suite extends FileStreamSinkSuite { // Verify that MetadataLogFileIndex is being used and the correct partitioning schema has // been inferred val table = df.queryExecution.analyzed.collect { - case DataSourceV2Relation(table: FileTable, _, _) => table + case DataSourceV2Relation(table: FileTable, _, _, _, _) => table } assert(table.size === 1) assert(table.head.fileIndex.isInstanceOf[MetadataLogFileIndex]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index f59f819c9c108..fa320333143ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -20,23 +20,27 @@ package org.apache.spark.sql.streaming import java.io.File import java.net.URI +import scala.collection.mutable import scala.util.Random -import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} +import org.apache.hadoop.fs._ +import 
org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.util.Progressable import org.scalatest.PrivateMethodTester -import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.execution.streaming.FileStreamSource.{FileEntry, SeenFilesMap} +import org.apache.spark.sql.execution.streaming.FileStreamSource.{FileEntry, SeenFilesMap, SourceFileArchiver} import org.apache.spark.sql.execution.streaming.sources.MemorySink import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.ExistsThrowsExceptionFileSystem._ import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.{StructType, _} import org.apache.spark.util.Utils abstract class FileStreamSourceTest @@ -49,7 +53,7 @@ abstract class FileStreamSourceTest * `FileStreamSource` actually being used in the execution. 
*/ abstract class AddFileData extends AddData { - private val _qualifiedBasePath = PrivateMethod[Path]('qualifiedBasePath) + private val _qualifiedBasePath = PrivateMethod[Path](Symbol("qualifiedBasePath")) private def isSamePath(fileSource: FileStreamSource, srcPath: File): Boolean = { val path = (fileSource invokePrivate _qualifiedBasePath()).toString.stripPrefix("file:") @@ -146,6 +150,20 @@ abstract class FileStreamSourceTest } } + case class AddFilesToFileStreamSinkLog( + fs: FileSystem, + srcDir: Path, + sinkLog: FileStreamSinkLog, + batchId: Int)( + pathFilter: Path => Boolean) extends ExternalAction { + override def runAction(): Unit = { + val statuses = fs.listStatus(srcDir, new PathFilter { + override def accept(path: Path): Boolean = pathFilter(path) + }) + sinkLog.add(batchId, statuses.map(SinkFileStatus(_))) + } + } + /** Use `format` and `path` to create FileStreamSource via DataFrameReader */ def createFileStream( format: String, @@ -177,8 +195,7 @@ abstract class FileStreamSourceTest } } - - protected def withTempDirs(body: (File, File) => Unit) { + protected def withTempDirs(body: (File, File) => Unit): Unit = { val src = Utils.createTempDir(namePrefix = "streaming.src") val tmp = Utils.createTempDir(namePrefix = "streaming.tmp") try { @@ -189,6 +206,19 @@ abstract class FileStreamSourceTest } } + protected def withThreeTempDirs(body: (File, File, File) => Unit): Unit = { + val src = Utils.createTempDir(namePrefix = "streaming.src") + val tmp = Utils.createTempDir(namePrefix = "streaming.tmp") + val archive = Utils.createTempDir(namePrefix = "streaming.archive") + try { + body(src, tmp, archive) + } finally { + Utils.deleteRecursively(src) + Utils.deleteRecursively(tmp) + Utils.deleteRecursively(archive) + } + } + val valueSchema = new StructType().add("value", StringType) } @@ -1144,6 +1174,62 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + test("SPARK-30669: maxFilesPerTrigger - ignored when using Trigger.Once") { + 
withTempDirs { (src, target) => + val checkpoint = new File(target, "chk").getCanonicalPath + val targetDir = new File(target, "data").getCanonicalPath + var lastFileModTime: Option[Long] = None + + /** Create a text file with a single data item */ + def createFile(data: Int): File = { + val file = stringToFile(new File(src, s"$data.txt"), data.toString) + if (lastFileModTime.nonEmpty) file.setLastModified(lastFileModTime.get + 1000) + lastFileModTime = Some(file.lastModified) + file + } + + createFile(1) + createFile(2) + createFile(3) + + // Set up a query to read text files one at a time + val df = spark + .readStream + .option("maxFilesPerTrigger", 1) + .text(src.getCanonicalPath) + + def startQuery(): StreamingQuery = { + df.writeStream + .format("parquet") + .trigger(Trigger.Once) + .option("checkpointLocation", checkpoint) + .start(targetDir) + } + val q = startQuery() + + try { + assert(q.awaitTermination(streamingTimeout.toMillis)) + assert(q.recentProgress.count(_.numInputRows != 0) == 1) // only one trigger was run + checkAnswer(sql(s"SELECT * from parquet.`$targetDir`"), (1 to 3).map(_.toString).toDF) + } finally { + q.stop() + } + + createFile(4) + createFile(5) + + // run a second batch + val q2 = startQuery() + try { + assert(q2.awaitTermination(streamingTimeout.toMillis)) + assert(q2.recentProgress.count(_.numInputRows != 0) == 1) // only one trigger was run + checkAnswer(sql(s"SELECT * from parquet.`$targetDir`"), (1 to 5).map(_.toString).toDF) + } finally { + q2.stop() + } + } + } + test("explain") { withTempDirs { case (src, tmp) => src.mkdirs() @@ -1218,8 +1304,8 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } test("compact interval metadata log") { - val _sources = PrivateMethod[Seq[Source]]('sources) - val _metadataLog = PrivateMethod[FileStreamSourceLog]('metadataLog) + val _sources = PrivateMethod[Seq[Source]](Symbol("sources")) + val _metadataLog = PrivateMethod[FileStreamSourceLog](Symbol("metadataLog")) def verify( 
execution: StreamExecution, @@ -1303,7 +1389,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { AddTextFileData("keep3", src, tmp), CheckAnswer("keep1", "keep2", "keep3"), AssertOnQuery("check getBatch") { execution: StreamExecution => - val _sources = PrivateMethod[Seq[Source]]('sources) + val _sources = PrivateMethod[Seq[Source]](Symbol("sources")) val fileSource = getSourcesFromStreamingQuery(execution).head def verify(startId: Option[Int], endId: Int, expected: String*): Unit = { @@ -1386,9 +1472,13 @@ class FileStreamSourceSuite extends FileStreamSourceTest { latestFirst: Boolean, firstBatch: String, secondBatch: String, - maxFileAge: Option[String] = None): Unit = { + maxFileAge: Option[String] = None, + cleanSource: CleanSourceMode.Value = CleanSourceMode.OFF, + archiveDir: Option[String] = None): Unit = { val srcOptions = Map("latestFirst" -> latestFirst.toString, "maxFilesPerTrigger" -> "1") ++ - maxFileAge.map("maxFileAge" -> _) + maxFileAge.map("maxFileAge" -> _) ++ + Seq("cleanSource" -> cleanSource.toString) ++ + archiveDir.map("sourceArchiveDir" -> _) val fileStream = createFileStream( "text", src.getCanonicalPath, @@ -1547,7 +1637,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { val actions = Seq( AddTextFileData(source1Content, sourceDir1, tmp), AddTextFileData(source2Content, sourceDir2, tmp) - ).filter(_.content != null) // don't write to a source dir if no content specified + ).filter(_.content != null) // don't write to a source dir if no content specified StreamProgressLockedActions(actions, desc = actions.mkString("[ ", " | ", " ]")) } @@ -1596,6 +1686,255 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } } + + test("remove completed files when remove option is enabled") { + withTempDirs { case (src, tmp) => + withSQLConf( + SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key -> "2", + // Force deleting the old logs + SQLConf.FILE_SOURCE_LOG_CLEANUP_DELAY.key -> "1", + 
SQLConf.FILE_SOURCE_CLEANER_NUM_THREADS.key -> "0" + ) { + val option = Map("latestFirst" -> "false", "maxFilesPerTrigger" -> "1", + "cleanSource" -> "delete") + + val fileStream = createFileStream("text", src.getCanonicalPath, options = option) + val filtered = fileStream.filter($"value" contains "keep") + + testStream(filtered)( + AddTextFileData("keep1", src, tmp, tmpFilePrefix = "keep1"), + CheckAnswer("keep1"), + AssertOnQuery("input file removed") { _: StreamExecution => + // it doesn't rename any file yet + assertFileIsNotRemoved(src, "keep1") + true + }, + AddTextFileData("keep2", src, tmp, tmpFilePrefix = "ke ep2 %"), + CheckAnswer("keep1", "keep2"), + AssertOnQuery("input file removed") { _: StreamExecution => + // it renames input file for first batch, but not for second batch yet + assertFileIsRemoved(src, "keep1") + assertFileIsNotRemoved(src, "ke ep2 %") + + true + }, + AddTextFileData("keep3", src, tmp, tmpFilePrefix = "keep3"), + CheckAnswer("keep1", "keep2", "keep3"), + AssertOnQuery("input file renamed") { _: StreamExecution => + // it renames input file for second batch, but not third batch yet + assertFileIsRemoved(src, "ke ep2 %") + assertFileIsNotRemoved(src, "keep3") + + true + } + ) + } + } + } + + test("move completed files to archive directory when archive option is enabled") { + withThreeTempDirs { case (src, tmp, archiveDir) => + withSQLConf( + SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key -> "2", + // Force deleting the old logs + SQLConf.FILE_SOURCE_LOG_CLEANUP_DELAY.key -> "1", + SQLConf.FILE_SOURCE_CLEANER_NUM_THREADS.key -> "0" + ) { + val option = Map("latestFirst" -> "false", "maxFilesPerTrigger" -> "1", + "cleanSource" -> "archive", "sourceArchiveDir" -> archiveDir.getAbsolutePath) + + val fileStream = createFileStream("text", s"${src.getCanonicalPath}/*/*", + options = option) + val filtered = fileStream.filter($"value" contains "keep") + + // src/k %1 + // file: src/k %1/keep1 + val dirForKeep1 = new File(src, "k %1") + // src/k 
%1/k 2 + // file: src/k %1/k 2/keep2 + val dirForKeep2 = new File(dirForKeep1, "k 2") + // src/k3 + // file: src/k3/keep3 + val dirForKeep3 = new File(src, "k3") + + val expectedMovedDir1 = new File(archiveDir.getAbsolutePath + dirForKeep1.toURI.getPath) + val expectedMovedDir2 = new File(archiveDir.getAbsolutePath + dirForKeep2.toURI.getPath) + val expectedMovedDir3 = new File(archiveDir.getAbsolutePath + dirForKeep3.toURI.getPath) + + testStream(filtered)( + AddTextFileData("keep1", dirForKeep1, tmp, tmpFilePrefix = "keep1"), + CheckAnswer("keep1"), + AssertOnQuery("input file archived") { _: StreamExecution => + // it doesn't rename any file yet + assertFileIsNotMoved(dirForKeep1, expectedMovedDir1, "keep1") + true + }, + AddTextFileData("keep2", dirForKeep2, tmp, tmpFilePrefix = "keep2 %"), + CheckAnswer("keep1", "keep2"), + AssertOnQuery("input file archived") { _: StreamExecution => + // it renames input file for first batch, but not for second batch yet + assertFileIsMoved(dirForKeep1, expectedMovedDir1, "keep1") + assertFileIsNotMoved(dirForKeep2, expectedMovedDir2, "keep2 %") + true + }, + AddTextFileData("keep3", dirForKeep3, tmp, tmpFilePrefix = "keep3"), + CheckAnswer("keep1", "keep2", "keep3"), + AssertOnQuery("input file archived") { _: StreamExecution => + // it renames input file for second batch, but not third batch yet + assertFileIsMoved(dirForKeep2, expectedMovedDir2, "keep2 %") + assertFileIsNotMoved(dirForKeep3, expectedMovedDir3, "keep3") + + true + }, + AddTextFileData("keep4", dirForKeep3, tmp, tmpFilePrefix = "keep4"), + CheckAnswer("keep1", "keep2", "keep3", "keep4"), + AssertOnQuery("input file archived") { _: StreamExecution => + // it renames input file for third batch, but not fourth batch yet + assertFileIsMoved(dirForKeep3, expectedMovedDir3, "keep3") + assertFileIsNotMoved(dirForKeep3, expectedMovedDir3, "keep4") + + true + } + ) + } + } + } + + Seq("delete", "archive").foreach { cleanOption => + test(s"Throw 
UnsupportedOperationException on configuring $cleanOption when source path" + + " refers the output dir of FileStreamSink") { + withThreeTempDirs { case (src, tmp, archiveDir) => + withSQLConf( + SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key -> "2", + // Force deleting the old logs + SQLConf.FILE_SOURCE_LOG_CLEANUP_DELAY.key -> "1", + SQLConf.FILE_SOURCE_CLEANER_NUM_THREADS.key -> "0" + ) { + val option = Map("latestFirst" -> "false", "maxFilesPerTrigger" -> "1", + "cleanSource" -> cleanOption, "sourceArchiveDir" -> archiveDir.getAbsolutePath) + + val fileStream = createFileStream("text", src.getCanonicalPath, options = option) + val filtered = fileStream.filter($"value" contains "keep") + + // create FileStreamSinkLog under source directory + val sinkLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, spark, + new File(src, FileStreamSink.metadataDir).getCanonicalPath) + val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf) + val srcPath = new Path(src.getCanonicalPath) + val fileSystem = srcPath.getFileSystem(hadoopConf) + + // Here we will just check whether the source file is removed or not, as we cover + // functionality test of "archive" in other UT. 
+ testStream(filtered)( + AddTextFileData("keep1", src, tmp, tmpFilePrefix = "keep1"), + AddFilesToFileStreamSinkLog(fileSystem, srcPath, sinkLog, 0) { path => + path.getName.startsWith("keep1") + }, + ExpectFailure[UnsupportedOperationException]( + t => assert(t.getMessage.startsWith("Clean up source files is not supported")), + isFatalError = false) + ) + } + } + } + } + + class FakeFileSystem(scheme: String) extends FileSystem { + override def exists(f: Path): Boolean = true + + override def mkdirs(f: Path, permission: FsPermission): Boolean = true + + override def rename(src: Path, dst: Path): Boolean = true + + override def getUri: URI = URI.create(s"${scheme}:///") + + override def open(f: Path, bufferSize: Int): FSDataInputStream = throw new NotImplementedError + + override def create( + f: Path, + permission: FsPermission, + overwrite: Boolean, + bufferSize: Int, + replication: Short, + blockSize: Long, + progress: Progressable): FSDataOutputStream = throw new NotImplementedError + + override def append(f: Path, bufferSize: Int, progress: Progressable): FSDataOutputStream = + throw new NotImplementedError + + override def delete(f: Path, recursive: Boolean): Boolean = throw new NotImplementedError + + override def listStatus(f: Path): Array[FileStatus] = throw new NotImplementedError + + override def setWorkingDirectory(new_dir: Path): Unit = throw new NotImplementedError + + override def getWorkingDirectory: Path = new Path("/somewhere") + + override def getFileStatus(f: Path): FileStatus = throw new NotImplementedError + } + + test("SourceFileArchiver - fail when base archive path matches source pattern") { + val fakeFileSystem = new FakeFileSystem("fake") + + def assertThrowIllegalArgumentException(sourcePatttern: Path, baseArchivePath: Path): Unit = { + intercept[IllegalArgumentException] { + new SourceFileArchiver(fakeFileSystem, sourcePatttern, fakeFileSystem, baseArchivePath) + } + } + + // 1) prefix of base archive path matches source pattern 
(baseArchiveDirPath has more depths) + val sourcePatternPath = new Path("/hello*/spar?") + val baseArchiveDirPath = new Path("/hello/spark/structured/streaming") + assertThrowIllegalArgumentException(sourcePatternPath, baseArchiveDirPath) + + // 2) prefix of source pattern matches base archive path (source pattern has more depths) + val sourcePatternPath2 = new Path("/hello*/spar?/structured/streaming") + val baseArchiveDirPath2 = new Path("/hello/spark/structured") + assertThrowIllegalArgumentException(sourcePatternPath2, baseArchiveDirPath2) + + // 3) source pattern matches base archive path (both have same depth) + val sourcePatternPath3 = new Path("/hello*/spar?/structured/*") + val baseArchiveDirPath3 = new Path("/hello/spark/structured/streaming") + assertThrowIllegalArgumentException(sourcePatternPath3, baseArchiveDirPath3) + } + + test("SourceFileArchiver - different filesystems between source and archive") { + val fakeFileSystem = new FakeFileSystem("fake") + val fakeFileSystem2 = new FakeFileSystem("fake2") + + val sourcePatternPath = new Path("/hello*/h{e,f}ll?") + val baseArchiveDirPath = new Path("/hello") + + intercept[IllegalArgumentException] { + new SourceFileArchiver(fakeFileSystem, sourcePatternPath, fakeFileSystem2, + baseArchiveDirPath) + } + } + + private def assertFileIsRemoved(sourceDir: File, fileName: String): Unit = { + assert(!sourceDir.list().exists(_.startsWith(fileName))) + } + + private def assertFileIsNotRemoved(sourceDir: File, fileName: String): Unit = { + assert(sourceDir.list().exists(_.startsWith(fileName))) + } + + private def assertFileIsNotMoved(sourceDir: File, expectedDir: File, filePrefix: String): Unit = { + assert(sourceDir.exists()) + assert(sourceDir.list().exists(_.startsWith(filePrefix))) + if (!expectedDir.exists()) { + // OK + } else { + assert(!expectedDir.list().exists(_.startsWith(filePrefix))) + } + } + + private def assertFileIsMoved(sourceDir: File, expectedDir: File, filePrefix: String): Unit = { + 
assert(sourceDir.exists()) + assert(!sourceDir.list().exists(_.startsWith(filePrefix))) + assert(expectedDir.exists()) + assert(expectedDir.list().exists(_.startsWith(filePrefix))) + } } class FileStreamSourceStressTestSuite extends FileStreamSourceTest { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index df7e9217f9140..d36c64f61a726 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -125,6 +125,8 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { var state: GroupStateImpl[Int] = GroupStateImpl.createForStreaming( None, 1000, 1000, ProcessingTimeTimeout, hasTimedOut = false, watermarkPresent = false) assert(state.getTimeoutTimestamp === NO_TIMESTAMP) + state.setTimeoutDuration("-1 month 31 days 1 second") + assert(state.getTimeoutTimestamp === 2000) state.setTimeoutDuration(500) assert(state.getTimeoutTimestamp === 1500) // can be set without initializing state testTimeoutTimestampNotAllowed[UnsupportedOperationException](state) @@ -225,8 +227,9 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { testIllegalTimeout { state.setTimeoutDuration("-1 month") } + testIllegalTimeout { - state.setTimeoutDuration("1 month -1 day") + state.setTimeoutDuration("1 month -31 day") } state = GroupStateImpl.createForStreaming( @@ -241,7 +244,7 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { state.setTimeoutTimestamp(10000, "-1 month") } testIllegalTimeout { - state.setTimeoutTimestamp(10000, "1 month -1 day") + state.setTimeoutTimestamp(10000, "1 month -32 day") } testIllegalTimeout { state.setTimeoutTimestamp(new Date(-10000)) @@ -253,7 +256,7 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { 
state.setTimeoutTimestamp(new Date(-10000), "-1 month") } testIllegalTimeout { - state.setTimeoutTimestamp(new Date(-10000), "1 month -1 day") + state.setTimeoutTimestamp(new Date(-10000), "1 month -32 day") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 958d15ba1701d..b6618826487c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -19,8 +19,9 @@ package org.apache.spark.sql.streaming import java.io.{File, InterruptedIOException, IOException, UncheckedIOException} import java.nio.channels.ClosedByInterruptException -import java.util.concurrent.{CountDownLatch, ExecutionException, TimeoutException, TimeUnit} +import java.util.concurrent.{CountDownLatch, ExecutionException, TimeUnit} +import scala.concurrent.TimeoutException import scala.reflect.ClassTag import scala.util.control.ControlThrowable @@ -35,6 +36,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.{LocalLimitExec, SimpleMode, SparkPlan} import org.apache.spark.sql.execution.command.ExplainCommand import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.{ContinuousMemoryStream, MemorySink} @@ -42,7 +44,7 @@ import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreCon import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.StreamSourceProvider -import org.apache.spark.sql.streaming.util.StreamManualClock +import org.apache.spark.sql.streaming.util.{BlockOnStopSourceProvider, StreamManualClock} import 
org.apache.spark.sql.types.{IntegerType, StructField, StructType} import org.apache.spark.util.Utils @@ -201,7 +203,7 @@ class StreamSuite extends StreamTest { } test("DataFrame reuse") { - def assertDF(df: DataFrame) { + def assertDF(df: DataFrame): Unit = { withTempDir { outputDir => withTempDir { checkpointDir => val query = df.writeStream.format("parquet") @@ -471,7 +473,7 @@ class StreamSuite extends StreamTest { val df = inputData.toDS().map(_ + "foo").groupBy("value").agg(count("*")) // Test `df.explain` - val explain = ExplainCommand(df.queryExecution.logical, extended = false) + val explain = ExplainCommand(df.queryExecution.logical, SimpleMode) val explainString = spark.sessionState .executePlan(explain) @@ -523,7 +525,7 @@ class StreamSuite extends StreamTest { val df = inputData.toDS().map(_ * 2).filter(_ > 5) // Test `df.explain` - val explain = ExplainCommand(df.queryExecution.logical, extended = false) + val explain = ExplainCommand(df.queryExecution.logical, SimpleMode) val explainString = spark.sessionState .executePlan(explain) @@ -755,9 +757,9 @@ class StreamSuite extends StreamTest { inputData.addData(9) streamingQuery.processAllAvailable() - QueryTest.checkAnswer(spark.table("counts").toDF(), - Row("1", 1) :: Row("2", 1) :: Row("3", 2) :: Row("4", 2) :: - Row("5", 2) :: Row("6", 2) :: Row("7", 1) :: Row("8", 1) :: Row("9", 1) :: Nil) + checkAnswer(spark.table("counts").toDF(), + Row(1, 1L) :: Row(2, 1L) :: Row(3, 2L) :: Row(4, 2L) :: + Row(5, 2L) :: Row(6, 2L) :: Row(7, 1L) :: Row(8, 1L) :: Row(9, 1L) :: Nil) } finally { if (streamingQuery ne null) { streamingQuery.stop() @@ -974,24 +976,50 @@ class StreamSuite extends StreamTest { CheckAnswer(1 to 3: _*)) } - test("streaming limit in complete mode") { + test("SPARK-30658: streaming limit before agg in complete mode") { val inputData = MemoryStream[Int] val limited = inputData.toDF().limit(5).groupBy("value").count() testStream(limited, OutputMode.Complete())( AddData(inputData, 1 to 3: _*), 
CheckAnswer(Row(1, 1), Row(2, 1), Row(3, 1)), AddData(inputData, 1 to 9: _*), - CheckAnswer(Row(1, 2), Row(2, 2), Row(3, 2), Row(4, 1), Row(5, 1))) + CheckAnswer(Row(1, 2), Row(2, 2), Row(3, 1))) } - test("streaming limits in complete mode") { + test("SPARK-30658: streaming limits before and after agg in complete mode " + + "(after limit < before limit)") { val inputData = MemoryStream[Int] val limited = inputData.toDF().limit(4).groupBy("value").count().orderBy("value").limit(3) testStream(limited, OutputMode.Complete())( + StartStream(additionalConfs = Map(SQLConf.SHUFFLE_PARTITIONS.key -> "1")), AddData(inputData, 1 to 9: _*), + // only 1 to 4 should be allowed to aggregate, and counts for only 1 to 3 should be output CheckAnswer(Row(1, 1), Row(2, 1), Row(3, 1)), AddData(inputData, 2 to 6: _*), - CheckAnswer(Row(1, 1), Row(2, 2), Row(3, 2))) + // None of the new values should be allowed to aggregate, same 3 counts should be output + CheckAnswer(Row(1, 1), Row(2, 1), Row(3, 1))) + } + + test("SPARK-30658: streaming limits before and after agg in complete mode " + + "(before limit < after limit)") { + val inputData = MemoryStream[Int] + val limited = inputData.toDF().limit(2).groupBy("value").count().orderBy("value").limit(3) + testStream(limited, OutputMode.Complete())( + StartStream(additionalConfs = Map(SQLConf.SHUFFLE_PARTITIONS.key -> "1")), + AddData(inputData, 1 to 9: _*), + CheckAnswer(Row(1, 1), Row(2, 1)), + AddData(inputData, 2 to 6: _*), + CheckAnswer(Row(1, 1), Row(2, 1))) + } + + test("SPARK-30657: streaming limit after streaming dedup in append mode") { + val inputData = MemoryStream[Int] + val limited = inputData.toDF().dropDuplicates().limit(1) + testStream(limited)( + AddData(inputData, 1, 2), + CheckAnswer(Row(1)), + AddData(inputData, 3, 4), + CheckAnswer(Row(1))) } test("streaming limit in update mode") { @@ -1032,6 +1060,82 @@ class StreamSuite extends StreamTest { false)) } + test("SPARK-30657: streaming limit should not apply on limits on 
state subplans") { + val streanData = MemoryStream[Int] + val streamingDF = streanData.toDF().toDF("value") + val staticDF = spark.createDataset(Seq(1)).toDF("value").orderBy("value") + testStream(streamingDF.join(staticDF.limit(1), "value"))( + AddData(streanData, 1, 2, 3), + CheckAnswer(Row(1)), + AddData(streanData, 1, 3, 5), + CheckAnswer(Row(1), Row(1))) + } + + test("SPARK-30657: streaming limit optimization from StreamingLocalLimitExec to LocalLimitExec") { + val inputData = MemoryStream[Int] + val inputDF = inputData.toDF() + + /** Verify whether the local limit in the plan is a streaming limit or is a simple */ + def verifyLocalLimit( + df: DataFrame, + expectStreamingLimit: Boolean, + outputMode: OutputMode = OutputMode.Append): Unit = { + + var execPlan: SparkPlan = null + testStream(df, outputMode)( + AddData(inputData, 1), + AssertOnQuery { q => + q.processAllAvailable() + execPlan = q.lastExecution.executedPlan + true + } + ) + require(execPlan != null) + + val localLimits = execPlan.collect { + case l: LocalLimitExec => l + case l: StreamingLocalLimitExec => l + } + + require( + localLimits.size == 1, + s"Cant verify local limit optimization with this plan:\n$execPlan") + + if (expectStreamingLimit) { + assert( + localLimits.head.isInstanceOf[StreamingLocalLimitExec], + s"Local limit was not StreamingLocalLimitExec:\n$execPlan") + } else { + assert( + localLimits.head.isInstanceOf[LocalLimitExec], + s"Local limit was not LocalLimitExec:\n$execPlan") + } + } + + // Should not be optimized, so StreamingLocalLimitExec should be present + verifyLocalLimit(inputDF.dropDuplicates().limit(1), expectStreamingLimit = true) + + // Should be optimized from StreamingLocalLimitExec to LocalLimitExec + verifyLocalLimit(inputDF.limit(1), expectStreamingLimit = false) + verifyLocalLimit( + inputDF.limit(1).groupBy().count(), + expectStreamingLimit = false, + outputMode = OutputMode.Complete()) + + // Should be optimized as repartition is sufficient to ensure that 
the iterators of + // StreamingDeduplicationExec should be consumed completely by the repartition exchange. + verifyLocalLimit(inputDF.dropDuplicates().repartition(1).limit(1), expectStreamingLimit = false) + + // Should be LocalLimitExec in the first place, not from optimization of StreamingLocalLimitExec + val staticDF = spark.range(1).toDF("value").limit(1) + verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) + + verifyLocalLimit( + inputDF.groupBy().count().limit(1), + expectStreamingLimit = false, + outputMode = OutputMode.Complete()) + } + test("is_continuous_processing property should be false for microbatch processing") { val input = MemoryStream[Int] val df = input.toDS() @@ -1125,6 +1229,36 @@ class StreamSuite extends StreamTest { } ) } + + // ProcessingTime trigger generates MicroBatchExecution, and ContinuousTrigger starts a + // ContinuousExecution + Seq(Trigger.ProcessingTime("1 second"), Trigger.Continuous("1 second")).foreach { trigger => + test(s"SPARK-30143: stop waits until timeout if blocked - trigger: $trigger") { + BlockOnStopSourceProvider.enableBlocking() + val sq = spark.readStream.format(classOf[BlockOnStopSourceProvider].getName) + .load() + .writeStream + .format("console") + .trigger(trigger) + .start() + failAfter(60.seconds) { + val startTime = System.nanoTime() + withSQLConf(SQLConf.STREAMING_STOP_TIMEOUT.key -> "2000") { + intercept[TimeoutException] { + sq.stop() + } + } + val duration = (System.nanoTime() - startTime) / 1e6 + assert(duration >= 2000, + s"Should have waited more than 2000 millis, but waited $duration millis") + + BlockOnStopSourceProvider.disableBlocking() + withSQLConf(SQLConf.STREAMING_STOP_TIMEOUT.key -> "0") { + sq.stop() + } + } + } + } } abstract class FakeSource extends StreamSourceProvider { @@ -1175,7 +1309,7 @@ class FakeDefaultSource extends FakeSource { ds.toDF("a") } - override def stop() {} + override def stop(): Unit = {} } } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 7914a713f0baa..6d5ad873eedea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -37,12 +37,12 @@ import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder, Ro import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.physical.AllTuples import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, SparkDataStream} import org.apache.spark.sql.execution.datasources.v2.StreamingDataSourceV2Relation import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous.{ContinuousExecution, EpochCoordinatorRef, IncrementAndGetEpoch} import org.apache.spark.sql.execution.streaming.sources.MemorySink import org.apache.spark.sql.execution.streaming.state.StateStore -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, SparkDataStream} import org.apache.spark.sql.streaming.StreamingQueryListener._ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.{Clock, SystemClock, Utils} @@ -112,7 +112,11 @@ trait StreamTest extends QueryTest with SharedSparkSession with TimeLimits with object MultiAddData { def apply[A] (source1: MemoryStream[A], data1: A*)(source2: MemoryStream[A], data2: A*): StreamAction = { - val actions = Seq(AddDataMemory(source1, data1), AddDataMemory(source2, data2)) + apply((source1, data1), (source2, data2)) + } + + def apply[A](inputs: (MemoryStream[A], Seq[A])*): StreamAction = { + val actions = inputs.map { case (source, data) => AddDataMemory(source, data) } StreamProgressLockedActions(actions, desc = actions.mkString("[ ", " | ", " ]")) } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 134e61ed12a21..741355381222d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.streaming import java.io.File import java.util.{Locale, TimeZone} +import scala.collection.mutable + import org.apache.commons.io.FileUtils import org.scalatest.Assertions @@ -28,13 +30,12 @@ import org.apache.spark.rdd.BlockRDD import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.Aggregate -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.exchange.Exchange import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemorySink import org.apache.spark.sql.execution.streaming.state.StreamingAggregationStateManager -import org.apache.spark.sql.expressions.scalalang.typed import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode._ @@ -184,7 +185,68 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { ) } - testWithAllStateVersions("state metrics") { + testWithAllStateVersions("state metrics - append mode") { + val inputData = MemoryStream[Int] + val aggWithWatermark = inputData.toDF() + .withColumn("eventTime", $"value".cast("timestamp")) + .withWatermark("eventTime", "10 seconds") + .groupBy(window($"eventTime", "5 seconds") as 'window) + .agg(count("*") as 'count) + 
.select($"window".getField("start").cast("long").as[Long], $"count".as[Long]) + + implicit class RichStreamExecution(query: StreamExecution) { + // this could be either empty row batch or actual batch + def stateNodes: Seq[SparkPlan] = { + query.lastExecution.executedPlan.collect { + case p if p.isInstanceOf[StateStoreSaveExec] => p + } + } + + def stateOperatorProgresses: Seq[StateOperatorProgress] = { + val operatorProgress = mutable.ArrayBuffer[StateOperatorProgress]() + var progress = query.recentProgress.last + + operatorProgress ++= progress.stateOperators.map { op => op.copy(op.numRowsUpdated) } + if (progress.numInputRows == 0) { + // empty batch, merge metrics from previous batch as well + progress = query.recentProgress.takeRight(2).head + operatorProgress.zipWithIndex.foreach { case (sop, index) => + // "numRowsUpdated" should be merged, as it could be updated in both batches. + // (for now it is only updated from previous batch, but things can be changed.) + // other metrics represent current status of state so picking up the latest values. 
+ val newOperatorProgress = sop.copy( + sop.numRowsUpdated + progress.stateOperators(index).numRowsUpdated) + operatorProgress(index) = newOperatorProgress + } + } + + operatorProgress + } + } + + testStream(aggWithWatermark)( + AddData(inputData, 15), + CheckAnswer(), // watermark = 5 + AssertOnQuery { _.stateNodes.size === 1 }, + AssertOnQuery { _.stateNodes.head.metrics("numOutputRows").value === 0 }, + AssertOnQuery { _.stateOperatorProgresses.head.numRowsUpdated === 1 }, + AssertOnQuery { _.stateOperatorProgresses.head.numRowsTotal === 1 }, + AddData(inputData, 10, 12, 14), + CheckAnswer(), // watermark = 5 + AssertOnQuery { _.stateNodes.size === 1 }, + AssertOnQuery { _.stateNodes.head.metrics("numOutputRows").value === 0 }, + AssertOnQuery { _.stateOperatorProgresses.head.numRowsUpdated === 1 }, + AssertOnQuery { _.stateOperatorProgresses.head.numRowsTotal === 2 }, + AddData(inputData, 25), + CheckAnswer((10, 3)), // watermark = 15 + AssertOnQuery { _.stateNodes.size === 1 }, + AssertOnQuery { _.stateNodes.head.metrics("numOutputRows").value === 1 }, + AssertOnQuery { _.stateOperatorProgresses.head.numRowsUpdated === 1 }, + AssertOnQuery { _.stateOperatorProgresses.head.numRowsTotal === 2 } + ) + } + + testWithAllStateVersions("state metrics - update/complete mode") { val inputData = MemoryStream[Int] val aggregated = @@ -280,16 +342,6 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { ) } - testWithAllStateVersions("typed aggregators") { - val inputData = MemoryStream[(String, Int)] - val aggregated = inputData.toDS().groupByKey(_._1).agg(typed.sumLong(_._2)) - - testStream(aggregated, Update)( - AddData(inputData, ("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)), - CheckLastBatch(("a", 30), ("b", 3), ("c", 1)) - ) - } - testWithAllStateVersions("prune results by current_time, complete mode") { import testImplicits._ val clock = new StreamManualClock @@ -345,28 +397,29 @@ class StreamingAggregationSuite extends 
StateStoreMetricsTest with Assertions { testWithAllStateVersions("prune results by current_date, complete mode") { import testImplicits._ val clock = new StreamManualClock + val tz = TimeZone.getDefault.getID val inputData = MemoryStream[Long] val aggregated = inputData.toDF() - .select(($"value" * DateTimeUtils.SECONDS_PER_DAY).cast("timestamp").as("value")) + .select(to_utc_timestamp(from_unixtime('value * SECONDS_PER_DAY), tz)) + .toDF("value") .groupBy($"value") .agg(count("*")) - .where($"value".cast("date") >= date_sub(current_timestamp().cast("date"), 10)) - .select( - ($"value".cast("long") / DateTimeUtils.SECONDS_PER_DAY).cast("long"), $"count(1)") + .where($"value".cast("date") >= date_sub(current_date(), 10)) + .select(($"value".cast("long") / SECONDS_PER_DAY).cast("long"), $"count(1)") testStream(aggregated, Complete)( StartStream(Trigger.ProcessingTime("10 day"), triggerClock = clock), // advance clock to 10 days, should retain all keys AddData(inputData, 0L, 5L, 5L, 10L), - AdvanceManualClock(DateTimeUtils.MILLIS_PER_DAY * 10), + AdvanceManualClock(MILLIS_PER_DAY * 10), CheckLastBatch((0L, 1), (5L, 2), (10L, 1)), // advance clock to 20 days, should retain keys >= 10 AddData(inputData, 15L, 15L, 20L), - AdvanceManualClock(DateTimeUtils.MILLIS_PER_DAY * 10), + AdvanceManualClock(MILLIS_PER_DAY * 10), CheckLastBatch((10L, 1), (15L, 2), (20L, 1)), // advance clock to 30 days, should retain keys >= 20 AddData(inputData, 85L), - AdvanceManualClock(DateTimeUtils.MILLIS_PER_DAY * 10), + AdvanceManualClock(MILLIS_PER_DAY * 10), CheckLastBatch((20L, 1), (85L, 1)), // bounce stream and ensure correct batch timestamp is used @@ -376,7 +429,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { q.sink.asInstanceOf[MemorySink].clear() q.commitLog.purge(3) // advance by 60 days i.e., 90 days total - clock.advance(DateTimeUtils.MILLIS_PER_DAY * 60) + clock.advance(MILLIS_PER_DAY * 60) true }, StartStream(Trigger.ProcessingTime("10 day"), 
triggerClock = clock), @@ -385,7 +438,7 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { // advance clock to 100 days, should retain keys >= 90 AddData(inputData, 85L, 90L, 100L, 105L), - AdvanceManualClock(DateTimeUtils.MILLIS_PER_DAY * 10), + AdvanceManualClock(MILLIS_PER_DAY * 10), CheckLastBatch((90L, 1), (100L, 1), (105L, 1)) ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 42fe9f34ee3ec..3f218c9cb7fd9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -17,12 +17,15 @@ package org.apache.spark.sql.streaming -import java.util.UUID +import java.io.File +import java.util.{Locale, UUID} import scala.util.Random +import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter +import org.apache.spark.SparkContext import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} import org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper @@ -31,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter} import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.execution.{FileSourceScanExec, LogicalRDD} import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinHelper} +import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper} import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreProviderId} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ @@ -374,7 +377,7 @@ class 
StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with val rdd1 = spark.sparkContext.makeRDD(1 to 10, numPartitions) val rdd2 = spark.sparkContext.makeRDD((1 to 10).map(_.toString), numPartitions) val rdd = rdd1.stateStoreAwareZipPartitions(rdd2, stateInfo, storeNames, coordinatorRef) { - (left, right) => left.zip(right) + (_, left, right) => left.zip(right) } require(rdd.partitions.length === numPartitions) for (partIndex <- 0 until numPartitions) { @@ -418,6 +421,63 @@ class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with AddData(input2, 1.to(1000): _*), CheckAnswer(1.to(1000): _*)) } + + test("SPARK-26187 restore the stream-stream inner join query from Spark 2.4") { + val inputStream = MemoryStream[(Int, Long)] + val df = inputStream.toDS() + .select(col("_1").as("value"), col("_2").cast("timestamp").as("timestamp")) + + val leftStream = df.select(col("value").as("leftId"), col("timestamp").as("leftTime")) + + val rightStream = df + // Introduce misses for ease of debugging + .where(col("value") % 2 === 0) + .select(col("value").as("rightId"), col("timestamp").as("rightTime")) + + val query = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("rightId = leftId AND rightTime >= leftTime AND " + + "rightTime <= leftTime + interval 5 seconds"), + joinType = "inner") + .select(col("leftId"), col("leftTime").cast("int"), + col("rightId"), col("rightTime").cast("int")) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-2.4.0-streaming-join/").toURI + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. 
+ FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + inputStream.addData((1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)) + + testStream(query)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + /* + Note: The checkpoint was generated using the following input in Spark version 2.4.0 + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + // batch 1 - global watermark = 0 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L) + // right: (2, 2L), (4, 4L) + CheckNewAnswer((2, 2L, 2, 2L), (4, 4L, 4, 4L)), + assertNumStateRows(7, 7), + */ + AddData(inputStream, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + // batch 2: same result as above test + CheckNewAnswer((6, 6L, 6, 6L), (8, 8L, 8, 8L), (10, 10L, 10, 10L)), + assertNumStateRows(11, 6), + Execute { query => + // Verify state format = 1 + val f = query.lastExecution.executedPlan.collect { + case f: StreamingSymmetricHashJoinExec => f + } + assert(f.size == 1) + assert(f.head.stateFormatVersion == 1) + } + ) + } } @@ -712,5 +772,223 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with assertNumStateRows(total = 2, updated = 2) ) } -} + test("SPARK-26187 self left outer join should not return outer nulls for already matched rows") { + val inputStream = MemoryStream[(Int, Long)] + + val df = inputStream.toDS() + .select(col("_1").as("value"), col("_2").cast("timestamp").as("timestamp")) + + val leftStream = df.select(col("value").as("leftId"), col("timestamp").as("leftTime")) + + val rightStream = df + // Introduce misses for ease of debugging + .where(col("value") % 2 === 0) + .select(col("value").as("rightId"), col("timestamp").as("rightTime")) + + val query = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("leftId = rightId AND rightTime >= leftTime AND " + + "rightTime <= leftTime + interval 5 seconds"), + joinType = "leftOuter") + 
.select(col("leftId"), col("leftTime").cast("int"), + col("rightId"), col("rightTime").cast("int")) + + testStream(query)( + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + // batch 1 - global watermark = 0 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L) + // right: (2, 2L), (4, 4L) + CheckNewAnswer((2, 2L, 2, 2L), (4, 4L, 4, 4L)), + assertNumStateRows(7, 7), + + AddData(inputStream, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + // batch 2 - global watermark = 5 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L), (6, 6L), (7, 7L), (8, 8L), + // (9, 9L), (10, 10L) + // right: (6, 6L), (8, 8L), (10, 10L) + // states evicted + // left: nothing (it waits for 5 seconds more than watermark due to join condition) + // right: (2, 2L), (4, 4L) + // NOTE: look for evicted rows in right which are not evicted from left - they were + // properly joined in batch 1 + CheckNewAnswer((6, 6L, 6, 6L), (8, 8L, 8, 8L), (10, 10L, 10, 10L)), + assertNumStateRows(13, 8), + + AddData(inputStream, (11, 11L), (12, 12L), (13, 13L), (14, 14L), (15, 15L)), + // batch 3 + // - global watermark = 9 <= min(9, 10) + // states + // left: (4, 4L), (5, 5L), (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L), (11, 11L), + // (12, 12L), (13, 13L), (14, 14L), (15, 15L) + // right: (10, 10L), (12, 12L), (14, 14L) + // states evicted + // left: (1, 1L), (2, 2L), (3, 3L) + // right: (6, 6L), (8, 8L) + CheckNewAnswer( + Row(12, 12L, 12, 12L), Row(14, 14L, 14, 14L), + Row(1, 1L, null, null), Row(3, 3L, null, null)), + assertNumStateRows(15, 7) + ) + } + + test("SPARK-26187 self right outer join should not return outer nulls for already matched rows") { + val inputStream = MemoryStream[(Int, Long)] + + val df = inputStream.toDS() + .select(col("_1").as("value"), col("_2").cast("timestamp").as("timestamp")) + + // we're just flipping "left" and "right" from left outer join and apply right outer join + + val leftStream = df + // Introduce misses for ease of 
debugging + .where(col("value") % 2 === 0) + .select(col("value").as("leftId"), col("timestamp").as("leftTime")) + + val rightStream = df.select(col("value").as("rightId"), col("timestamp").as("rightTime")) + + val query = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("leftId = rightId AND leftTime >= rightTime AND " + + "leftTime <= rightTime + interval 5 seconds"), + joinType = "rightOuter") + .select(col("leftId"), col("leftTime").cast("int"), + col("rightId"), col("rightTime").cast("int")) + + // we can just flip left and right in the explanation of left outer query test + // to assume the status of right outer query, hence skip explaining here + testStream(query)( + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + CheckNewAnswer((2, 2L, 2, 2L), (4, 4L, 4, 4L)), + assertNumStateRows(7, 7), + + AddData(inputStream, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + CheckNewAnswer((6, 6L, 6, 6L), (8, 8L, 8, 8L), (10, 10L, 10, 10L)), + assertNumStateRows(13, 8), + + AddData(inputStream, (11, 11L), (12, 12L), (13, 13L), (14, 14L), (15, 15L)), + CheckNewAnswer( + Row(12, 12L, 12, 12L), Row(14, 14L, 14, 14L), + Row(null, null, 1, 1L), Row(null, null, 3, 3L)), + assertNumStateRows(15, 7) + ) + } + + test("SPARK-26187 restore the stream-stream outer join query from Spark 2.4") { + val inputStream = MemoryStream[(Int, Long)] + val df = inputStream.toDS() + .select(col("_1").as("value"), col("_2").cast("timestamp").as("timestamp")) + + val leftStream = df.select(col("value").as("leftId"), col("timestamp").as("leftTime")) + + val rightStream = df + // Introduce misses for ease of debugging + .where(col("value") % 2 === 0) + .select(col("value").as("rightId"), col("timestamp").as("rightTime")) + + val query = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("rightId = leftId AND rightTime >= leftTime AND 
" + + "rightTime <= leftTime + interval 5 seconds"), + joinType = "leftOuter") + .select(col("leftId"), col("leftTime").cast("int"), + col("rightId"), col("rightTime").cast("int")) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-2.4.0-streaming-join/").toURI + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + inputStream.addData((1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)) + + /* + Note: The checkpoint was generated using the following input in Spark version 2.4.0 + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + // batch 1 - global watermark = 0 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L) + // right: (2, 2L), (4, 4L) + CheckNewAnswer((2, 2L, 2, 2L), (4, 4L, 4, 4L)), + assertNumStateRows(7, 7), + */ + + // we just fail the query if the checkpoint was create from less than Spark 3.0 + val e = intercept[StreamingQueryException] { + val writer = query.writeStream.format("console") + .option("checkpointLocation", checkpointDir.getAbsolutePath).start() + inputStream.addData((7, 7L), (8, 8L)) + eventually(timeout(streamingTimeout)) { + assert(writer.exception.isDefined) + } + throw writer.exception.get + } + assert(e.getMessage.toLowerCase(Locale.ROOT) + .contains("the query is using stream-stream outer join with state format version 1")) + } + + test("SPARK-29438: ensure UNION doesn't lead stream-stream join to use shifted partition IDs") { + def constructUnionDf(desiredPartitionsForInput1: Int) + : (MemoryStream[Int], MemoryStream[Int], MemoryStream[Int], DataFrame) = { + val input1 = MemoryStream[Int](desiredPartitionsForInput1) + val df1 = input1.toDF + .select( + 'value as "key", + 'value as "leftValue", + 'value as "rightValue") + val 
(input2, df2) = setupStream("left", 2) + val (input3, df3) = setupStream("right", 3) + + val joined = df2 + .join(df3, + df2("key") === df3("key") && df2("leftTime") === df3("rightTime"), + "inner") + .select(df2("key"), 'leftValue, 'rightValue) + + (input1, input2, input3, df1.union(joined)) + } + + withTempDir { tempDir => + val (input1, input2, input3, unionDf) = constructUnionDf(2) + + testStream(unionDf)( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + MultiAddData( + (input1, Seq(11, 12, 13)), + (input2, Seq(11, 12, 13, 14, 15)), + (input3, Seq(13, 14, 15, 16, 17))), + CheckNewAnswer(Row(11, 11, 11), Row(12, 12, 12), Row(13, 13, 13), Row(13, 26, 39), + Row(14, 28, 42), Row(15, 30, 45)), + StopStream + ) + + // We're restoring the query with different number of partitions in left side of UNION, + // which leads right side of union to have mismatched partition IDs if it relies on + // TaskContext.partitionId(). SPARK-29438 fixes this issue to not rely on it. + + val (newInput1, newInput2, newInput3, newUnionDf) = constructUnionDf(3) + + newInput1.addData(11, 12, 13) + newInput2.addData(11, 12, 13, 14, 15) + newInput3.addData(13, 14, 15, 16, 17) + + testStream(newUnionDf)( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + MultiAddData( + (newInput1, Seq(21, 22, 23)), + (newInput2, Seq(21, 22, 23, 24, 25)), + (newInput3, Seq(23, 24, 25, 26, 27))), + CheckNewAnswer(Row(21, 21, 21), Row(22, 22, 22), Row(23, 23, 23), Row(23, 46, 69), + Row(24, 48, 72), Row(25, 50, 75)) + ) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala index d96404863a255..9d0f829ac9684 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala @@ -23,17 +23,17 @@ import 
scala.collection.mutable import org.scalactic.TolerantNumerics import org.scalatest.BeforeAndAfter -import org.scalatest.PrivateMethodTester._ import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.concurrent.Waiters.Waiter import org.apache.spark.SparkException import org.apache.spark.scheduler._ -import org.apache.spark.sql.{Encoder, SparkSession} +import org.apache.spark.sql.{Encoder, Row, SparkSession} +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2} import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2} import org.apache.spark.sql.streaming.StreamingQueryListener._ +import org.apache.spark.sql.streaming.ui.StreamingQueryStatusListener import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.util.JsonProtocol @@ -47,9 +47,11 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { after { spark.streams.active.foreach(_.stop()) assert(spark.streams.active.isEmpty) - assert(spark.streams.listListeners().isEmpty) + // Skip check default `StreamingQueryStatusListener` which is for streaming UI. 
+ assert(spark.streams.listListeners() + .filterNot(_.isInstanceOf[StreamingQueryStatusListener]).isEmpty) // Make sure we don't leak any events to the next test - spark.sparkContext.listenerBus.waitUntilEmpty(10000) + spark.sparkContext.listenerBus.waitUntilEmpty() } testQuietly("single listener, check trigger events are generated correctly") { @@ -252,8 +254,8 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { assert(newEvent.name === event.name) } - testSerialization(new QueryStartedEvent(UUID.randomUUID, UUID.randomUUID, "name")) - testSerialization(new QueryStartedEvent(UUID.randomUUID, UUID.randomUUID, null)) + testSerialization(new QueryStartedEvent(UUID.randomUUID, UUID.randomUUID, "name", 1L)) + testSerialization(new QueryStartedEvent(UUID.randomUUID, UUID.randomUUID, null, 1L)) } test("QueryProgressEvent serialization") { @@ -320,7 +322,7 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { q.recentProgress.size > 1 && q.recentProgress.size <= 11 } testStream(input.toDS)(actions: _*) - spark.sparkContext.listenerBus.waitUntilEmpty(10000) + spark.sparkContext.listenerBus.waitUntilEmpty() // 11 is the max value of the possible numbers of events. 
assert(numProgressEvent > 1 && numProgressEvent <= 11) } finally { @@ -343,7 +345,7 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { AddData(mem, 1, 2, 3), CheckAnswer(1, 2, 3) ) - session.sparkContext.listenerBus.waitUntilEmpty(5000) + session.sparkContext.listenerBus.waitUntilEmpty() } def assertEventsCollected(collector: EventCollector): Unit = { @@ -404,6 +406,63 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { testReplayListenerBusWithBorkenEventJsons("query-event-logs-version-2.0.2.txt") } + test("listener propagates observable metrics") { + import org.apache.spark.sql.functions._ + val clock = new StreamManualClock + val inputData = new MemoryStream[Int](0, sqlContext) + val df = inputData.toDF() + .observe( + name = "my_event", + min($"value").as("min_val"), + max($"value").as("max_val"), + sum($"value").as("sum_val"), + count(when($"value" % 2 === 0, 1)).as("num_even")) + .observe( + name = "other_event", + avg($"value").cast("int").as("avg_val")) + val listener = new EventCollector + def checkMetrics(f: java.util.Map[String, Row] => Unit): StreamAction = { + AssertOnQuery { _ => + eventually(Timeout(streamingTimeout)) { + assert(listener.allProgressEvents.nonEmpty) + f(listener.allProgressEvents.last.observedMetrics) + true + } + } + } + + try { + spark.streams.addListener(listener) + testStream(df, OutputMode.Append)( + StartStream(Trigger.ProcessingTime(100), triggerClock = clock), + // Batch 1 + AddData(inputData, 1, 2), + AdvanceManualClock(100), + checkMetrics { metrics => + assert(metrics.get("my_event") === Row(1, 2, 3L, 1L)) + assert(metrics.get("other_event") === Row(1)) + }, + + // Batch 2 + AddData(inputData, 10, 30, -10, 5), + AdvanceManualClock(100), + checkMetrics { metrics => + assert(metrics.get("my_event") === Row(-10, 30, 35L, 3L)) + assert(metrics.get("other_event") === Row(8)) + }, + + // Batch 3 - no data + AdvanceManualClock(100), + checkMetrics { metrics => + 
assert(metrics.isEmpty) + }, + StopStream + ) + } finally { + spark.streams.removeListener(listener) + } + } + private def testReplayListenerBusWithBorkenEventJsons(fileName: String): Unit = { val input = getClass.getResourceAsStream(s"/structured-streaming/$fileName") val events = mutable.ArrayBuffer[SparkListenerEvent]() @@ -454,6 +513,10 @@ class StreamingQueryListenerSuite extends StreamTest with BeforeAndAfter { _progressEvents.filter(_.numInputRows > 0) } + def allProgressEvents: Seq[StreamingQueryProgress] = _progressEvents.synchronized { + _progressEvents.clone() + } + def reset(): Unit = { startEvent = null terminationEvent = null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala index 7801d968e901d..d538d93b845b4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenersConfSuite.scala @@ -39,7 +39,7 @@ class StreamingQueryListenersConfSuite extends StreamTest with BeforeAndAfter { StopStream ) - spark.sparkContext.listenerBus.waitUntilEmpty(5000) + spark.sparkContext.listenerBus.waitUntilEmpty() assert(TestListener.queryStartedEvent != null) assert(TestListener.queryTerminatedEvent != null) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala index b26d2556b2e36..96f7efeef98e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryManagerSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.streaming +import java.io.File import java.util.concurrent.CountDownLatch import scala.concurrent.Future @@ -28,9 +29,10 @@ 
import org.scalatest.time.Span import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkException -import org.apache.spark.sql.Dataset +import org.apache.spark.sql.{Dataset, Encoders} import org.apache.spark.sql.execution.datasources.v2.StreamingDataSourceV2Relation import org.apache.spark.sql.execution.streaming._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.BlockingSource import org.apache.spark.util.Utils @@ -242,6 +244,154 @@ class StreamingQueryManagerSuite extends StreamTest { } } + testQuietly("can't start a streaming query with the same name in the same session") { + val ds1 = makeDataset._2 + val ds2 = makeDataset._2 + val queryName = "abc" + + val query1 = ds1.writeStream.format("noop").queryName(queryName).start() + try { + val e = intercept[IllegalArgumentException] { + ds2.writeStream.format("noop").queryName(queryName).start() + } + assert(e.getMessage.contains("query with that name is already active")) + } finally { + query1.stop() + } + } + + testQuietly("can start a streaming query with the same name in a different session") { + val session2 = spark.cloneSession() + + val ds1 = MemoryStream(Encoders.INT, spark.sqlContext).toDS() + val ds2 = MemoryStream(Encoders.INT, session2.sqlContext).toDS() + val queryName = "abc" + + val query1 = ds1.writeStream.format("noop").queryName(queryName).start() + val query2 = ds2.writeStream.format("noop").queryName(queryName).start() + + query1.stop() + query2.stop() + } + + testQuietly("can't start multiple instances of the same streaming query in the same session") { + withSQLConf(SQLConf.STREAMING_STOP_ACTIVE_RUN_ON_RESTART.key -> "false") { + withTempDir { dir => + val (ms1, ds1) = makeDataset + val (ms2, ds2) = makeDataset + val chkLocation = new File(dir, "_checkpoint").getCanonicalPath + val dataLocation = new File(dir, "data").getCanonicalPath + + val query1 = ds1.writeStream.format("parquet") + .option("checkpointLocation", 
chkLocation).start(dataLocation) + ms1.addData(1, 2, 3) + try { + val e = intercept[IllegalStateException] { + ds2.writeStream.format("parquet") + .option("checkpointLocation", chkLocation).start(dataLocation) + } + assert(e.getMessage.contains("same id")) + } finally { + spark.streams.active.foreach(_.stop()) + } + } + } + } + + testQuietly("new instance of the same streaming query stops old query in the same session") { + failAfter(90 seconds) { + withSQLConf(SQLConf.STREAMING_STOP_ACTIVE_RUN_ON_RESTART.key -> "true") { + withTempDir { dir => + val (ms1, ds1) = makeDataset + val (ms2, ds2) = makeDataset + val chkLocation = new File(dir, "_checkpoint").getCanonicalPath + val dataLocation = new File(dir, "data").getCanonicalPath + + val query1 = ds1.writeStream.format("parquet") + .option("checkpointLocation", chkLocation).start(dataLocation) + ms1.addData(1, 2, 3) + val query2 = ds2.writeStream.format("parquet") + .option("checkpointLocation", chkLocation).start(dataLocation) + try { + ms2.addData(1, 2, 3) + query2.processAllAvailable() + assert(spark.sharedState.activeStreamingQueries.get(query2.id) === + query2.asInstanceOf[StreamingQueryWrapper].streamingQuery, + "The correct streaming query is not being tracked in global state") + + assert(!query1.isActive, + "First query should have stopped before starting the second query") + } finally { + spark.streams.active.foreach(_.stop()) + } + } + } + } + } + + testQuietly( + "can't start multiple instances of the same streaming query in the different sessions") { + withSQLConf(SQLConf.STREAMING_STOP_ACTIVE_RUN_ON_RESTART.key -> "false") { + withTempDir { dir => + val session2 = spark.cloneSession() + + val ms1 = MemoryStream(Encoders.INT, spark.sqlContext) + val ds2 = MemoryStream(Encoders.INT, session2.sqlContext).toDS() + val chkLocation = new File(dir, "_checkpoint").getCanonicalPath + val dataLocation = new File(dir, "data").getCanonicalPath + + val query1 = ms1.toDS().writeStream.format("parquet") + 
.option("checkpointLocation", chkLocation).start(dataLocation) + ms1.addData(1, 2, 3) + try { + val e = intercept[IllegalStateException] { + ds2.writeStream.format("parquet") + .option("checkpointLocation", chkLocation).start(dataLocation) + } + assert(e.getMessage.contains("same id")) + } finally { + spark.streams.active.foreach(_.stop()) + session2.streams.active.foreach(_.stop()) + } + } + } + } + + testQuietly( + "new instance of the same streaming query stops old query in a different session") { + failAfter(90 seconds) { + withSQLConf(SQLConf.STREAMING_STOP_ACTIVE_RUN_ON_RESTART.key -> "true") { + withTempDir { dir => + val session2 = spark.cloneSession() + + val ms1 = MemoryStream(Encoders.INT, spark.sqlContext) + val ds2 = MemoryStream(Encoders.INT, session2.sqlContext).toDS() + val chkLocation = new File(dir, "_checkpoint").getCanonicalPath + val dataLocation = new File(dir, "data").getCanonicalPath + + val query1 = ms1.toDS().writeStream.format("parquet") + .option("checkpointLocation", chkLocation).start(dataLocation) + ms1.addData(1, 2, 3) + val query2 = ds2.writeStream.format("parquet") + .option("checkpointLocation", chkLocation).start(dataLocation) + try { + ms1.addData(1, 2, 3) + query2.processAllAvailable() + assert(spark.sharedState.activeStreamingQueries.get(query2.id) === + query2.asInstanceOf[StreamingQueryWrapper].streamingQuery, + "The correct streaming execution is not being tracked in global state") + + assert(!query1.isActive, + "First query should have stopped before starting the second query") + } finally { + spark.streams.active.foreach(_.stop()) + session2.streams.active.foreach(_.stop()) + } + } + } + } + } + /** Run a body of code by defining a query on each dataset */ private def withQueriesOn(datasets: Dataset[_]*)(body: Seq[StreamingQuery] => Unit): Unit = { failAfter(streamingTimeout) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index e784d318b4ffa..6f00b528cb8bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -24,12 +24,18 @@ import scala.collection.JavaConverters._ import org.json4s._ import org.json4s.jackson.JsonMethods._ import org.scalatest.concurrent.Eventually +import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar._ +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamingQueryStatusAndProgressSuite._ +import org.apache.spark.sql.streaming.StreamingQuerySuite.clock +import org.apache.spark.sql.streaming.util.StreamManualClock +import org.apache.spark.sql.types.StructType class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { test("StreamingQueryProgress - prettyJson") { @@ -74,6 +80,17 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "sink" : { | "description" : "sink", | "numOutputRows" : -1 + | }, + | "observedMetrics" : { + | "event1" : { + | "c1" : 1, + | "c2" : 3.0 + | }, + | "event2" : { + | "rc" : 1, + | "min_q" : "hello", + | "max_q" : "world" + | } | } |} """.stripMargin.trim) @@ -107,6 +124,22 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "sink" : { | "description" : "sink", | "numOutputRows" : -1 + | }, + | "observedMetrics" : { + | "event_a" : { + | "c1" : null, + | "c2" : -20.7 + | }, + | "event_b1" : { + | "rc" : 33, + | "min_q" : "foo", + | "max_q" : "bar" + | }, + | "event_b2" : { + | "rc" : 200, + | "min_q" : "fzo", + | 
"max_q" : "baz" + | } | } |} """.stripMargin.trim) @@ -215,6 +248,45 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { } } + test("SPARK-29973: Make `processedRowsPerSecond` calculated more accurately and meaningfully") { + import testImplicits._ + + clock = new StreamManualClock + val inputData = MemoryStream[Int] + val query = inputData.toDS() + + testStream(query)( + StartStream(Trigger.ProcessingTime(1000), triggerClock = clock), + AdvanceManualClock(1000), + waitUntilBatchProcessed, + AssertOnQuery(query => { + assert(query.lastProgress.numInputRows == 0) + assert(query.lastProgress.processedRowsPerSecond == 0.0d) + true + }), + AddData(inputData, 1, 2), + AdvanceManualClock(1000), + waitUntilBatchProcessed, + AssertOnQuery(query => { + assert(query.lastProgress.numInputRows == 2) + assert(query.lastProgress.processedRowsPerSecond == 2000d) + true + }), + StopStream + ) + } + + def waitUntilBatchProcessed: AssertOnQuery = Execute { q => + eventually(Timeout(streamingTimeout)) { + if (q.exception.isEmpty) { + assert(clock.isStreamWaitingAt(clock.getTimeMillis())) + } + } + if (q.exception.isDefined) { + throw q.exception.get + } + } + def assertJson(source: String, expected: String): Unit = { assert( source.replaceAll("\r\n|\r|\n", System.lineSeparator) === @@ -223,12 +295,24 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { } object StreamingQueryStatusAndProgressSuite { + private val schema1 = new StructType() + .add("c1", "long") + .add("c2", "double") + private val schema2 = new StructType() + .add("rc", "long") + .add("min_q", "string") + .add("max_q", "string") + private def row(schema: StructType, elements: Any*): Row = { + new GenericRowWithSchema(elements.toArray, schema) + } + val testProgress1 = new StreamingQueryProgress( id = UUID.randomUUID, runId = UUID.randomUUID, name = "myName", timestamp = "2016-12-05T20:54:20.827Z", batchId = 2L, + batchDuration = 0L, durationMs = new 
java.util.HashMap(Map("total" -> 0L).mapValues(long2Long).asJava), eventTime = new java.util.HashMap(Map( "max" -> "2016-12-05T20:54:20.827Z", @@ -251,7 +335,10 @@ object StreamingQueryStatusAndProgressSuite { processedRowsPerSecond = Double.PositiveInfinity // should not be present in the json ) ), - sink = SinkProgress("sink", None) + sink = SinkProgress("sink", None), + observedMetrics = new java.util.HashMap(Map( + "event1" -> row(schema1, 1L, 3.0d), + "event2" -> row(schema2, 1L, "hello", "world")).asJava) ) val testProgress2 = new StreamingQueryProgress( @@ -260,6 +347,7 @@ object StreamingQueryStatusAndProgressSuite { name = null, // should not be present in the json timestamp = "2016-12-05T20:54:20.827Z", batchId = 2L, + batchDuration = 0L, durationMs = new java.util.HashMap(Map("total" -> 0L).mapValues(long2Long).asJava), // empty maps should be handled correctly eventTime = new java.util.HashMap(Map.empty[String, String].asJava), @@ -275,7 +363,11 @@ object StreamingQueryStatusAndProgressSuite { processedRowsPerSecond = Double.NegativeInfinity // should not be present in the json ) ), - sink = SinkProgress("sink", None) + sink = SinkProgress("sink", None), + observedMetrics = new java.util.HashMap(Map( + "event_a" -> row(schema1, null, -20.7d), + "event_b1" -> row(schema2, 33L, "foo", "bar"), + "event_b2" -> row(schema2, 200L, "fzo", "baz")).asJava) ) val testStatus = new StreamingQueryStatus("active", true, false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 3ad893f871c94..77f5c856ff0f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -29,19 +29,19 @@ import org.apache.hadoop.fs.Path import org.scalactic.TolerantNumerics import org.scalatest.BeforeAndAfter import 
org.scalatest.concurrent.PatienceConfiguration.Timeout -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkException, TestUtils} import org.apache.spark.internal.Logging import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} import org.apache.spark.sql.catalyst.expressions.{Literal, Rand, Randn, Shuffle, Uuid} +import org.apache.spark.sql.connector.read.InputPartition +import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2} import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.{MemorySink, TestForeachWriter} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.InputPartition -import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2} import org.apache.spark.sql.streaming.util.{BlockingSource, MockSourceProvider, StreamManualClock} import org.apache.spark.sql.types.StructType @@ -123,9 +123,11 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi assert(q3.runId !== q4.runId) // Only one query with same id can be active - val q5 = startQuery(restart = false) - val e = intercept[IllegalStateException] { - startQuery(restart = true) + withSQLConf(SQLConf.STREAMING_STOP_ACTIVE_RUN_ON_RESTART.key -> "false") { + val q5 = startQuery(restart = false) + val e = intercept[IllegalStateException] { + startQuery(restart = true) + } } } } @@ -464,7 +466,8 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi val streamingTriggerDF = spark.createDataset(1 to 10).toDF val streamingInputDF = createSingleTriggerStreamingDF(streamingTriggerDF).toDF("value") - val progress = getFirstProgress(streamingInputDF.join(streamingInputDF, "value")) + val progress = getStreamingQuery(streamingInputDF.join(streamingInputDF, 
"value")) + .recentProgress.head assert(progress.numInputRows === 20) // data is read multiple times in self-joins assert(progress.sources.size === 1) assert(progress.sources(0).numInputRows === 20) @@ -477,7 +480,8 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi // Trigger input has 10 rows, static input has 2 rows, // therefore after the first trigger, the calculated input rows should be 10 - val progress = getFirstProgress(streamingInputDF.join(staticInputDF, "value")) + val progress = getStreamingQuery(streamingInputDF.join(staticInputDF, "value")) + .recentProgress.head assert(progress.numInputRows === 10) assert(progress.sources.size === 1) assert(progress.sources(0).numInputRows === 10) @@ -490,7 +494,7 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi val streamingInputDF = createSingleTriggerStreamingDF(streamingTriggerDF) // After the first trigger, the calculated input rows should be 10 - val progress = getFirstProgress(streamingInputDF) + val progress = getStreamingQuery(streamingInputDF).recentProgress.head assert(progress.numInputRows === 10) assert(progress.sources.size === 1) assert(progress.sources(0).numInputRows === 10) @@ -1118,12 +1122,12 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi StreamingExecutionRelation(source, spark) } - /** Returns the query progress at the end of the first trigger of streaming DF */ - private def getFirstProgress(streamingDF: DataFrame): StreamingQueryProgress = { + /** Returns the query at the end of the first trigger of streaming DF */ + private def getStreamingQuery(streamingDF: DataFrame): StreamingQuery = { try { val q = streamingDF.writeStream.format("memory").queryName("test").start() q.processAllAvailable() - q.recentProgress.head + q } finally { spark.streams.active.map(_.stop()) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala index bad22590807a7..55b884573f647 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousQueuedDataReaderSuite.scala @@ -20,15 +20,15 @@ package org.apache.spark.sql.streaming.continuous import java.util.concurrent.{ArrayBlockingQueue, BlockingQueue} import org.mockito.Mockito._ -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReader, ContinuousStream, PartitionOffset} +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.sql.execution.streaming.continuous._ -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousPartitionReader, ContinuousStream, PartitionOffset} -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types.{DataType, IntegerType, StructType} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 5bd75c850fe76..8599ceb833ca4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.streaming.continuous +import java.sql.Timestamp + import org.apache.spark.{SparkContext, SparkException} import 
org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} import org.apache.spark.sql._ @@ -100,6 +102,21 @@ class ContinuousSuite extends ContinuousSuiteBase { CheckAnswer(0, 1, 2, 3, 4, 5)) } + test("SPARK-29642: basic with various types") { + val input = ContinuousMemoryStream[String] + + testStream(input.toDF())( + AddData(input, "0", "1", "2"), + CheckAnswer("0", "1", "2")) + + val input2 = ContinuousMemoryStream[(String, Timestamp)] + + val timestamp = Timestamp.valueOf("2015-06-11 10:10:10.100") + testStream(input2.toDF())( + AddData(input2, ("0", timestamp), ("1", timestamp)), + CheckAnswer(("0", timestamp), ("1", timestamp))) + } + test("map") { val input = ContinuousMemoryStream[Int] val df = input.toDF().map(_.getInt(0) * 2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala index e3498db4194e8..0e1c9b9c4ba46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/EpochCoordinatorSuite.scala @@ -21,16 +21,16 @@ import org.mockito.{ArgumentCaptor, InOrder} import org.mockito.ArgumentMatchers.{any, eq => eqTo} import org.mockito.Mockito._ import org.scalatest.BeforeAndAfterEach -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.sql.LocalSparkSession +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, PartitionOffset} +import org.apache.spark.sql.connector.write.WriterCommitMessage +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.internal.SQLConf.CONTINUOUS_STREAMING_EPOCH_BACKLOG_QUEUE_SIZE -import 
org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, PartitionOffset} -import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite import org.apache.spark.sql.test.TestSparkSession class EpochCoordinatorSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala index 4db605ee1b238..05cf324f8d490 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala @@ -23,16 +23,17 @@ import java.util.Collections import scala.collection.JavaConverters._ import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.connector.catalog.{SessionConfigSupport, SupportsRead, SupportsWrite, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan, ScanBuilder} +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReaderFactory, ContinuousStream, MicroBatchStream, Offset, PartitionOffset} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, PhysicalWriteInfo, WriteBuilder, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.streaming.{ContinuousTrigger, RateStreamOffset, Sink, StreamingQueryWrapper} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.{DataSourceRegister, StreamSinkProvider} -import org.apache.spark.sql.sources.v2._ -import 
org.apache.spark.sql.sources.v2.TableCapability._ -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming._ -import org.apache.spark.sql.sources.v2.writer.{WriteBuilder, WriterCommitMessage} -import org.apache.spark.sql.sources.v2.writer.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.streaming.{OutputMode, StreamingQuery, StreamTest, Trigger} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -68,7 +69,8 @@ class FakeScanBuilder extends ScanBuilder with Scan { class FakeWriteBuilder extends WriteBuilder with StreamingWrite { override def buildForStreaming(): StreamingWrite = this - override def createStreamingWriterFactory(): StreamingDataWriterFactory = { + override def createStreamingWriterFactory( + info: PhysicalWriteInfo): StreamingDataWriterFactory = { throw new IllegalStateException("fake sink - cannot actually write") } override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { @@ -85,14 +87,14 @@ trait FakeStreamingWriteTable extends Table with SupportsWrite { override def capabilities(): util.Set[TableCapability] = { Set(STREAMING_WRITE).asJava } - override def newWriteBuilder(options: CaseInsensitiveStringMap): WriteBuilder = { + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new FakeWriteBuilder } } class FakeReadMicroBatchOnly extends DataSourceRegister - with TableProvider + with SimpleTableProvider with SessionConfigSupport { override def shortName(): String = "fake-read-microbatch-only" @@ -115,7 +117,7 @@ class FakeReadMicroBatchOnly class FakeReadContinuousOnly extends DataSourceRegister - with TableProvider + with SimpleTableProvider with SessionConfigSupport { override def shortName(): String = "fake-read-continuous-only" @@ -136,7 +138,7 @@ class FakeReadContinuousOnly } } -class FakeReadBothModes extends DataSourceRegister with TableProvider { +class 
FakeReadBothModes extends DataSourceRegister with SimpleTableProvider { override def shortName(): String = "fake-read-microbatch-continuous" override def getTable(options: CaseInsensitiveStringMap): Table = { @@ -153,7 +155,7 @@ class FakeReadBothModes extends DataSourceRegister with TableProvider { } } -class FakeReadNeitherMode extends DataSourceRegister with TableProvider { +class FakeReadNeitherMode extends DataSourceRegister with SimpleTableProvider { override def shortName(): String = "fake-read-neither-mode" override def getTable(options: CaseInsensitiveStringMap): Table = { @@ -167,7 +169,7 @@ class FakeReadNeitherMode extends DataSourceRegister with TableProvider { class FakeWriteOnly extends DataSourceRegister - with TableProvider + with SimpleTableProvider with SessionConfigSupport { override def shortName(): String = "fake-write-microbatch-continuous" @@ -182,7 +184,7 @@ class FakeWriteOnly } } -class FakeNoWrite extends DataSourceRegister with TableProvider { +class FakeNoWrite extends DataSourceRegister with SimpleTableProvider { override def shortName(): String = "fake-write-neither-mode" override def getTable(options: CaseInsensitiveStringMap): Table = { new Table { @@ -200,7 +202,7 @@ class FakeSink extends Sink { } class FakeWriteSupportProviderV1Fallback extends DataSourceRegister - with TableProvider with StreamSinkProvider { + with SimpleTableProvider with StreamSinkProvider { override def createSink( sqlContext: SQLContext, @@ -377,10 +379,10 @@ class StreamingDataSourceV2Suite extends StreamTest { for ((read, write, trigger) <- cases) { testQuietly(s"stream with read format $read, write format $write, trigger $trigger") { val sourceTable = DataSource.lookupDataSource(read, spark.sqlContext.conf).getConstructor() - .newInstance().asInstanceOf[TableProvider].getTable(CaseInsensitiveStringMap.empty()) + .newInstance().asInstanceOf[SimpleTableProvider].getTable(CaseInsensitiveStringMap.empty()) val sinkTable = DataSource.lookupDataSource(write, 
spark.sqlContext.conf).getConstructor() - .newInstance().asInstanceOf[TableProvider].getTable(CaseInsensitiveStringMap.empty()) + .newInstance().asInstanceOf[SimpleTableProvider].getTable(CaseInsensitiveStringMap.empty()) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ trigger match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index c630f1497a17e..f9fc540c2ab80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -92,7 +92,7 @@ class DefaultSource extends StreamSourceProvider with StreamSinkProvider { spark.internalCreateDataFrame(spark.sparkContext.emptyRDD, schema, isStreaming = true) } - override def stop() {} + override def stop(): Unit = {} } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala new file mode 100644 index 0000000000000..de43e470e8e13 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.ui + +import java.util.{Locale, UUID} +import javax.servlet.http.HttpServletRequest + +import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} +import org.scalatest.BeforeAndAfter +import scala.xml.Node + +import org.apache.spark.sql.streaming.StreamingQueryProgress +import org.apache.spark.sql.test.SharedSparkSession + +class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { + + test("correctly display streaming query page") { + val id = UUID.randomUUID() + val request = mock(classOf[HttpServletRequest]) + val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) + val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + when(tab.appName).thenReturn("testing") + when(tab.headerTabs).thenReturn(Seq.empty) + when(tab.statusListener).thenReturn(statusListener) + + val streamQuery = createStreamQueryUIData(id) + when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) + var html = renderStreamingQueryPage(request, tab) + .toString().toLowerCase(Locale.ROOT) + assert(html.contains("active streaming queries (1)")) + assert(html.contains("completed streaming queries (0)")) + + when(streamQuery.isActive).thenReturn(false) + when(streamQuery.exception).thenReturn(None) + html = renderStreamingQueryPage(request, tab) + .toString().toLowerCase(Locale.ROOT) + assert(html.contains("active streaming queries (0)")) + assert(html.contains("completed streaming queries (1)")) + assert(html.contains("finished")) + + 
when(streamQuery.isActive).thenReturn(false) + when(streamQuery.exception).thenReturn(Option("exception in query")) + html = renderStreamingQueryPage(request, tab) + .toString().toLowerCase(Locale.ROOT) + assert(html.contains("active streaming queries (0)")) + assert(html.contains("completed streaming queries (1)")) + assert(html.contains("failed")) + assert(html.contains("exception in query")) + } + + test("correctly display streaming query statistics page") { + val id = UUID.randomUUID() + val request = mock(classOf[HttpServletRequest]) + val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) + val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + when(request.getParameter("id")).thenReturn(id.toString) + when(tab.appName).thenReturn("testing") + when(tab.headerTabs).thenReturn(Seq.empty) + when(tab.statusListener).thenReturn(statusListener) + + val streamQuery = createStreamQueryUIData(id) + when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) + val html = renderStreamingQueryStatisticsPage(request, tab) + .toString().toLowerCase(Locale.ROOT) + + assert(html.contains("name: query<")) + assert(html.contains("""{"x": 1001898000100, "y": 10.0}""")) + assert(html.contains("""{"x": 1001898000100, "y": 12.0}""")) + assert(html.contains("(3 completed batches)")) + } + + private def createStreamQueryUIData(id: UUID): StreamingQueryUIData = { + val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) + when(progress.timestamp).thenReturn("2001-10-01T01:00:00.100Z") + when(progress.inputRowsPerSecond).thenReturn(10.0) + when(progress.processedRowsPerSecond).thenReturn(12.0) + when(progress.batchId).thenReturn(2) + when(progress.prettyJson).thenReturn("""{"a":1}""") + + val streamQuery = mock(classOf[StreamingQueryUIData], RETURNS_SMART_NULLS) + when(streamQuery.isActive).thenReturn(true) + when(streamQuery.name).thenReturn("query") + when(streamQuery.id).thenReturn(id) + 
when(streamQuery.runId).thenReturn(id) + when(streamQuery.submissionTime).thenReturn(1L) + when(streamQuery.lastProgress).thenReturn(progress) + when(streamQuery.recentProgress).thenReturn(Array(progress)) + when(streamQuery.exception).thenReturn(None) + + streamQuery + } + + /** + * Render a stage page started with the given conf and return the HTML. + * This also runs a dummy execution page to populate the page with useful content. + */ + private def renderStreamingQueryPage( + request: HttpServletRequest, + tab: StreamingQueryTab): Seq[Node] = { + val page = new StreamingQueryPage(tab) + page.render(request) + } + + private def renderStreamingQueryStatisticsPage( + request: HttpServletRequest, + tab: StreamingQueryTab): Seq[Node] = { + val page = new StreamingQueryStatisticsPage(tab) + page.render(request) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala new file mode 100644 index 0000000000000..adbb501f9842e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.ui + +import java.util.UUID + +import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} + +import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryProgress, StreamTest} +import org.apache.spark.sql.streaming + +class StreamingQueryStatusListenerSuite extends StreamTest { + + test("onQueryStarted, onQueryProgress, onQueryTerminated") { + val listener = new StreamingQueryStatusListener(spark.sparkContext.conf) + + // hanlde query started event + val id = UUID.randomUUID() + val runId = UUID.randomUUID() + val startEvent = new StreamingQueryListener.QueryStartedEvent(id, runId, "test", 1L) + listener.onQueryStarted(startEvent) + + // result checking + assert(listener.activeQueryStatus.size() == 1) + assert(listener.activeQueryStatus.get(runId).name == "test") + + // handle query progress event + val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) + when(progress.id).thenReturn(id) + when(progress.runId).thenReturn(runId) + when(progress.timestamp).thenReturn("2001-10-01T01:00:00.100Z") + when(progress.inputRowsPerSecond).thenReturn(10.0) + when(progress.processedRowsPerSecond).thenReturn(12.0) + when(progress.batchId).thenReturn(2) + when(progress.prettyJson).thenReturn("""{"a":1}""") + val processEvent = new streaming.StreamingQueryListener.QueryProgressEvent(progress) + listener.onQueryProgress(processEvent) + + // result checking + val activeQuery = listener.activeQueryStatus.get(runId) + assert(activeQuery.isActive) + assert(activeQuery.recentProgress.length == 1) + assert(activeQuery.lastProgress.id == id) + assert(activeQuery.lastProgress.runId == runId) + assert(activeQuery.lastProgress.timestamp == "2001-10-01T01:00:00.100Z") + assert(activeQuery.lastProgress.inputRowsPerSecond == 10.0) + assert(activeQuery.lastProgress.processedRowsPerSecond == 12.0) + 
assert(activeQuery.lastProgress.batchId == 2) + assert(activeQuery.lastProgress.prettyJson == """{"a":1}""") + + // handle terminate event + val terminateEvent = new StreamingQueryListener.QueryTerminatedEvent(id, runId, None) + listener.onQueryTerminated(terminateEvent) + + assert(!listener.inactiveQueryStatus.head.isActive) + assert(listener.inactiveQueryStatus.head.runId == runId) + assert(listener.inactiveQueryStatus.head.id == id) + } + + test("same query start multiple times") { + val listener = new StreamingQueryStatusListener(spark.sparkContext.conf) + + // handle first time start + val id = UUID.randomUUID() + val runId0 = UUID.randomUUID() + val startEvent0 = new StreamingQueryListener.QueryStartedEvent(id, runId0, "test", 1L) + listener.onQueryStarted(startEvent0) + + // handle terminate event + val terminateEvent0 = new StreamingQueryListener.QueryTerminatedEvent(id, runId0, None) + listener.onQueryTerminated(terminateEvent0) + + // handle second time start + val runId1 = UUID.randomUUID() + val startEvent1 = new StreamingQueryListener.QueryStartedEvent(id, runId1, "test", 1L) + listener.onQueryStarted(startEvent1) + + // result checking + assert(listener.activeQueryStatus.size() == 1) + assert(listener.inactiveQueryStatus.length == 1) + assert(listener.activeQueryStatus.containsKey(runId1)) + assert(listener.activeQueryStatus.get(runId1).id == id) + assert(listener.inactiveQueryStatus.head.runId == runId0) + assert(listener.inactiveQueryStatus.head.id == id) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UIUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UIUtilsSuite.scala new file mode 100644 index 0000000000000..46f2eadc05835 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UIUtilsSuite.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.ui + +import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} +import org.scalatest.Matchers + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.streaming.StreamingQueryProgress + +class UIUtilsSuite extends SparkFunSuite with Matchers { + test("streaming query started with no batch completed") { + val query = mock(classOf[StreamingQueryUIData], RETURNS_SMART_NULLS) + when(query.lastProgress).thenReturn(null) + + assert(0 == UIUtils.withNoProgress(query, 1, 0)) + } + + test("streaming query started with at least one batch completed") { + val query = mock(classOf[StreamingQueryUIData], RETURNS_SMART_NULLS) + val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) + when(query.lastProgress).thenReturn(progress) + + assert(1 == UIUtils.withNoProgress(query, 1, 0)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockOnStopSource.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockOnStopSource.scala new file mode 100644 index 0000000000000..c594a8523d15e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockOnStopSource.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more 
+ * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.util + +import java.util +import java.util.concurrent.CountDownLatch + +import scala.collection.JavaConverters._ + +import org.apache.zookeeper.KeeperException.UnimplementedException + +import org.apache.spark.sql.{DataFrame, Row, SparkSession, SQLContext} +import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability} +import org.apache.spark.sql.connector.catalog.TableCapability.CONTINUOUS_READ +import org.apache.spark.sql.connector.read.{streaming, InputPartition, Scan, ScanBuilder} +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReaderFactory, ContinuousStream, PartitionOffset} +import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source} +import org.apache.spark.sql.internal.connector.SimpleTableProvider +import org.apache.spark.sql.sources.StreamSourceProvider +import org.apache.spark.sql.types.{LongType, StructType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** The V1 and V2 provider of a streaming source, which blocks indefinitely on the call of stop() */ +object BlockOnStopSourceProvider { + private var _latch: CountDownLatch = _ + val schema: StructType = new StructType().add("id", LongType) + + /** Set the 
latch that we will use to block the streaming query thread. */ + def enableBlocking(): Unit = { + if (_latch == null || _latch.getCount == 0) { + _latch = new CountDownLatch(1) + } + } + + def disableBlocking(): Unit = { + if (_latch != null) { + _latch.countDown() + _latch = null + } + } +} + +class BlockOnStopSourceProvider extends StreamSourceProvider with SimpleTableProvider { + override def getTable(options: CaseInsensitiveStringMap): Table = { + new BlockOnStopSourceTable(BlockOnStopSourceProvider._latch) + } + + override def sourceSchema( + sqlContext: SQLContext, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): (String, StructType) = { + "blockingSource" -> BlockOnStopSourceProvider.schema + } + + override def createSource( + sqlContext: SQLContext, + metadataPath: String, + schema: Option[StructType], + providerName: String, + parameters: Map[String, String]): Source = { + new BlockOnStopSource(sqlContext.sparkSession, BlockOnStopSourceProvider._latch) + } +} + +/** A V1 Streaming Source which blocks on stop(). It does not produce any data. */ +class BlockOnStopSource(spark: SparkSession, latch: CountDownLatch) extends Source { + // Blocks until latch countdowns + override def stop(): Unit = latch.await() + + // Boiler-plate + override val schema: StructType = BlockOnStopSourceProvider.schema + override def getOffset: Option[Offset] = Some(LongOffset(0)) + override def getBatch(start: Option[Offset], end: Offset): DataFrame = { + spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema) + } +} + +/** A V2 Table, which can create a blocking streaming source for ContinuousExecution. 
*/ +class BlockOnStopSourceTable(latch: CountDownLatch) extends Table with SupportsRead { + override def schema(): StructType = BlockOnStopSourceProvider.schema + + override def name(): String = "blockingSource" + + override def capabilities(): util.Set[TableCapability] = Set(CONTINUOUS_READ).asJava + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + new ScanBuilder { + override def build(): Scan = new Scan { + override def readSchema(): StructType = schema() + + override def toContinuousStream(checkpointLocation: String): ContinuousStream = { + new BlockOnStopContinuousStream(latch) + } + } + } + } +} + +/** + * A V2 Streaming Source which blocks on stop(). It does not produce any data. We use this for + * testing stopping in ContinuousExecution. + */ +class BlockOnStopContinuousStream(latch: CountDownLatch) extends ContinuousStream { + + // Blocks until latch countdowns + override def stop(): Unit = latch.await() + + // Boiler-plate + override def planInputPartitions(start: streaming.Offset): Array[InputPartition] = Array.empty + override def mergeOffsets(offsets: Array[PartitionOffset]): streaming.Offset = LongOffset(0L) + override def deserializeOffset(json: String): streaming.Offset = LongOffset(0L) + override def initialOffset(): Offset = LongOffset(0) + override def commit(end: streaming.Offset): Unit = {} + override def createContinuousReaderFactory(): ContinuousPartitionReaderFactory = { + throw new UnimplementedException + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockingSource.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockingSource.scala index 67158fb99d13d..c1b29b5130e86 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockingSource.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/util/BlockingSource.scala @@ -52,7 +52,7 @@ class BlockingSource extends StreamSourceProvider with StreamSinkProvider { import 
spark.implicits._ Seq[Int]().toDS().toDF() } - override def stop() {} + override def stop(): Unit = {} } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index b98626a34cc29..fb939007697c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -234,6 +234,21 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with assert(DataSourceUtils.decodePartitioningColumns(partColumns) === Seq("col1", "col2")) } + test ("SPARK-29537: throw exception when user defined a wrong base path") { + withTempPath { p => + val path = new Path(p.toURI).toString + Seq((1, 1), (2, 2)).toDF("c1", "c2") + .write.partitionBy("c1").mode(SaveMode.Overwrite).parquet(path) + val wrongBasePath = new File(p, "unknown") + // basePath must be a directory + wrongBasePath.mkdir() + val msg = intercept[IllegalArgumentException] { + spark.read.option("basePath", wrongBasePath.getCanonicalPath).parquet(path) + }.getMessage + assert(msg === s"Wrong basePath ${wrongBasePath.getCanonicalPath} for the root path: $path") + } + } + test("save mode") { spark.range(10).write .format("org.apache.spark.sql.test") @@ -277,7 +292,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with .format(classOf[NoopDataSource].getName) .mode(SaveMode.Append) .save() - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(plan.isInstanceOf[AppendData]) // overwrite mode creates `OverwriteByExpression` @@ -285,22 +300,24 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with .format(classOf[NoopDataSource].getName) .mode(SaveMode.Overwrite) .save() - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() 
assert(plan.isInstanceOf[OverwriteByExpression]) // By default the save mode is `ErrorIfExists` for data source v2. - spark.range(10).write - .format(classOf[NoopDataSource].getName) - .save() - sparkContext.listenerBus.waitUntilEmpty(1000) - assert(plan.isInstanceOf[AppendData]) + val e = intercept[AnalysisException] { + spark.range(10).write + .format(classOf[NoopDataSource].getName) + .save() + } + assert(e.getMessage.contains("ErrorIfExists")) - spark.range(10).write - .format(classOf[NoopDataSource].getName) - .mode("default") - .save() - sparkContext.listenerBus.waitUntilEmpty(1000) - assert(plan.isInstanceOf[AppendData]) + val e2 = intercept[AnalysisException] { + spark.range(10).write + .format(classOf[NoopDataSource].getName) + .mode("default") + .save() + } + assert(e2.getMessage.contains("ErrorIfExists")) } finally { spark.listenerManager.unregister(listener) } @@ -472,11 +489,10 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with // when users do not specify the schema checkAnswer(dfReader.load(), spark.range(1, 11).toDF()) - // when users specify the schema + // when users specify a wrong schema val inputSchema = new StructType().add("s", IntegerType, nullable = false) val e = intercept[AnalysisException] { dfReader.schema(inputSchema).load() } - assert(e.getMessage.contains( - "org.apache.spark.sql.sources.SimpleScanSource does not allow user-specified schemas")) + assert(e.getMessage.contains("The user-specified schema doesn't match the actual schema")) } test("read a data source that does not extend RelationProvider") { @@ -1058,7 +1074,7 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with checkDatasetUnorderly( spark.read.parquet(dir.getCanonicalPath).as[(Long, Long)], 0L -> 0L, 1L -> 1L, 2L -> 2L) - sparkContext.listenerBus.waitUntilEmpty(10000) + sparkContext.listenerBus.waitUntilEmpty() assert(jobDescriptions.asScala.toList.exists( _.contains("Listing leaf files and directories for 3 
paths"))) } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala index 615923fe02d6c..c51faaf10f5dd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala @@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession, SQLContext, SQLImplicits} +import org.apache.spark.unsafe.types.CalendarInterval /** * A collection of sample data used in SQL tests. @@ -168,6 +169,13 @@ private[sql] trait SQLTestData { self => rdd } + protected lazy val calenderIntervalData: RDD[IntervalData] = { + val rdd = spark.sparkContext.parallelize( + IntervalData(new CalendarInterval(1, 1, 1)) :: Nil) + rdd.toDF().createOrReplaceTempView("calenderIntervalData") + rdd + } + protected lazy val repeatedData: RDD[StringData] = { val rdd = spark.sparkContext.parallelize(List.fill(2)(StringData("test"))) rdd.toDF().createOrReplaceTempView("repeatedData") @@ -335,4 +343,5 @@ private[sql] object SQLTestData { case class ComplexData(m: Map[String, Int], s: TestData, a: Seq[Int], b: Boolean) case class CourseSales(course: String, year: Int, earnings: Double) case class TrainingSales(training: String, sales: CourseSales) + case class IntervalData(data: CalendarInterval) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 115536da8949e..38893f846e5a4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -363,6 +363,19 @@ private[sql] trait SQLTestUtilsBase } } + /** + * Drops namespace `namespace` after calling `f`. 
+ * + * Note that, if you switch current catalog/namespace in `f`, you should switch it back manually. + */ + protected def withNamespace(namespaces: String*)(f: => Unit): Unit = { + Utils.tryWithSafeFinally(f) { + namespaces.foreach { name => + spark.sql(s"DROP NAMESPACE IF EXISTS $name CASCADE") + } + } + } + /** * Enables Locale `language` before executing `f`, then switches back to the default locale of JVM * after `f` returns. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala index a8e1a44f3d5d2..6881812286b24 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala @@ -20,15 +20,19 @@ package org.apache.spark.sql.util import scala.collection.mutable.ArrayBuffer import org.apache.spark._ -import org.apache.spark.sql.{functions, AnalysisException, QueryTest} +import org.apache.spark.sql.{functions, AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, InsertIntoTable, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, InsertIntoStatement, LogicalPlan, Project} import org.apache.spark.sql.execution.{QueryExecution, WholeStageCodegenExec} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.{CreateTable, InsertIntoHadoopFsRelationCommand} import org.apache.spark.sql.execution.datasources.json.JsonFileFormat +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { +class DataFrameCallbackSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { import testImplicits._ import 
functions._ @@ -48,7 +52,7 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { df.select("i").collect() df.filter($"i" > 0).count() - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(metrics.length == 2) assert(metrics(0)._1 == "collect") @@ -79,7 +83,7 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { val e = intercept[SparkException](df.select(errorUdf($"i")).collect()) - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(metrics.length == 1) assert(metrics(0)._1 == "collect") assert(metrics(0)._2.analyzed.isInstanceOf[Project]) @@ -95,7 +99,7 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { override def onFailure(funcName: String, qe: QueryExecution, error: Throwable): Unit = {} override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = { - val metric = qe.executedPlan match { + val metric = stripAQEPlan(qe.executedPlan) match { case w: WholeStageCodegenExec => w.child.longMetric("numOutputRows") case other => other.longMetric("numOutputRows") } @@ -109,12 +113,12 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { df.collect() // Wait for the first `collect` to be caught by our listener. Otherwise the next `collect` will // reset the plan metrics. - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() df.collect() Seq(1 -> "a", 2 -> "a").toDF("i", "j").groupBy("i").count().collect() - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(metrics.length == 3) assert(metrics(0) === 1) assert(metrics(1) === 1) @@ -162,7 +166,7 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { // For this simple case, the peakExecutionMemory of a stage should be the data size of the // aggregate operator, as we only have one memory consuming operator per stage. 
- sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(metrics.length == 2) assert(metrics(0) == topAggDataSize) assert(metrics(1) == bottomAggDataSize) @@ -186,7 +190,7 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { withTempPath { path => spark.range(10).write.format("json").save(path.getCanonicalPath) - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(commands.length == 1) assert(commands.head._1 == "save") assert(commands.head._2.isInstanceOf[InsertIntoHadoopFsRelationCommand]) @@ -197,18 +201,18 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { withTable("tab") { sql("CREATE TABLE tab(i long) using parquet") // adds commands(1) via onSuccess spark.range(10).write.insertInto("tab") - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(commands.length == 3) assert(commands(2)._1 == "insertInto") - assert(commands(2)._2.isInstanceOf[InsertIntoTable]) - assert(commands(2)._2.asInstanceOf[InsertIntoTable].table + assert(commands(2)._2.isInstanceOf[InsertIntoStatement]) + assert(commands(2)._2.asInstanceOf[InsertIntoStatement].table .asInstanceOf[UnresolvedRelation].multipartIdentifier == Seq("tab")) } // exiting withTable adds commands(3) via onSuccess (drops tab) withTable("tab") { spark.range(10).select($"id", $"id" % 5 as "p").write.partitionBy("p").saveAsTable("tab") - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() assert(commands.length == 5) assert(commands(4)._1 == "saveAsTable") assert(commands(4)._2.isInstanceOf[CreateTable]) @@ -220,10 +224,58 @@ class DataFrameCallbackSuite extends QueryTest with SharedSparkSession { val e = intercept[AnalysisException] { spark.range(10).select($"id", $"id").write.insertInto("tab") } - sparkContext.listenerBus.waitUntilEmpty(1000) + sparkContext.listenerBus.waitUntilEmpty() 
assert(errors.length == 1) assert(errors.head._1 == "insertInto") assert(errors.head._2 == e) } } + + test("get observable metrics by callback") { + val metricMaps = ArrayBuffer.empty[Map[String, Row]] + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = { + metricMaps += qe.observedMetrics + } + + override def onFailure(funcName: String, qe: QueryExecution, exception: Throwable): Unit = { + // No-op + } + } + spark.listenerManager.register(listener) + try { + val df = spark.range(100) + .observe( + name = "my_event", + min($"id").as("min_val"), + max($"id").as("max_val"), + sum($"id").as("sum_val"), + count(when($"id" % 2 === 0, 1)).as("num_even")) + .observe( + name = "other_event", + avg($"id").cast("int").as("avg_val")) + + def checkMetrics(metrics: Map[String, Row]): Unit = { + assert(metrics.size === 2) + assert(metrics("my_event") === Row(0L, 99L, 4950L, 50L)) + assert(metrics("other_event") === Row(49)) + } + + // First run + df.collect() + sparkContext.listenerBus.waitUntilEmpty() + assert(metricMaps.size === 1) + checkMetrics(metricMaps.head) + metricMaps.clear() + + // Second run should produce the same result as the first run. 
+ df.collect() + sparkContext.listenerBus.waitUntilEmpty() + assert(metricMaps.size === 1) + checkMetrics(metricMaps.head) + + } finally { + spark.listenerManager.unregister(listener) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala index 79819e7655414..2fd6cb220ea3f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/util/ExecutionListenerManagerSuite.scala @@ -34,13 +34,13 @@ class ExecutionListenerManagerSuite extends SparkFunSuite with LocalSparkSession spark = SparkSession.builder().master("local").appName("test").config(conf).getOrCreate() spark.sql("select 1").collect() - spark.sparkContext.listenerBus.waitUntilEmpty(1000) + spark.sparkContext.listenerBus.waitUntilEmpty() assert(INSTANCE_COUNT.get() === 1) assert(CALLBACK_COUNT.get() === 1) val cloned = spark.cloneSession() cloned.sql("select 1").collect() - spark.sparkContext.listenerBus.waitUntilEmpty(1000) + spark.sparkContext.listenerBus.waitUntilEmpty() assert(INSTANCE_COUNT.get() === 1) assert(CALLBACK_COUNT.get() === 2) } diff --git a/sql/core/v1.2.1/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java similarity index 100% rename from sql/core/v1.2.1/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java rename to sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java diff --git a/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala similarity index 100% rename from 
sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala rename to sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala diff --git a/sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala similarity index 100% rename from sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala rename to sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala diff --git a/sql/core/v1.2.1/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala similarity index 51% rename from sql/core/v1.2.1/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala rename to sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index b1a907f9cba27..ee5162bced8ac 100644 --- a/sql/core/v1.2.1/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -25,13 +25,14 @@ import scala.collection.JavaConverters._ import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Column, DataFrame} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable 
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation +import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -44,6 +45,11 @@ import org.apache.spark.sql.types._ */ class OrcFilterSuite extends OrcTest with SharedSparkSession { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") + protected def checkFilterPredicate( df: DataFrame, predicate: Predicate, @@ -54,15 +60,11 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { .where(Column(predicate)) query.queryExecution.optimizedPlan match { - case PhysicalOperation(_, filters, - DataSourceV2Relation(orcTable: OrcTable, _, options)) => + case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") - val scanBuilder = orcTable.newScanBuilder(options) - scanBuilder.pushFilters(filters.flatMap(DataSourceStrategy.translateFilter).toArray) - val pushedFilters = scanBuilder.pushedFilters() - assert(pushedFilters.nonEmpty, "No filter is pushed down") - val maybeFilter = OrcFilters.createFilter(query.schema, pushedFilters) - assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $pushedFilters") + assert(o.pushedFilters.nonEmpty, "No filter is pushed down") + val maybeFilter = OrcFilters.createFilter(query.schema, o.pushedFilters) + assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for ${o.pushedFilters}") checker(maybeFilter.get) case _ => @@ -91,154 +93,154 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { test("filter pushdown - integer") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - 
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - long") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df => - checkFilterPredicate('_1.isNull, 
PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - float") { withOrcDataFrame((1 to 4).map(i => 
Tuple1(Option(i.toFloat)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", 
PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - double") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", 
PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - string") { withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < "2", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= "4", PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN) + 
checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("4") <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - boolean") { withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === true, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < true, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= false, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(false) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(false) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(false) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(true) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS) 
+ checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(true) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - decimal") { withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - checkFilterPredicate('_1 === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate('_1 < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) === '_1, PredicateLeaf.Operator.EQUALS) + Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS) checkFilterPredicate( - 
Literal(BigDecimal.valueOf(1)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(2)) > '_1, PredicateLeaf.Operator.LESS_THAN) + Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN) checkFilterPredicate( - Literal(BigDecimal.valueOf(3)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(4)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } @@ -249,46 +251,47 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { new Timestamp(milliseconds) } withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === timestamps(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < timestamps(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(timestamps(0)) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(timestamps(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(timestamps(1)) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(timestamps(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - 
checkFilterPredicate(Literal(timestamps(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(timestamps(0)) <=> $"_1", + PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - combinations with logical operators") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => checkFilterPredicate( - '_1.isNotNull, + $"_1".isNotNull, "leaf-0 = (IS_NULL _1), expr = (not leaf-0)" ) checkFilterPredicate( - '_1 =!= 1, + $"_1" =!= 1, "leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( - !('_1 < 4), + !($"_1" < 4), "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( - '_1 < 2 || '_1 > 3, + $"_1" < 2 || $"_1" > 3, "leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " + "expr = (or 
leaf-0 (not leaf-1))" ) checkFilterPredicate( - '_1 < 2 && '_1 > 3, + $"_1" < 2 && $"_1" > 3, "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " + "expr = (and (not leaf-0) leaf-1 (not leaf-2))" ) @@ -300,22 +303,22 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { Date.valueOf(day) } withOrcDataFrame(dates.map(Tuple1(_))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === dates(0), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < dates(1), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= dates(3), PredicateLeaf.Operator.LESS_THAN) + + 
checkFilterPredicate(Literal(dates(0)) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(dates(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(dates(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(dates(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(dates(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(dates(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } @@ -325,15 +328,15 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { } // ArrayType withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => - checkNoFilterPredicate('_1.isNull, noneSupported = true) + checkNoFilterPredicate($"_1".isNull, noneSupported = true) } // BinaryType withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => - checkNoFilterPredicate('_1 <=> 1.b, noneSupported = true) + checkNoFilterPredicate($"_1" <=> 1.b, noneSupported = true) } // MapType withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => - checkNoFilterPredicate('_1.isNotNull, noneSupported = true) + checkNoFilterPredicate($"_1".isNotNull, noneSupported = true) } } diff --git a/sql/core/v2.3.5/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java similarity index 100% rename from sql/core/v2.3.5/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java rename to sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java diff --git a/sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala similarity index 100% rename from 
sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala rename to sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala diff --git a/sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala similarity index 100% rename from sql/core/v2.3.5/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala rename to sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala diff --git a/sql/core/v2.3.5/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala similarity index 51% rename from sql/core/v2.3.5/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala rename to sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index 65b0537a0a8c1..1baa69e82bb18 100644 --- a/sql/core/v2.3.5/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -25,13 +25,15 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} +import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Column, DataFrame} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.execution.datasources.v2.orc.OrcTable 
+import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation} +import org.apache.spark.sql.execution.datasources.v2.orc.{OrcScan, OrcTable} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -44,6 +46,11 @@ import org.apache.spark.sql.types._ */ class OrcFilterSuite extends OrcTest with SharedSparkSession { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") + protected def checkFilterPredicate( df: DataFrame, predicate: Predicate, @@ -54,15 +61,11 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { .where(Column(predicate)) query.queryExecution.optimizedPlan match { - case PhysicalOperation(_, filters, - DataSourceV2Relation(orcTable: OrcTable, _, options)) => + case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") - val scanBuilder = orcTable.newScanBuilder(options) - scanBuilder.pushFilters(filters.flatMap(DataSourceStrategy.translateFilter).toArray) - val pushedFilters = scanBuilder.pushedFilters() - assert(pushedFilters.nonEmpty, "No filter is pushed down") - val maybeFilter = OrcFilters.createFilter(query.schema, pushedFilters) - assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $pushedFilters") + assert(o.pushedFilters.nonEmpty, "No filter is pushed down") + val maybeFilter = OrcFilters.createFilter(query.schema, o.pushedFilters) + assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for ${o.pushedFilters}") checker(maybeFilter.get) case _ => @@ -91,154 +94,154 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { test("filter pushdown - integer") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, 
PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - long") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df => - 
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - float") { 
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= 
$"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - double") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= 
$"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - string") { withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < "2", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= "4", PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN) + 
checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("4") <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - boolean") { withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === true, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < true, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= false, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(false) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(false) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(false) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(true) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS) 
+ checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(true) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - decimal") { withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - checkFilterPredicate('_1 === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate('_1 < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) === '_1, PredicateLeaf.Operator.EQUALS) + Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS) checkFilterPredicate( - 
Literal(BigDecimal.valueOf(1)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(2)) > '_1, PredicateLeaf.Operator.LESS_THAN) + Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN) checkFilterPredicate( - Literal(BigDecimal.valueOf(3)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(4)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } @@ -249,46 +252,47 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { new Timestamp(milliseconds) } withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === timestamps(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < timestamps(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(timestamps(0)) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(timestamps(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(timestamps(1)) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(timestamps(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - 
checkFilterPredicate(Literal(timestamps(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate( + Literal(timestamps(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - combinations with logical operators") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => checkFilterPredicate( - '_1.isNotNull, + $"_1".isNotNull, "leaf-0 = (IS_NULL _1), expr = (not leaf-0)" ) checkFilterPredicate( - '_1 =!= 1, + $"_1" =!= 1, "leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( - !('_1 < 4), + !($"_1" < 4), "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))" ) checkFilterPredicate( - '_1 < 2 || '_1 > 3, + $"_1" < 2 || $"_1" > 3, "leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " + "expr = (or 
leaf-0 (not leaf-1))" ) checkFilterPredicate( - '_1 < 2 && '_1 > 3, + $"_1" < 2 && $"_1" > 3, "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " + "expr = (and (not leaf-0) leaf-1 (not leaf-2))" ) @@ -300,22 +304,22 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { Date.valueOf(day) } withOrcDataFrame(dates.map(Tuple1(_))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === dates(0), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < dates(1), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= dates(3), PredicateLeaf.Operator.LESS_THAN) + + 
checkFilterPredicate(Literal(dates(0)) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(dates(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(dates(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(dates(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(dates(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(dates(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } @@ -325,15 +329,15 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { } // ArrayType withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => - checkNoFilterPredicate('_1.isNull, noneSupported = true) + checkNoFilterPredicate($"_1".isNull, noneSupported = true) } // BinaryType withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => - checkNoFilterPredicate('_1 <=> 1.b, noneSupported = true) + checkNoFilterPredicate($"_1" <=> 1.b, noneSupported = true) } // MapType withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => - checkNoFilterPredicate('_1.isNotNull, noneSupported = true) + checkNoFilterPredicate($"_1".isNotNull, noneSupported = true) } } diff --git a/sql/create-docs.sh b/sql/create-docs.sh index 4353708d22f7b..44aa877332fd5 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -17,7 +17,7 @@ # limitations under the License. # -# Script to create SQL API docs. This requires `mkdocs` and to build +# Script to create SQL API and config docs. This requires `mkdocs` and to build # Spark first. After running this script the html docs can be found in # $SPARK_HOME/sql/site @@ -39,14 +39,16 @@ fi pushd "$FWDIR" > /dev/null -# Now create the markdown file rm -fr docs mkdir docs -echo "Generating markdown files for SQL documentation." -"$SPARK_HOME/bin/spark-submit" gen-sql-markdown.py -# Now create the HTML files -echo "Generating HTML files for SQL documentation." 
+echo "Generating SQL API Markdown files." +"$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py + +echo "Generating SQL configuration table HTML file." +"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py + +echo "Generating HTML files for SQL API documentation." mkdocs build --clean rm -fr docs diff --git a/sql/gen-sql-markdown.py b/sql/gen-sql-api-docs.py similarity index 96% rename from sql/gen-sql-markdown.py rename to sql/gen-sql-api-docs.py index e0529f8310613..4feee7ad52570 100644 --- a/sql/gen-sql-markdown.py +++ b/sql/gen-sql-api-docs.py @@ -15,10 +15,11 @@ # limitations under the License. # -import sys import os from collections import namedtuple +from pyspark.java_gateway import launch_gateway + ExpressionInfo = namedtuple( "ExpressionInfo", "className name usage arguments examples note since deprecated") @@ -219,8 +220,7 @@ def generate_sql_markdown(jvm, path): if __name__ == "__main__": - from pyspark.java_gateway import launch_gateway - jvm = launch_gateway().jvm - markdown_file_path = "%s/docs/index.md" % os.path.dirname(sys.argv[0]) + spark_root_dir = os.path.dirname(os.path.dirname(__file__)) + markdown_file_path = os.path.join(spark_root_dir, "sql/docs/index.md") generate_sql_markdown(jvm, markdown_file_path) diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py new file mode 100644 index 0000000000000..04f5a850c9980 --- /dev/null +++ b/sql/gen-sql-config-docs.py @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import re +from collections import namedtuple +from textwrap import dedent + +# To avoid adding a new direct dependency, we import markdown from within mkdocs. +from mkdocs.structure.pages import markdown +from pyspark.java_gateway import launch_gateway + +SQLConfEntry = namedtuple( + "SQLConfEntry", ["name", "default", "description"]) + + +def get_public_sql_configs(jvm): + sql_configs = [ + SQLConfEntry( + name=_sql_config._1(), + default=_sql_config._2(), + description=_sql_config._3(), + ) + for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() + ] + return sql_configs + + +def generate_sql_configs_table(sql_configs, path): + """ + Generates an HTML table at `path` that lists all public SQL + configuration options. + + The table will look something like this: + + ```html + + + + + + + + + + ... + +
    Property NameDefaultMeaning
    spark.sql.adaptive.enabledfalse

    When true, enable adaptive query execution.

    + ``` + """ + value_reference_pattern = re.compile(r"^$") + + with open(path, 'w') as f: + f.write(dedent( + """ + + + """ + )) + for config in sorted(sql_configs, key=lambda x: x.name): + if config.default == "": + default = "(none)" + elif config.default.startswith(" + + + + + """ + .format( + name=config.name, + default=default, + description=markdown.markdown(config.description), + ) + )) + f.write("
    Property NameDefaultMeaning
    {name}{default}{description}
    \n") + + +if __name__ == "__main__": + jvm = launch_gateway().jvm + sql_configs = get_public_sql_configs(jvm) + + spark_root_dir = os.path.dirname(os.path.dirname(__file__)) + sql_configs_table_path = os.path.join(spark_root_dir, "docs/sql-configs.html") + + generate_sql_configs_table(sql_configs, path=sql_configs_table_path) diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 5b1352adddd89..75c7f77942396 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -77,15 +77,6 @@ ${hive.group} hive-beeline - - - ${hive.group} - hive-contrib - - - ${hive.group}.hcatalog - hive-hcatalog-core - org.eclipse.jetty jetty-server @@ -129,7 +120,11 @@ test-jar test - + + org.mockito + mockito-core + test + net.sf.jpam jpam diff --git a/sql/hive-thriftserver/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin b/sql/hive-thriftserver/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin new file mode 100644 index 0000000000000..96d990372ee4c --- /dev/null +++ b/sql/hive-thriftserver/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin @@ -0,0 +1 @@ +org.apache.spark.sql.hive.thriftserver.ui.HiveThriftServer2HistoryServerPlugin diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala index 36d4ac095e10c..f15193b0dc3cc 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala @@ -20,9 +20,6 @@ package org.apache.spark.sql.hive.thriftserver import java.util.Locale import java.util.concurrent.atomic.AtomicBoolean -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - import 
org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.thrift.{ThriftBinaryCLIService, ThriftHttpCLIService} @@ -32,12 +29,11 @@ import org.apache.spark.SparkContext import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.Logging import org.apache.spark.internal.config.UI.UI_ENABLED -import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd, SparkListenerJobStart} import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ -import org.apache.spark.sql.hive.thriftserver.ui.ThriftServerTab -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.hive.thriftserver.ui._ +import org.apache.spark.status.ElementTrackingStore import org.apache.spark.util.{ShutdownHookManager, Utils} /** @@ -47,6 +43,7 @@ import org.apache.spark.util.{ShutdownHookManager, Utils} object HiveThriftServer2 extends Logging { var uiTab: Option[ThriftServerTab] = None var listener: HiveThriftServer2Listener = _ + var eventManager: HiveThriftServer2EventManager = _ /** * :: DeveloperApi :: @@ -62,17 +59,24 @@ object HiveThriftServer2 extends Logging { server.init(executionHive.conf) server.start() - listener = new HiveThriftServer2Listener(server, sqlContext.conf) - sqlContext.sparkContext.addSparkListener(listener) - uiTab = if (sqlContext.sparkContext.getConf.get(UI_ENABLED)) { - Some(new ThriftServerTab(sqlContext.sparkContext)) + createListenerAndUI(server, sqlContext.sparkContext) + server + } + + private def createListenerAndUI(server: HiveThriftServer2, sc: SparkContext): Unit = { + val kvStore = sc.statusStore.store.asInstanceOf[ElementTrackingStore] + eventManager = new HiveThriftServer2EventManager(sc) + listener = new HiveThriftServer2Listener(kvStore, sc.conf, Some(server)) + sc.listenerBus.addToStatusQueue(listener) + uiTab = if (sc.getConf.get(UI_ENABLED)) { + Some(new 
ThriftServerTab(new HiveThriftServer2AppStatusStore(kvStore, Some(listener)), + ThriftServerTab.getSparkUI(sc))) } else { None } - server } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // If the arguments contains "-h" or "--help", print out the usage and exit. if (args.contains("-h") || args.contains("--help")) { HiveServer2.main(args) @@ -101,13 +105,7 @@ object HiveThriftServer2 extends Logging { server.init(executionHive.conf) server.start() logInfo("HiveThriftServer2 started") - listener = new HiveThriftServer2Listener(server, SparkSQLEnv.sqlContext.conf) - SparkSQLEnv.sparkContext.addSparkListener(listener) - uiTab = if (SparkSQLEnv.sparkContext.getConf.get(UI_ENABLED)) { - Some(new ThriftServerTab(SparkSQLEnv.sparkContext)) - } else { - None - } + createListenerAndUI(server, SparkSQLEnv.sparkContext) // If application was killed before HiveThriftServer2 start successfully then SparkSubmit // process can not exit, so check whether if SparkContext was stopped. 
if (SparkSQLEnv.sparkContext.stopped.get()) { @@ -121,179 +119,10 @@ object HiveThriftServer2 extends Logging { } } - private[thriftserver] class SessionInfo( - val sessionId: String, - val startTimestamp: Long, - val ip: String, - val userName: String) { - var finishTimestamp: Long = 0L - var totalExecution: Int = 0 - def totalTime: Long = { - if (finishTimestamp == 0L) { - System.currentTimeMillis - startTimestamp - } else { - finishTimestamp - startTimestamp - } - } - } - private[thriftserver] object ExecutionState extends Enumeration { val STARTED, COMPILED, CANCELED, FAILED, FINISHED, CLOSED = Value type ExecutionState = Value } - - private[thriftserver] class ExecutionInfo( - val statement: String, - val sessionId: String, - val startTimestamp: Long, - val userName: String) { - var finishTimestamp: Long = 0L - var closeTimestamp: Long = 0L - var executePlan: String = "" - var detail: String = "" - var state: ExecutionState.Value = ExecutionState.STARTED - val jobId: ArrayBuffer[String] = ArrayBuffer[String]() - var groupId: String = "" - def totalTime(endTime: Long): Long = { - if (endTime == 0L) { - System.currentTimeMillis - startTimestamp - } else { - endTime - startTimestamp - } - } - } - - - /** - * An inner sparkListener called in sc.stop to clean up the HiveThriftServer2 - */ - private[thriftserver] class HiveThriftServer2Listener( - val server: HiveServer2, - val conf: SQLConf) extends SparkListener { - - override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { - server.stop() - } - private val sessionList = new mutable.LinkedHashMap[String, SessionInfo] - private val executionList = new mutable.LinkedHashMap[String, ExecutionInfo] - private val retainedStatements = conf.getConf(SQLConf.THRIFTSERVER_UI_STATEMENT_LIMIT) - private val retainedSessions = conf.getConf(SQLConf.THRIFTSERVER_UI_SESSION_LIMIT) - - def getOnlineSessionNum: Int = synchronized { - sessionList.count(_._2.finishTimestamp == 0) - } - - def 
isExecutionActive(execInfo: ExecutionInfo): Boolean = { - !(execInfo.state == ExecutionState.FAILED || - execInfo.state == ExecutionState.CANCELED || - execInfo.state == ExecutionState.CLOSED) - } - - /** - * When an error or a cancellation occurs, we set the finishTimestamp of the statement. - * Therefore, when we count the number of running statements, we need to exclude errors and - * cancellations and count all statements that have not been closed so far. - */ - def getTotalRunning: Int = synchronized { - executionList.count { - case (_, v) => isExecutionActive(v) - } - } - - def getSessionList: Seq[SessionInfo] = synchronized { sessionList.values.toSeq } - - def getSession(sessionId: String): Option[SessionInfo] = synchronized { - sessionList.get(sessionId) - } - - def getExecutionList: Seq[ExecutionInfo] = synchronized { executionList.values.toSeq } - - override def onJobStart(jobStart: SparkListenerJobStart): Unit = synchronized { - for { - props <- Option(jobStart.properties) - groupId <- Option(props.getProperty(SparkContext.SPARK_JOB_GROUP_ID)) - (_, info) <- executionList if info.groupId == groupId - } { - info.jobId += jobStart.jobId.toString - info.groupId = groupId - } - } - - def onSessionCreated(ip: String, sessionId: String, userName: String = "UNKNOWN"): Unit = { - synchronized { - val info = new SessionInfo(sessionId, System.currentTimeMillis, ip, userName) - sessionList.put(sessionId, info) - trimSessionIfNecessary() - } - } - - def onSessionClosed(sessionId: String): Unit = synchronized { - sessionList(sessionId).finishTimestamp = System.currentTimeMillis - trimSessionIfNecessary() - } - - def onStatementStart( - id: String, - sessionId: String, - statement: String, - groupId: String, - userName: String = "UNKNOWN"): Unit = synchronized { - val info = new ExecutionInfo(statement, sessionId, System.currentTimeMillis, userName) - info.state = ExecutionState.STARTED - executionList.put(id, info) - trimExecutionIfNecessary() - 
sessionList(sessionId).totalExecution += 1 - executionList(id).groupId = groupId - } - - def onStatementParsed(id: String, executionPlan: String): Unit = synchronized { - executionList(id).executePlan = executionPlan - executionList(id).state = ExecutionState.COMPILED - } - - def onStatementCanceled(id: String): Unit = synchronized { - executionList(id).finishTimestamp = System.currentTimeMillis - executionList(id).state = ExecutionState.CANCELED - trimExecutionIfNecessary() - } - - def onStatementError(id: String, errorMsg: String, errorTrace: String): Unit = synchronized { - executionList(id).finishTimestamp = System.currentTimeMillis - executionList(id).detail = errorMsg - executionList(id).state = ExecutionState.FAILED - trimExecutionIfNecessary() - } - - def onStatementFinish(id: String): Unit = synchronized { - executionList(id).finishTimestamp = System.currentTimeMillis - executionList(id).state = ExecutionState.FINISHED - trimExecutionIfNecessary() - } - - def onOperationClosed(id: String): Unit = synchronized { - executionList(id).closeTimestamp = System.currentTimeMillis - executionList(id).state = ExecutionState.CLOSED - } - - private def trimExecutionIfNecessary() = { - if (executionList.size > retainedStatements) { - val toRemove = math.max(retainedStatements / 10, 1) - executionList.filter(_._2.finishTimestamp != 0).take(toRemove).foreach { s => - executionList.remove(s._1) - } - } - } - - private def trimSessionIfNecessary() = { - if (sessionList.size > retainedSessions) { - val toRemove = math.max(retainedSessions / 10, 1) - sessionList.filter(_._2.finishTimestamp != 0).take(toRemove).foreach { s => - sessionList.remove(s._1) - } - } - - } - } } private[hive] class HiveThriftServer2(sqlContext: SQLContext) @@ -303,7 +132,7 @@ private[hive] class HiveThriftServer2(sqlContext: SQLContext) // started, and then once only. 
private val started = new AtomicBoolean(false) - override def init(hiveConf: HiveConf) { + override def init(hiveConf: HiveConf): Unit = { val sparkSqlCliService = new SparkSQLCLIService(this, sqlContext) setSuperField(this, "cliService", sparkSqlCliService) addService(sparkSqlCliService) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala index 599294dfbb7d7..a4024be67ac9c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala @@ -18,11 +18,11 @@ package org.apache.spark.sql.hive.thriftserver private[hive] object ReflectionUtils { - def setSuperField(obj : Object, fieldName: String, fieldValue: Object) { + def setSuperField(obj : Object, fieldName: String, fieldValue: Object): Unit = { setAncestorField(obj, 1, fieldName, fieldValue) } - def setAncestorField(obj: AnyRef, level: Int, fieldName: String, fieldValue: AnyRef) { + def setAncestorField(obj: AnyRef, level: Int, fieldName: String, fieldValue: AnyRef): Unit = { val ancestor = Iterator.iterate[Class[_]](obj.getClass)(_.getSuperclass).drop(level).next() val field = ancestor.getDeclaredField(fieldName) field.setAccessible(true) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 69e85484ccf8e..cf0e5ebf3a2b1 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -26,6 +26,7 @@ import scala.collection.JavaConverters._ 
import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.shims.Utils import org.apache.hive.service.cli._ @@ -39,6 +40,7 @@ import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.execution.command.SetCommand import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.{Utils => SparkUtils} private[hive] class SparkExecuteStatementOperation( @@ -56,7 +58,8 @@ private[hive] class SparkExecuteStatementOperation( // This is only used when `spark.sql.thriftServer.incrementalCollect` is set to `false`. // In case of `true`, this will be `None` and FETCH_FIRST will trigger re-execution. private var resultList: Option[Array[SparkRow]] = _ - + private var previousFetchEndOffset: Long = 0 + private var previousFetchStartOffset: Long = 0 private var iter: Iterator[SparkRow] = _ private var dataTypes: Array[DataType] = _ private var statementId: String = _ @@ -74,10 +77,10 @@ private[hive] class SparkExecuteStatementOperation( // RDDs will be cleaned automatically upon garbage collection. 
logInfo(s"Close statement with $statementId") cleanup(OperationState.CLOSED) - HiveThriftServer2.listener.onOperationClosed(statementId) + HiveThriftServer2.eventManager.onOperationClosed(statementId) } - def addNonNullColumnValue(from: SparkRow, to: ArrayBuffer[Any], ordinal: Int) { + def addNonNullColumnValue(from: SparkRow, to: ArrayBuffer[Any], ordinal: Int): Unit = { dataTypes(ordinal) match { case StringType => to += from.getString(ordinal) @@ -103,6 +106,8 @@ private[hive] class SparkExecuteStatementOperation( to += from.getAs[Timestamp](ordinal) case BinaryType => to += from.getAs[Array[Byte]](ordinal) + case CalendarIntervalType => + to += HiveResult.toHiveString((from.getAs[CalendarInterval](ordinal), CalendarIntervalType)) case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] => val hiveString = HiveResult.toHiveString((from.get(ordinal), dataTypes(ordinal))) to += hiveString @@ -110,14 +115,18 @@ private[hive] class SparkExecuteStatementOperation( } def getNextRowSet(order: FetchOrientation, maxRowsL: Long): RowSet = withSchedulerPool { + log.info(s"Received getNextRowSet request order=${order} and maxRowsL=${maxRowsL} " + + s"with ${statementId}") validateDefaultFetchOrientation(order) assertState(OperationState.FINISHED) setHasResultSet(true) val resultRowSet: RowSet = ThriftserverShimUtils.resultRowSet(getResultSetSchema, getProtocolVersion) - // Reset iter to header when fetching start from first row - if (order.equals(FetchOrientation.FETCH_FIRST)) { + // Reset iter when FETCH_FIRST or FETCH_PRIOR + if ((order.equals(FetchOrientation.FETCH_FIRST) || + order.equals(FetchOrientation.FETCH_PRIOR)) && previousFetchEndOffset != 0) { + // Reset the iterator to the beginning of the query. 
iter = if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { resultList = None result.toLocalIterator.asScala @@ -129,6 +138,28 @@ private[hive] class SparkExecuteStatementOperation( } } + var resultOffset = { + if (order.equals(FetchOrientation.FETCH_FIRST)) { + logInfo(s"FETCH_FIRST request with $statementId. Resetting to resultOffset=0") + 0 + } else if (order.equals(FetchOrientation.FETCH_PRIOR)) { + // TODO: FETCH_PRIOR should be handled more efficiently than rewinding to beginning and + // reiterating. + val targetOffset = math.max(previousFetchStartOffset - maxRowsL, 0) + logInfo(s"FETCH_PRIOR request with $statementId. Resetting to resultOffset=$targetOffset") + var off = 0 + while (off < targetOffset && iter.hasNext) { + iter.next() + off += 1 + } + off + } else { // FETCH_NEXT + previousFetchEndOffset + } + } + + resultRowSet.setStartOffset(resultOffset) + previousFetchStartOffset = resultOffset if (!iter.hasNext) { resultRowSet } else { @@ -149,7 +180,11 @@ private[hive] class SparkExecuteStatementOperation( } resultRowSet.addRow(row.toArray.asInstanceOf[Array[Object]]) curRow += 1 + resultOffset += 1 } + previousFetchEndOffset = resultOffset + log.info(s"Returning result set with ${curRow} rows from offsets " + + s"[$previousFetchStartOffset, $previousFetchEndOffset) with $statementId") resultRowSet } } @@ -160,7 +195,7 @@ private[hive] class SparkExecuteStatementOperation( setState(OperationState.PENDING) statementId = UUID.randomUUID().toString logInfo(s"Submitting query '$statement' with $statementId") - HiveThriftServer2.listener.onStatementStart( + HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, statement, @@ -210,14 +245,14 @@ private[hive] class SparkExecuteStatementOperation( case rejected: RejectedExecutionException => logError("Error submitting query in background, query rejected", rejected) setState(OperationState.ERROR) - 
HiveThriftServer2.listener.onStatementError( + HiveThriftServer2.eventManager.onStatementError( statementId, rejected.getMessage, SparkUtils.exceptionString(rejected)) throw new HiveSQLException("The background threadpool cannot accept" + " new task for execution, please retry the operation", rejected) case NonFatal(e) => logError(s"Error executing query in background", e) setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( + HiveThriftServer2.eventManager.onStatementError( statementId, e.getMessage, SparkUtils.exceptionString(e)) throw new HiveSQLException(e) } @@ -249,7 +284,8 @@ private[hive] class SparkExecuteStatementOperation( "in this session.") case _ => } - HiveThriftServer2.listener.onStatementParsed(statementId, result.queryExecution.toString()) + HiveThriftServer2.eventManager.onStatementParsed(statementId, + result.queryExecution.toString()) iter = { if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { resultList = None @@ -259,11 +295,18 @@ private[hive] class SparkExecuteStatementOperation( resultList.get.iterator } } - dataTypes = result.queryExecution.analyzed.output.map(_.dataType).toArray + dataTypes = result.schema.fields.map(_.dataType) } catch { // Actually do need to catch Throwable as some failures don't inherit from Exception and // HiveServer will silently swallow them. case e: Throwable => + // When cancel() or close() is called very quickly after the query is started, + // then they may both call cleanup() before Spark Jobs are started. But before background + // task interrupted, it may have start some spark job, so we need to cancel again to + // make sure job was cancelled when background thread was interrupted + if (statementId != null) { + sqlContext.sparkContext.cancelJobGroup(statementId) + } val currentState = getStatus().getState() if (currentState.isTerminal) { // This may happen if the execution was cancelled, and then closed from another thread. 
@@ -271,19 +314,23 @@ private[hive] class SparkExecuteStatementOperation( } else { logError(s"Error executing query with $statementId, currentState $currentState, ", e) setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, SparkUtils.exceptionString(e)) - if (e.isInstanceOf[HiveSQLException]) { - throw e.asInstanceOf[HiveSQLException] - } else { - throw new HiveSQLException("Error running query: " + e.toString, e) + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) + throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error running query: " + root.toString, root) } } } finally { synchronized { if (!getStatus.getState.isTerminal) { setState(OperationState.FINISHED) - HiveThriftServer2.listener.onStatementFinish(statementId) + HiveThriftServer2.eventManager.onStatementFinish(statementId) } } sqlContext.sparkContext.clearJobGroup() @@ -295,12 +342,12 @@ private[hive] class SparkExecuteStatementOperation( if (!getStatus.getState.isTerminal) { logInfo(s"Cancel query with $statementId") cleanup(OperationState.CANCELED) - HiveThriftServer2.listener.onStatementCanceled(statementId) + HiveThriftServer2.eventManager.onStatementCanceled(statementId) } } } - private def cleanup(state: OperationState) { + private def cleanup(state: OperationState): Unit = { setState(state) if (runInBackground) { val backgroundHandle = getBackgroundHandle() @@ -331,7 +378,11 @@ private[hive] class SparkExecuteStatementOperation( object SparkExecuteStatementOperation { def getTableSchema(structType: StructType): TableSchema = { val schema = structType.map { field => - val attrTypeString = if (field.dataType == NullType) "void" else 
field.dataType.catalogString + val attrTypeString = field.dataType match { + case NullType => "void" + case CalendarIntervalType => StringType.catalogString + case other => other.catalogString + } new FieldSchema(field.name, attrTypeString, field.getComment.getOrElse("")) } new TableSchema(schema.asJava) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala index cde99fd35bd59..2945cfd200e46 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.thriftserver import java.util.UUID +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType import org.apache.hive.service.cli.{HiveSQLException, OperationState} import org.apache.hive.service.cli.operation.GetCatalogsOperation @@ -43,7 +44,7 @@ private[hive] class SparkGetCatalogsOperation( override def close(): Unit = { super.close() - HiveThriftServer2.listener.onOperationClosed(statementId) + HiveThriftServer2.eventManager.onOperationClosed(statementId) } override def runInternal(): Unit = { @@ -55,7 +56,7 @@ private[hive] class SparkGetCatalogsOperation( val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader Thread.currentThread().setContextClassLoader(executionHiveClassLoader) - HiveThriftServer2.listener.onStatementStart( + HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, logMsg, @@ -68,12 +69,21 @@ private[hive] class SparkGetCatalogsOperation( } setState(OperationState.FINISHED) } catch { - case e: HiveSQLException => + case e: Throwable => + 
logError(s"Error executing get catalogs operation with $statementId", e) setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, SparkUtils.exceptionString(e)) - throw e + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) + throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error getting catalogs: " + root.toString, root) + } } - HiveThriftServer2.listener.onStatementFinish(statementId) + HiveThriftServer2.eventManager.onStatementFinish(statementId) } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 89faff2f6f913..ff7cbfeae13be 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -22,6 +22,7 @@ import java.util.regex.Pattern import scala.collection.JavaConverters.seqAsJavaListConverter +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveOperationType, HivePrivilegeObject} import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType import org.apache.hive.service.cli._ @@ -62,7 +63,7 @@ private[hive] class SparkGetColumnsOperation( override def close(): Unit = { super.close() - HiveThriftServer2.listener.onOperationClosed(statementId) + HiveThriftServer2.eventManager.onOperationClosed(statementId) } override def runInternal(): Unit 
= { @@ -77,7 +78,7 @@ private[hive] class SparkGetColumnsOperation( val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader Thread.currentThread().setContextClassLoader(executionHiveClassLoader) - HiveThriftServer2.listener.onStatementStart( + HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, logMsg, @@ -129,13 +130,22 @@ private[hive] class SparkGetColumnsOperation( } setState(OperationState.FINISHED) } catch { - case e: HiveSQLException => + case e: Throwable => + logError(s"Error executing get columns operation with $statementId", e) setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, SparkUtils.exceptionString(e)) - throw e + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) + throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error getting columns: " + root.toString, root) + } } - HiveThriftServer2.listener.onStatementFinish(statementId) + HiveThriftServer2.eventManager.onStatementFinish(statementId) } private def addToRowSet( diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala index 462e57300e82b..d9c12b6ca9e64 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala @@ -22,6 +22,7 @@ import java.util.UUID import scala.collection.JavaConverters.seqAsJavaListConverter 
+import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveOperationType, HivePrivilegeObjectUtils} import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetFunctionsOperation @@ -53,7 +54,7 @@ private[hive] class SparkGetFunctionsOperation( override def close(): Unit = { super.close() - HiveThriftServer2.listener.onOperationClosed(statementId) + HiveThriftServer2.eventManager.onOperationClosed(statementId) } override def runInternal(): Unit = { @@ -80,7 +81,7 @@ private[hive] class SparkGetFunctionsOperation( authorizeMetaGets(HiveOperationType.GET_FUNCTIONS, privObjs, cmdStr) } - HiveThriftServer2.listener.onStatementStart( + HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, logMsg, @@ -104,12 +105,21 @@ private[hive] class SparkGetFunctionsOperation( } setState(OperationState.FINISHED) } catch { - case e: HiveSQLException => + case e: Throwable => + logError(s"Error executing get functions operation with $statementId", e) setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, SparkUtils.exceptionString(e)) - throw e + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) + throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error getting functions: " + root.toString, root) + } } - HiveThriftServer2.listener.onStatementFinish(statementId) + HiveThriftServer2.eventManager.onStatementFinish(statementId) } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala index 87ef154bcc8ab..db19880d1b99f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.thriftserver import java.util.UUID import java.util.regex.Pattern +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetSchemasOperation @@ -49,7 +50,7 @@ private[hive] class SparkGetSchemasOperation( override def close(): Unit = { super.close() - HiveThriftServer2.listener.onOperationClosed(statementId) + HiveThriftServer2.eventManager.onOperationClosed(statementId) } override def runInternal(): Unit = { @@ -67,7 +68,7 @@ private[hive] class SparkGetSchemasOperation( authorizeMetaGets(HiveOperationType.GET_TABLES, null, cmdStr) } - HiveThriftServer2.listener.onStatementStart( + HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, logMsg, @@ -87,12 +88,21 @@ private[hive] class SparkGetSchemasOperation( } setState(OperationState.FINISHED) } catch { - case e: HiveSQLException => + case e: Throwable => + logError(s"Error executing get schemas operation with $statementId", e) setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, SparkUtils.exceptionString(e)) - throw e + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) + throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + 
HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error getting schemas: " + root.toString, root) + } } - HiveThriftServer2.listener.onStatementFinish(statementId) + HiveThriftServer2.eventManager.onStatementFinish(statementId) } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala index 8f2257f77d2a0..b4093e58d3c07 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.thriftserver import java.util.UUID +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetTableTypesOperation @@ -44,7 +45,7 @@ private[hive] class SparkGetTableTypesOperation( override def close(): Unit = { super.close() - HiveThriftServer2.listener.onOperationClosed(statementId) + HiveThriftServer2.eventManager.onOperationClosed(statementId) } override def runInternal(): Unit = { @@ -60,7 +61,7 @@ private[hive] class SparkGetTableTypesOperation( authorizeMetaGets(HiveOperationType.GET_TABLETYPES, null) } - HiveThriftServer2.listener.onStatementStart( + HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, logMsg, @@ -74,12 +75,21 @@ private[hive] class SparkGetTableTypesOperation( } setState(OperationState.FINISHED) } catch { - case e: HiveSQLException => + case e: Throwable => + logError(s"Error executing get table types operation with $statementId", e) 
setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, SparkUtils.exceptionString(e)) - throw e + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) + throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error getting table types: " + root.toString, root) + } } - HiveThriftServer2.listener.onStatementFinish(statementId) + HiveThriftServer2.eventManager.onStatementFinish(statementId) } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala index 6441dc50f49fe..45c6d980aac47 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala @@ -22,6 +22,7 @@ import java.util.regex.Pattern import scala.collection.JavaConverters._ +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils import org.apache.hive.service.cli._ @@ -30,7 +31,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext -import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.util.{Utils => SparkUtils} @@ -59,7 +59,7 @@ 
private[hive] class SparkGetTablesOperation( override def close(): Unit = { super.close() - HiveThriftServer2.listener.onOperationClosed(statementId) + HiveThriftServer2.eventManager.onOperationClosed(statementId) } override def runInternal(): Unit = { @@ -85,7 +85,7 @@ private[hive] class SparkGetTablesOperation( authorizeMetaGets(HiveOperationType.GET_TABLES, privObjs, cmdStr) } - HiveThriftServer2.listener.onStatementStart( + HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, logMsg, @@ -119,13 +119,22 @@ private[hive] class SparkGetTablesOperation( } setState(OperationState.FINISHED) } catch { - case e: HiveSQLException => + case e: Throwable => + logError(s"Error executing get tables operation with $statementId", e) setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, SparkUtils.exceptionString(e)) - throw e + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) + throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error getting tables: " + root.toString, root) + } } - HiveThriftServer2.listener.onStatementFinish(statementId) + HiveThriftServer2.eventManager.onStatementFinish(statementId) } private def addToRowSet( diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala new file mode 100644 index 0000000000000..dd5668a93f82d --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -0,0 +1,113 @@ +/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver + +import java.util.UUID + +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType +import org.apache.hive.service.cli.{HiveSQLException, OperationState} +import org.apache.hive.service.cli.operation.GetTypeInfoOperation +import org.apache.hive.service.cli.session.HiveSession + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SQLContext +import org.apache.spark.util.{Utils => SparkUtils} + +/** + * Spark's own GetTypeInfoOperation + * + * @param sqlContext SQLContext to use + * @param parentSession a HiveSession from SessionManager + */ +private[hive] class SparkGetTypeInfoOperation( + sqlContext: SQLContext, + parentSession: HiveSession) + extends GetTypeInfoOperation(parentSession) with Logging { + + private var statementId: String = _ + + override def close(): Unit = { + super.close() + HiveThriftServer2.eventManager.onOperationClosed(statementId) + } + + override def runInternal(): Unit = { + statementId = UUID.randomUUID().toString + val logMsg = "Listing type info" + logInfo(s"$logMsg with $statementId") + 
setState(OperationState.RUNNING) + // Always use the latest class loader provided by executionHive's state. + val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader + Thread.currentThread().setContextClassLoader(executionHiveClassLoader) + + if (isAuthV2Enabled) { + authorizeMetaGets(HiveOperationType.GET_TYPEINFO, null) + } + + HiveThriftServer2.eventManager.onStatementStart( + statementId, + parentSession.getSessionHandle.getSessionId.toString, + logMsg, + statementId, + parentSession.getUsername) + + try { + ThriftserverShimUtils.supportedType().foreach(typeInfo => { + val rowData = Array[AnyRef]( + typeInfo.getName, // TYPE_NAME + typeInfo.toJavaSQLType.asInstanceOf[AnyRef], // DATA_TYPE + typeInfo.getMaxPrecision.asInstanceOf[AnyRef], // PRECISION + typeInfo.getLiteralPrefix, // LITERAL_PREFIX + typeInfo.getLiteralSuffix, // LITERAL_SUFFIX + typeInfo.getCreateParams, // CREATE_PARAMS + typeInfo.getNullable.asInstanceOf[AnyRef], // NULLABLE + typeInfo.isCaseSensitive.asInstanceOf[AnyRef], // CASE_SENSITIVE + typeInfo.getSearchable.asInstanceOf[AnyRef], // SEARCHABLE + typeInfo.isUnsignedAttribute.asInstanceOf[AnyRef], // UNSIGNED_ATTRIBUTE + typeInfo.isFixedPrecScale.asInstanceOf[AnyRef], // FIXED_PREC_SCALE + typeInfo.isAutoIncrement.asInstanceOf[AnyRef], // AUTO_INCREMENT + typeInfo.getLocalizedName, // LOCAL_TYPE_NAME + typeInfo.getMinimumScale.asInstanceOf[AnyRef], // MINIMUM_SCALE + typeInfo.getMaximumScale.asInstanceOf[AnyRef], // MAXIMUM_SCALE + null, // SQL_DATA_TYPE, unused + null, // SQL_DATETIME_SUB, unused + typeInfo.getNumPrecRadix // NUM_PREC_RADIX + ) + rowSet.addRow(rowData) + }) + setState(OperationState.FINISHED) + } catch { + case e: Throwable => + logError(s"Error executing get type info with $statementId", e) + setState(OperationState.ERROR) + e match { + case hiveException: HiveSQLException => + HiveThriftServer2.eventManager.onStatementError( + statementId, hiveException.getMessage, SparkUtils.exceptionString(hiveException)) 
+ throw hiveException + case _ => + val root = ExceptionUtils.getRootCause(e) + HiveThriftServer2.eventManager.onStatementError( + statementId, root.getMessage, SparkUtils.exceptionString(root)) + throw new HiveSQLException("Error getting type info: " + root.toString, root) + } + } + HiveThriftServer2.eventManager.onStatementFinish(statementId) + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b9614d49eadbd..b665d4a31b9b1 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.thriftserver import java.io._ import java.nio.charset.StandardCharsets.UTF_8 -import java.util.{ArrayList => JArrayList, Locale} +import java.util.{ArrayList => JArrayList, List => JList, Locale} import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.log4j.Level import org.apache.thrift.transport.TSocket +import sun.misc.{Signal, SignalHandler} import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil @@ -63,7 +64,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { * a signal handler will invoke this registered callback if a Ctrl+C signal is detected while * a command is being processed by the current thread. 
*/ - def installSignalHandler() { + def installSignalHandler(): Unit = { HiveInterruptUtils.add(() => { // Handle remote execution mode if (SparkSQLEnv.sparkContext != null) { @@ -77,7 +78,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { }) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val oproc = new OptionsProcessor() if (!oproc.process_stage1(args)) { System.exit(1) @@ -111,6 +112,11 @@ private[hive] object SparkSQLCLIDriver extends Logging { // Set all properties specified via command line. val conf: HiveConf = sessionState.getConf + // Hive 2.0.0 onwards HiveConf.getClassLoader returns the UDFClassLoader (created by Hive). + // Because of this spark cannot find the jars as class loader got changed + // Hive changed the class loader because of HIVE-11878, so it is required to use old + // classLoader as sparks loaded all the jars in this classLoader + conf.setClassLoader(Thread.currentThread().getContextClassLoader) sessionState.cmdProperties.entrySet().asScala.foreach { item => val key = item.getKey.toString val value = item.getValue.toString @@ -133,20 +139,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { // Clean up after we exit ShutdownHookManager.addShutdownHook { () => SparkSQLEnv.stop() } - val remoteMode = isRemoteMode(sessionState) - // "-h" option has been passed, so connect to Hive thrift server. - if (!remoteMode) { - // Hadoop-20 and above - we need to augment classpath using hiveconf - // components. 
- // See also: code in ExecDriver.java - var loader = conf.getClassLoader - val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS) - if (StringUtils.isNotBlank(auxJars)) { - loader = ThriftserverShimUtils.addToClassPath(loader, StringUtils.split(auxJars, ",")) - } - conf.setClassLoader(loader) - Thread.currentThread().setContextClassLoader(loader) - } else { + if (isRemoteMode(sessionState)) { // Hive 1.2 + not supported in CLI throw new RuntimeException("Remote operations not supported") } @@ -164,6 +157,22 @@ private[hive] object SparkSQLCLIDriver extends Logging { val cli = new SparkSQLCLIDriver cli.setHiveVariables(oproc.getHiveVariables) + // In SparkSQL CLI, we may want to use jars augmented by hiveconf + // hive.aux.jars.path, here we add jars augmented by hiveconf to + // Spark's SessionResourceLoader to obtain these jars. + val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS) + if (StringUtils.isNotBlank(auxJars)) { + val resourceLoader = SparkSQLEnv.sqlContext.sessionState.resourceLoader + StringUtils.split(auxJars, ",").foreach(resourceLoader.addJar(_)) + } + + // The class loader of CliSessionState's conf is current main thread's class loader + // used to load jars passed by --jars. One class loader used by AddJarCommand is + // sharedState.jarClassLoader which contain jar path passed by --jars in main thread. + // We set CliSessionState's conf class loader to sharedState.jarClassLoader. + // Thus we can load all jars passed by --jars and AddJarCommand. + sessionState.getConf.setClassLoader(SparkSQLEnv.sqlContext.sharedState.jarClassLoader) + // TODO work around for set the log output to console, because the HiveContext // will set the output into an invalid buffer. sessionState.in = System.in @@ -430,5 +439,112 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { ret } } + + // Adapted processLine from Hive 2.3's CliDriver.processLine. 
+ override def processLine(line: String, allowInterrupting: Boolean): Int = { + var oldSignal: SignalHandler = null + var interruptSignal: Signal = null + + if (allowInterrupting) { + // Remember all threads that were running at the time we started line processing. + // Hook up the custom Ctrl+C handler while processing this line + interruptSignal = new Signal("INT") + oldSignal = Signal.handle(interruptSignal, new SignalHandler() { + private var interruptRequested: Boolean = false + + override def handle(signal: Signal) { + val initialRequest = !interruptRequested + interruptRequested = true + + // Kill the VM on second ctrl+c + if (!initialRequest) { + console.printInfo("Exiting the JVM") + System.exit(127) + } + + // Interrupt the CLI thread to stop the current statement and return + // to prompt + console.printInfo("Interrupting... Be patient, this might take some time.") + console.printInfo("Press Ctrl+C again to kill JVM") + + HiveInterruptUtils.interrupt() + } + }) + } + + try { + var lastRet: Int = 0 + + // we can not use "split" function directly as ";" may be quoted + val commands = splitSemiColon(line).asScala + var command: String = "" + for (oneCmd <- commands) { + if (StringUtils.endsWith(oneCmd, "\\")) { + command += StringUtils.chop(oneCmd) + ";" + } else { + command += oneCmd + if (!StringUtils.isBlank(command)) { + val ret = processCmd(command) + command = "" + lastRet = ret + val ignoreErrors = HiveConf.getBoolVar(conf, HiveConf.ConfVars.CLIIGNOREERRORS) + if (ret != 0 && !ignoreErrors) { + CommandProcessorFactory.clean(conf.asInstanceOf[HiveConf]) + ret + } + } + } + } + CommandProcessorFactory.clean(conf.asInstanceOf[HiveConf]) + lastRet + } finally { + // Once we are done processing the line, restore the old handler + if (oldSignal != null && interruptSignal != null) { + Signal.handle(interruptSignal, oldSignal) + } + } + } + + // Adapted splitSemiColon from Hive 2.3's CliDriver.splitSemiColon. 
+ private def splitSemiColon(line: String): JList[String] = { + var insideSingleQuote = false + var insideDoubleQuote = false + var escape = false + var beginIndex = 0 + val ret = new JArrayList[String] + for (index <- 0 until line.length) { + if (line.charAt(index) == '\'') { + // take a look to see if it is escaped + if (!escape) { + // flip the boolean variable + insideSingleQuote = !insideSingleQuote + } + } else if (line.charAt(index) == '\"') { + // take a look to see if it is escaped + if (!escape) { + // flip the boolean variable + insideDoubleQuote = !insideDoubleQuote + } + } else if (line.charAt(index) == ';') { + if (insideSingleQuote || insideDoubleQuote) { + // do not split + } else { + // split, do not include ; itself + ret.add(line.substring(beginIndex, index)) + beginIndex = index + 1 + } + } else { + // nothing to do + } + // set the escape + if (escape) { + escape = false + } else if (line.charAt(index) == '\\') { + escape = true + } + } + ret.add(line.substring(beginIndex)) + ret + } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index c32d908ad1bba..1644ecb2453be 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -43,7 +43,7 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC extends CLIService(hiveServer) with ReflectedCompositeService { - override def init(hiveConf: HiveConf) { + override def init(hiveConf: HiveConf): Unit = { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) @@ -105,7 +105,7 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC } private[thriftserver] 
trait ReflectedCompositeService { this: AbstractService => - def initCompositeService(hiveConf: HiveConf) { + def initCompositeService(hiveConf: HiveConf): Unit = { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala index 960fdd11db15d..12fba0eae6dce 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala @@ -61,7 +61,7 @@ private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlCont try { context.sparkContext.setJobDescription(command) val execution = context.sessionState.executePlan(context.sql(command).logicalPlan) - hiveResponse = SQLExecution.withNewExecutionId(context.sparkSession, execution) { + hiveResponse = SQLExecution.withNewExecutionId(execution) { hiveResultString(execution.executedPlan) } tableSchema = getResultSetSchema(execution) @@ -94,7 +94,7 @@ private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlCont override def getSchema: Schema = tableSchema - override def destroy() { + override def destroy(): Unit = { super.destroy() hiveResponse = null tableSchema = null diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index 674da18ca1803..8944b93d9b697 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -33,7 +33,7 @@ private[hive] 
object SparkSQLEnv extends Logging { var sqlContext: SQLContext = _ var sparkContext: SparkContext = _ - def init() { + def init(): Unit = { if (sqlContext == null) { val sparkConf = new SparkConf(loadDefaults = true) // If user doesn't specify the appName, we want to get [SparkSQL::localHostName] instead of @@ -50,6 +50,11 @@ private[hive] object SparkSQLEnv extends Logging { sparkContext = sparkSession.sparkContext sqlContext = sparkSession.sqlContext + // SPARK-29604: force initialization of the session state with the Spark class loader, + // instead of having it happen during the initialization of the Hive client (which may use a + // different class loader). + sparkSession.sessionState + val metadataHive = sparkSession .sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client metadataHive.setOut(new PrintStream(System.out, true, UTF_8.name())) @@ -60,7 +65,7 @@ private[hive] object SparkSQLEnv extends Logging { } /** Cleans up and shuts down the Spark SQL environments. 
*/ - def stop() { + def stop(): Unit = { logDebug("Shutting down Spark SQL Environment") // Stop the SparkContext if (SparkSQLEnv.sparkContext != null) { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 13055e0ae1394..b3171897141c2 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -38,7 +38,7 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: private lazy val sparkSqlOperationManager = new SparkSQLOperationManager() - override def init(hiveConf: HiveConf) { + override def init(hiveConf: HiveConf): Unit = { setSuperField(this, "operationManager", sparkSqlOperationManager) super.init(hiveConf) } @@ -55,7 +55,7 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation, delegationToken) val session = super.getSession(sessionHandle) - HiveThriftServer2.listener.onSessionCreated( + HiveThriftServer2.eventManager.onSessionCreated( session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername) val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) { sqlContext @@ -63,6 +63,9 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: sqlContext.newSession() } ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) + val hiveSessionState = session.getSessionState + setConfMap(ctx, hiveSessionState.getOverriddenConfigurations) + setConfMap(ctx, hiveSessionState.getHiveVariables) if (sessionConf != null && sessionConf.containsKey("use:database")) { ctx.sql(s"use ${sessionConf.get("use:database")}") } @@ -70,10 
+73,20 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: sessionHandle } - override def closeSession(sessionHandle: SessionHandle) { - HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString) + override def closeSession(sessionHandle: SessionHandle): Unit = { + HiveThriftServer2.eventManager.onSessionClosed(sessionHandle.getSessionId.toString) + val ctx = sparkSqlOperationManager.sessionToContexts.getOrDefault(sessionHandle, sqlContext) + ctx.sparkSession.sessionState.catalog.getTempViewNames().foreach(ctx.uncacheTable) super.closeSession(sessionHandle) sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle) sparkSqlOperationManager.sessionToContexts.remove(sessionHandle) } + + def setConfMap(conf: SQLContext, confMap: java.util.Map[String, String]): Unit = { + val iterator = confMap.entrySet().iterator() + while (iterator.hasNext) { + val kv = iterator.next() + conf.setConf(kv.getKey, kv.getValue) + } + } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala index 35f92547e7815..3396560f43502 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala @@ -28,7 +28,6 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver._ -import org.apache.spark.sql.internal.SQLConf /** * Executes queries using Spark SQL, and maintains a list of handles to active queries. 
@@ -51,9 +50,6 @@ private[thriftserver] class SparkSQLOperationManager() require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" + s" initialized or had already closed.") val conf = sqlContext.sessionState.conf - val hiveSessionState = parentSession.getSessionState - setConfMap(conf, hiveSessionState.getOverriddenConfigurations) - setConfMap(conf, hiveSessionState.getHiveVariables) val runInBackground = async && conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC) val operation = new SparkExecuteStatementOperation(parentSession, statement, confOverlay, runInBackground)(sqlContext, sessionToActivePool) @@ -145,11 +141,14 @@ private[thriftserver] class SparkSQLOperationManager() operation } - def setConfMap(conf: SQLConf, confMap: java.util.Map[String, String]): Unit = { - val iterator = confMap.entrySet().iterator() - while (iterator.hasNext) { - val kv = iterator.next() - conf.setConfString(kv.getKey, kv.getValue) - } + override def newGetTypeInfoOperation( + parentSession: HiveSession): GetTypeInfoOperation = synchronized { + val sqlContext = sessionToContexts.get(parentSession.getSessionHandle) + require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" + + " initialized or had already closed.") + val operation = new SparkGetTypeInfoOperation(sqlContext, parentSession) + handleToOperation.put(operation.getHandle, operation) + logDebug(s"Created GetTypeInfoOperation with session=$parentSession.") + operation } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala new file mode 100644 index 0000000000000..5cb78f6e64650 --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver.ui + +import com.fasterxml.jackson.annotation.JsonIgnore +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.ExecutionState +import org.apache.spark.status.KVUtils.KVIndexParam +import org.apache.spark.util.kvstore.{KVIndex, KVStore} + +/** + * Provides a view of a KVStore with methods that make it easy to query SQL-specific state. There's + * no state kept in this class, so it's ok to have multiple instances of it in an application. 
+ */ +class HiveThriftServer2AppStatusStore( + store: KVStore, + val listener: Option[HiveThriftServer2Listener] = None) { + + def getSessionList: Seq[SessionInfo] = { + store.view(classOf[SessionInfo]).asScala.toSeq + } + + def getExecutionList: Seq[ExecutionInfo] = { + store.view(classOf[ExecutionInfo]).asScala.toSeq + } + + def getOnlineSessionNum: Int = { + store.view(classOf[SessionInfo]).asScala.count(_.finishTimestamp == 0) + } + + def getSession(sessionId: String): Option[SessionInfo] = { + try { + Some(store.read(classOf[SessionInfo], sessionId)) + } catch { + case _: NoSuchElementException => None + } + } + + def getExecution(executionId: String): Option[ExecutionInfo] = { + try { + Some(store.read(classOf[ExecutionInfo], executionId)) + } catch { + case _: NoSuchElementException => None + } + } + + /** + * When an error or a cancellation occurs, we set the finishTimestamp of the statement. + * Therefore, when we count the number of running statements, we need to exclude errors and + * cancellations and count all statements that have not been closed so far. 
+ */ + def getTotalRunning: Int = { + store.view(classOf[ExecutionInfo]).asScala.count(_.isExecutionActive) + } + + def getSessionCount: Long = { + store.count(classOf[SessionInfo]) + } + + def getExecutionCount: Long = { + store.count(classOf[ExecutionInfo]) + } +} + +private[thriftserver] class SessionInfo( + @KVIndexParam val sessionId: String, + val startTimestamp: Long, + val ip: String, + val userName: String, + val finishTimestamp: Long, + val totalExecution: Long) { + @JsonIgnore @KVIndex("finishTime") + private def finishTimeIndex: Long = if (finishTimestamp > 0L ) finishTimestamp else -1L + def totalTime: Long = { + if (finishTimestamp == 0L) { + System.currentTimeMillis - startTimestamp + } else { + finishTimestamp - startTimestamp + } + } +} + +private[thriftserver] class ExecutionInfo( + @KVIndexParam val execId: String, + val statement: String, + val sessionId: String, + val startTimestamp: Long, + val userName: String, + val finishTimestamp: Long, + val closeTimestamp: Long, + val executePlan: String, + val detail: String, + val state: ExecutionState.Value, + val jobId: ArrayBuffer[String], + val groupId: String) { + @JsonIgnore @KVIndex("finishTime") + private def finishTimeIndex: Long = if (finishTimestamp > 0L && !isExecutionActive) { + finishTimestamp + } else -1L + + @JsonIgnore @KVIndex("isExecutionActive") + def isExecutionActive: Boolean = { + !(state == ExecutionState.FAILED || + state == ExecutionState.CANCELED || + state == ExecutionState.CLOSED) + } + + def totalTime(endTime: Long): Long = { + if (endTime == 0L) { + System.currentTimeMillis - startTimestamp + } else { + endTime - startTimestamp + } + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala new file mode 100644 index 0000000000000..fa04c67896a69 --- /dev/null +++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.hive.thriftserver.ui + +import org.apache.spark.SparkContext +import org.apache.spark.scheduler.SparkListenerEvent + +/** + * This class manages events generated by the thriftserver application. It converts the + * operation and session events to listener events and post it into the live listener bus. 
+ */ +private[thriftserver] class HiveThriftServer2EventManager(sc: SparkContext) { + + def postLiveListenerBus(event: SparkListenerEvent): Unit = { + sc.listenerBus.post(event) + } + + def onSessionCreated(ip: String, sessionId: String, userName: String = "UNKNOWN"): Unit = { + postLiveListenerBus(SparkListenerThriftServerSessionCreated(ip, sessionId, + userName, System.currentTimeMillis())) + } + + def onSessionClosed(sessionId: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerSessionClosed(sessionId, + System.currentTimeMillis())) + } + + def onStatementStart( + id: String, + sessionId: String, + statement: String, + groupId: String, + userName: String = "UNKNOWN"): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationStart(id, sessionId, statement, groupId, + System.currentTimeMillis(), userName)) + } + + def onStatementParsed(id: String, executionPlan: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationParsed(id, executionPlan)) + } + + def onStatementCanceled(id: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationCanceled(id, System.currentTimeMillis())) + } + + def onStatementError(id: String, errorMsg: String, errorTrace: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationError(id, errorMsg, errorTrace, + System.currentTimeMillis())) + } + + def onStatementFinish(id: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationFinish(id, System.currentTimeMillis())) + + } + + def onOperationClosed(id: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationClosed(id, System.currentTimeMillis())) + } +} + +private[thriftserver] case class SparkListenerThriftServerSessionCreated( + ip: String, + sessionId: String, + userName: String, + startTime: Long) extends SparkListenerEvent + +private[thriftserver] case class SparkListenerThriftServerSessionClosed( + sessionId: String, finishTime: Long) extends SparkListenerEvent + 
+private[thriftserver] case class SparkListenerThriftServerOperationStart( + id: String, + sessionId: String, + statement: String, + groupId: String, + startTime: Long, + userName: String = "UNKNOWN") extends SparkListenerEvent + +private[thriftserver] case class SparkListenerThriftServerOperationParsed( + id: String, + executionPlan: String) extends SparkListenerEvent + +private[thriftserver] case class SparkListenerThriftServerOperationCanceled( + id: String, finishTime: Long) extends SparkListenerEvent + +private[thriftserver] case class SparkListenerThriftServerOperationError( + id: String, + errorMsg: String, + errorTrace: String, + finishTime: Long) extends SparkListenerEvent + +private[thriftserver] case class SparkListenerThriftServerOperationFinish( + id: String, + finishTime: Long) extends SparkListenerEvent + +private[thriftserver] case class SparkListenerThriftServerOperationClosed( + id: String, + closeTime: Long) extends SparkListenerEvent + + diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2HistoryServerPlugin.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2HistoryServerPlugin.scala new file mode 100644 index 0000000000000..aec4125801f68 --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2HistoryServerPlugin.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver.ui + +import org.apache.spark.SparkConf +import org.apache.spark.scheduler.SparkListener +import org.apache.spark.status.{AppHistoryServerPlugin, ElementTrackingStore} +import org.apache.spark.ui.SparkUI + +class HiveThriftServer2HistoryServerPlugin extends AppHistoryServerPlugin { + + override def createListeners(conf: SparkConf, store: ElementTrackingStore): Seq[SparkListener] = { + Seq(new HiveThriftServer2Listener(store, conf, None, false)) + } + + override def setupUI(ui: SparkUI): Unit = { + val store = new HiveThriftServer2AppStatusStore(ui.store.store) + if (store.getSessionCount > 0) { + new ThriftServerTab(store, ui) + } + } + + override def displayOrder: Int = 1 +} + diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala new file mode 100644 index 0000000000000..6d0a506fa94dc --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver.ui + +import java.util.concurrent.ConcurrentHashMap + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.hive.service.server.HiveServer2 + +import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.internal.config.Status.LIVE_ENTITY_UPDATE_PERIOD +import org.apache.spark.scheduler._ +import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.ExecutionState +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.status.{ElementTrackingStore, KVUtils, LiveEntity} + +/** + * An inner sparkListener called in sc.stop to clean up the HiveThriftServer2 + */ +private[thriftserver] class HiveThriftServer2Listener( + kvstore: ElementTrackingStore, + sparkConf: SparkConf, + server: Option[HiveServer2], + live: Boolean = true) extends SparkListener { + + private val sessionList = new ConcurrentHashMap[String, LiveSessionData]() + private val executionList = new ConcurrentHashMap[String, LiveExecutionData]() + + private val (retainedStatements: Int, retainedSessions: Int) = { + (sparkConf.get(SQLConf.THRIFTSERVER_UI_STATEMENT_LIMIT), + sparkConf.get(SQLConf.THRIFTSERVER_UI_SESSION_LIMIT)) + } + + // How often to update live entities. -1 means "never update" when replaying applications, + // meaning only the last write will happen. For live applications, this avoids a few + // operations that we can live without when rapidly processing incoming events. 
+ private val liveUpdatePeriodNs = if (live) sparkConf.get(LIVE_ENTITY_UPDATE_PERIOD) else -1L + + // Returns true if this listener has no live data. Exposed for tests only. + private[thriftserver] def noLiveData(): Boolean = { + sessionList.isEmpty && executionList.isEmpty + } + + kvstore.addTrigger(classOf[SessionInfo], retainedSessions) { count => + cleanupSession(count) + } + + kvstore.addTrigger(classOf[ExecutionInfo], retainedStatements) { count => + cleanupExecutions(count) + } + + kvstore.onFlush { + if (!live) { + flush((entity: LiveEntity) => updateStoreWithTriggerEnabled(entity)) + } + } + + override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { + if (live) { + server.foreach(_.stop()) + } + } + + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + val properties = jobStart.properties + if (properties != null) { + val groupId = properties.getProperty(SparkContext.SPARK_JOB_GROUP_ID) + if (groupId != null) { + updateJobDetails(jobStart.jobId.toString, groupId) + } + } + } + + private def updateJobDetails(jobId: String, groupId: String): Unit = { + val execList = executionList.values().asScala.filter(_.groupId == groupId).toSeq + if (execList.nonEmpty) { + execList.foreach { exec => + exec.jobId += jobId.toString + updateLiveStore(exec) + } + } else { + // It may possible that event reordering happens, such a way that JobStart event come after + // Execution end event (Refer SPARK-27019). To handle that situation, if occurs in + // Thriftserver, following code will take care. Here will come only if JobStart event comes + // after Execution End event. 
+ val storeExecInfo = kvstore.view(classOf[ExecutionInfo]).asScala.filter(_.groupId == groupId) + storeExecInfo.foreach { exec => + val liveExec = getOrCreateExecution(exec.execId, exec.statement, exec.sessionId, + exec.startTimestamp, exec.userName) + liveExec.jobId += jobId.toString + updateStoreWithTriggerEnabled(liveExec) + executionList.remove(liveExec.execId) + } + } + } + + override def onOtherEvent(event: SparkListenerEvent): Unit = { + event match { + case e: SparkListenerThriftServerSessionCreated => onSessionCreated(e) + case e: SparkListenerThriftServerSessionClosed => onSessionClosed(e) + case e: SparkListenerThriftServerOperationStart => onOperationStart(e) + case e: SparkListenerThriftServerOperationParsed => onOperationParsed(e) + case e: SparkListenerThriftServerOperationCanceled => onOperationCanceled(e) + case e: SparkListenerThriftServerOperationError => onOperationError(e) + case e: SparkListenerThriftServerOperationFinish => onOperationFinished(e) + case e: SparkListenerThriftServerOperationClosed => onOperationClosed(e) + case _ => // Ignore + } + } + + private def onSessionCreated(e: SparkListenerThriftServerSessionCreated): Unit = { + val session = getOrCreateSession(e.sessionId, e.startTime, e.ip, e.userName) + sessionList.put(e.sessionId, session) + updateLiveStore(session) + } + + private def onSessionClosed(e: SparkListenerThriftServerSessionClosed): Unit = { + val session = sessionList.get(e.sessionId) + session.finishTimestamp = e.finishTime + updateStoreWithTriggerEnabled(session) + sessionList.remove(e.sessionId) + } + + private def onOperationStart(e: SparkListenerThriftServerOperationStart): Unit = { + val info = getOrCreateExecution( + e.id, + e.statement, + e.sessionId, + e.startTime, + e.userName) + + info.state = ExecutionState.STARTED + executionList.put(e.id, info) + sessionList.get(e.sessionId).totalExecution += 1 + executionList.get(e.id).groupId = e.groupId + updateLiveStore(executionList.get(e.id)) + 
updateLiveStore(sessionList.get(e.sessionId)) + } + + private def onOperationParsed(e: SparkListenerThriftServerOperationParsed): Unit = { + executionList.get(e.id).executePlan = e.executionPlan + executionList.get(e.id).state = ExecutionState.COMPILED + updateLiveStore(executionList.get(e.id)) + } + + private def onOperationCanceled(e: SparkListenerThriftServerOperationCanceled): Unit = { + executionList.get(e.id).finishTimestamp = e.finishTime + executionList.get(e.id).state = ExecutionState.CANCELED + updateLiveStore(executionList.get(e.id)) + } + + private def onOperationError(e: SparkListenerThriftServerOperationError): Unit = { + executionList.get(e.id).finishTimestamp = e.finishTime + executionList.get(e.id).detail = e.errorMsg + executionList.get(e.id).state = ExecutionState.FAILED + updateLiveStore(executionList.get(e.id)) + } + + private def onOperationFinished(e: SparkListenerThriftServerOperationFinish): Unit = { + executionList.get(e.id).finishTimestamp = e.finishTime + executionList.get(e.id).state = ExecutionState.FINISHED + updateLiveStore(executionList.get(e.id)) + } + + private def onOperationClosed(e: SparkListenerThriftServerOperationClosed): Unit = { + executionList.get(e.id).closeTimestamp = e.closeTime + executionList.get(e.id).state = ExecutionState.CLOSED + updateStoreWithTriggerEnabled(executionList.get(e.id)) + executionList.remove(e.id) + } + + // Update both live and history stores. Trigger is enabled by default, hence + // it will cleanup the entity which exceeds the threshold. + def updateStoreWithTriggerEnabled(entity: LiveEntity): Unit = { + entity.write(kvstore, System.nanoTime(), checkTriggers = true) + } + + // Update only live stores. If trigger is enabled, it will cleanup entity + // which exceeds the threshold. 
+ def updateLiveStore(entity: LiveEntity, trigger: Boolean = false): Unit = { + val now = System.nanoTime() + if (live && liveUpdatePeriodNs >= 0 && now - entity.lastWriteTime > liveUpdatePeriodNs) { + entity.write(kvstore, now, checkTriggers = trigger) + } + } + + /** Go through all `LiveEntity`s and use `entityFlushFunc(entity)` to flush them. */ + private def flush(entityFlushFunc: LiveEntity => Unit): Unit = { + sessionList.values.asScala.foreach(entityFlushFunc) + executionList.values.asScala.foreach(entityFlushFunc) + } + + private def getOrCreateSession( + sessionId: String, + startTime: Long, + ip: String, + username: String): LiveSessionData = { + sessionList.computeIfAbsent(sessionId, + (_: String) => new LiveSessionData(sessionId, startTime, ip, username)) + } + + private def getOrCreateExecution( + execId: String, statement: String, + sessionId: String, startTimestamp: Long, + userName: String): LiveExecutionData = { + executionList.computeIfAbsent(execId, + (_: String) => new LiveExecutionData(execId, statement, sessionId, startTimestamp, userName)) + } + + private def cleanupExecutions(count: Long): Unit = { + val countToDelete = calculateNumberToRemove(count, retainedStatements) + if (countToDelete <= 0L) { + return + } + val view = kvstore.view(classOf[ExecutionInfo]).index("finishTime").first(0L) + val toDelete = KVUtils.viewToSeq(view, countToDelete.toInt) { j => + j.finishTimestamp != 0 + } + toDelete.foreach { j => kvstore.delete(j.getClass, j.execId) } + } + + private def cleanupSession(count: Long): Unit = { + val countToDelete = calculateNumberToRemove(count, retainedSessions) + if (countToDelete <= 0L) { + return + } + val view = kvstore.view(classOf[SessionInfo]).index("finishTime").first(0L) + val toDelete = KVUtils.viewToSeq(view, countToDelete.toInt) { j => + j.finishTimestamp != 0L + } + + toDelete.foreach { j => kvstore.delete(j.getClass, j.sessionId) } + } + + /** + * Remove at least (retainedSize / 10) items to reduce friction. 
Because tracking may be done + * asynchronously, this method may return 0 in case enough items have been deleted already. + */ + private def calculateNumberToRemove(dataSize: Long, retainedSize: Long): Long = { + if (dataSize > retainedSize) { + math.max(retainedSize / 10L, dataSize - retainedSize) + } else { + 0L + } + } +} + +private[thriftserver] class LiveExecutionData( + val execId: String, + val statement: String, + val sessionId: String, + val startTimestamp: Long, + val userName: String) extends LiveEntity { + + var finishTimestamp: Long = 0L + var closeTimestamp: Long = 0L + var executePlan: String = "" + var detail: String = "" + var state: ExecutionState.Value = ExecutionState.STARTED + val jobId: ArrayBuffer[String] = ArrayBuffer[String]() + var groupId: String = "" + + override protected def doUpdate(): Any = { + new ExecutionInfo( + execId, + statement, + sessionId, + startTimestamp, + userName, + finishTimestamp, + closeTimestamp, + executePlan, + detail, + state, + jobId, + groupId) + } +} + +private[thriftserver] class LiveSessionData( + val sessionId: String, + val startTimeStamp: Long, + val ip: String, + val username: String) extends LiveEntity { + + var finishTimestamp: Long = 0L + var totalExecution: Int = 0 + + override protected def doUpdate(): Any = { + new SessionInfo( + sessionId, + startTimeStamp, + ip, + username, + finishTimestamp, + totalExecution) + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 261e8fc912eb9..890a668275b81 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -17,39 +17,42 @@ package org.apache.spark.sql.hive.thriftserver.ui +import java.net.URLEncoder +import 
java.nio.charset.StandardCharsets.UTF_8 import java.util.Calendar import javax.servlet.http.HttpServletRequest -import scala.xml.Node +import scala.collection.JavaConverters._ +import scala.xml.{Node, Unparsed} import org.apache.commons.text.StringEscapeUtils import org.apache.spark.internal.Logging -import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.{ExecutionInfo, ExecutionState, SessionInfo} +import org.apache.spark.sql.hive.thriftserver.ui.ToolTips._ import org.apache.spark.ui._ import org.apache.spark.ui.UIUtils._ - +import org.apache.spark.util.Utils /** Page for Spark Web UI that shows statistics of the thrift server */ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("") with Logging { - - private val listener = parent.listener - private val startTime = Calendar.getInstance().getTime() - private val emptyCell = "-" + private val store = parent.store + private val startTime = parent.startTime /** Render the page */ def render(request: HttpServletRequest): Seq[Node] = { - val content = - listener.synchronized { // make sure all parts in this page are consistent - generateBasicStats() ++ -
    ++ + val content = store.synchronized { // make sure all parts in this page are consistent + generateBasicStats() ++ +
    ++

    - {listener.getOnlineSessionNum} session(s) are online, - running {listener.getTotalRunning} SQL statement(s) + {store.getOnlineSessionNum} + session(s) are online, + running + {store.getTotalRunning} + SQL statement(s)

    ++ generateSessionStatsTable(request) ++ generateSQLStatsTable(request) - } + } UIUtils.headerSparkPage(request, "JDBC/ODBC Server", content, parent) } @@ -68,53 +71,301 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" /** Generate stats of batch statements of the thrift server program */ private def generateSQLStatsTable(request: HttpServletRequest): Seq[Node] = { - val numStatement = listener.getExecutionList.size + + val numStatement = store.getExecutionList.size + val table = if (numStatement > 0) { - val headerRow = Seq("User", "JobID", "GroupID", "Start Time", "Finish Time", "Close Time", - "Execution Time", "Duration", "Statement", "State", "Detail") - val dataRows = listener.getExecutionList.sortBy(_.startTimestamp).reverse - - def generateDataRow(info: ExecutionInfo): Seq[Node] = { - val jobLink = info.jobId.map { id: String => - - [{id}] - + + val sqlTableTag = "sqlstat" + + val parameterOtherTable = request.getParameterMap().asScala + .filterNot(_._1.startsWith(sqlTableTag)) + .map { case (name, vals) => + name + "=" + vals(0) } - val detail = Option(info.detail).filter(!_.isEmpty).getOrElse(info.executePlan) - - {info.userName} - - {jobLink} - - {info.groupId} - {formatDate(info.startTimestamp)} - {if (info.finishTimestamp > 0) formatDate(info.finishTimestamp)} - {if (info.closeTimestamp > 0) formatDate(info.closeTimestamp)} - {formatDurationOption(Some(info.totalTime(info.finishTimestamp)))} - {formatDurationOption(Some(info.totalTime(info.closeTimestamp)))} - {info.statement} - {info.state} - {errorMessageCell(detail)} - + + val parameterSqlTablePage = request.getParameter(s"$sqlTableTag.page") + val parameterSqlTableSortColumn = request.getParameter(s"$sqlTableTag.sort") + val parameterSqlTableSortDesc = request.getParameter(s"$sqlTableTag.desc") + val parameterSqlPageSize = request.getParameter(s"$sqlTableTag.pageSize") + + val sqlTablePage = Option(parameterSqlTablePage).map(_.toInt).getOrElse(1) + val 
sqlTableSortColumn = Option(parameterSqlTableSortColumn).map { sortColumn => + UIUtils.decodeURLParameter(sortColumn) + }.getOrElse("Start Time") + val sqlTableSortDesc = Option(parameterSqlTableSortDesc).map(_.toBoolean).getOrElse( + // New executions should be shown above old executions by default. + sqlTableSortColumn == "Start Time" + ) + val sqlTablePageSize = Option(parameterSqlPageSize).map(_.toInt).getOrElse(100) + + try { + Some(new SqlStatsPagedTable( + request, + parent, + store.getExecutionList, + "sqlserver", + UIUtils.prependBaseUri(request, parent.basePath), + parameterOtherTable, + sqlTableTag, + pageSize = sqlTablePageSize, + sortColumn = sqlTableSortColumn, + desc = sqlTableSortDesc + ).table(sqlTablePage)) + } catch { + case e@(_: IllegalArgumentException | _: IndexOutOfBoundsException) => + Some(
    +

    Error while rendering job table:

    +
    +              {Utils.exceptionString(e)}
    +            
    +
    ) } + } else { + None + } + val content = + +

    + + SQL Statistics ({numStatement}) +

    +
    ++ +
    + {table.getOrElse("No statistics have been generated yet.")} +
    + content + } + + /** Generate stats of batch sessions of the thrift server program */ + private def generateSessionStatsTable(request: HttpServletRequest): Seq[Node] = { + val numSessions = store.getSessionList.size + val table = if (numSessions > 0) { + + val sessionTableTag = "sessionstat" + + val parameterOtherTable = request.getParameterMap().asScala + .filterNot(_._1.startsWith(sessionTableTag)) + .map { case (name, vals) => + name + "=" + vals(0) + } + + val parameterSessionTablePage = request.getParameter(s"$sessionTableTag.page") + val parameterSessionTableSortColumn = request.getParameter(s"$sessionTableTag.sort") + val parameterSessionTableSortDesc = request.getParameter(s"$sessionTableTag.desc") + val parameterSessionPageSize = request.getParameter(s"$sessionTableTag.pageSize") + + val sessionTablePage = Option(parameterSessionTablePage).map(_.toInt).getOrElse(1) + val sessionTableSortColumn = Option(parameterSessionTableSortColumn).map { sortColumn => + UIUtils.decodeURLParameter(sortColumn) + }.getOrElse("Start Time") + val sessionTableSortDesc = Option(parameterSessionTableSortDesc).map(_.toBoolean).getOrElse( + // New session should be shown above old session by default. + (sessionTableSortColumn == "Start Time") + ) + val sessionTablePageSize = Option(parameterSessionPageSize).map(_.toInt).getOrElse(100) - Some(UIUtils.listingTable(headerRow, generateDataRow, - dataRows, false, None, Seq(null), false)) + try { + Some(new SessionStatsPagedTable( + request, + parent, + store.getSessionList, + "sqlserver", + UIUtils.prependBaseUri(request, parent.basePath), + parameterOtherTable, + sessionTableTag, + pageSize = sessionTablePageSize, + sortColumn = sessionTableSortColumn, + desc = sessionTableSortDesc + ).table(sessionTablePage)) + } catch { + case e@(_: IllegalArgumentException | _: IndexOutOfBoundsException) => + Some(
    +

    Error while rendering job table:

    +
    +              {Utils.exceptionString(e)}
    +            
    +
    ) + } } else { None } val content = -
    SQL Statistics ({numStatement})
    ++ -
    -
      - {table.getOrElse("No statistics have been generated yet.")} -
    -
    + +

    + + Session Statistics ({numSessions}) +

    +
    ++ +
    + {table.getOrElse("No statistics have been generated yet.")} +
    content } +} + +private[ui] class SqlStatsPagedTable( + request: HttpServletRequest, + parent: ThriftServerTab, + data: Seq[ExecutionInfo], + subPath: String, + basePath: String, + parameterOtherTable: Iterable[String], + sqlStatsTableTag: String, + pageSize: Int, + sortColumn: String, + desc: Boolean) extends PagedTable[SqlStatsTableRow] { + + override val dataSource = new SqlStatsTableDataSource(data, pageSize, sortColumn, desc) + + private val parameterPath = s"$basePath/$subPath/?${parameterOtherTable.mkString("&")}" + + override def tableId: String = sqlStatsTableTag + + override def tableCssClass: String = + "table table-bordered table-condensed table-striped " + + "table-head-clickable table-cell-width-limited" + + override def pageLink(page: Int): String = { + val encodedSortColumn = URLEncoder.encode(sortColumn, UTF_8.name()) + parameterPath + + s"&$pageNumberFormField=$page" + + s"&$sqlStatsTableTag.sort=$encodedSortColumn" + + s"&$sqlStatsTableTag.desc=$desc" + + s"&$pageSizeFormField=$pageSize" + } + + override def pageSizeFormField: String = s"$sqlStatsTableTag.pageSize" + + override def pageNumberFormField: String = s"$sqlStatsTableTag.page" + + override def goButtonFormPath: String = { + val encodedSortColumn = URLEncoder.encode(sortColumn, UTF_8.name()) + s"$parameterPath&$sqlStatsTableTag.sort=$encodedSortColumn&$sqlStatsTableTag.desc=$desc" + } + + override def headers: Seq[Node] = { + val sqlTableHeaders = Seq("User", "JobID", "GroupID", "Start Time", "Finish Time", + "Close Time", "Execution Time", "Duration", "Statement", "State", "Detail") + + val tooltips = Seq(None, None, None, None, Some(THRIFT_SERVER_FINISH_TIME), + Some(THRIFT_SERVER_CLOSE_TIME), Some(THRIFT_SERVER_EXECUTION), + Some(THRIFT_SERVER_DURATION), None, None, None) + + assert(sqlTableHeaders.length == tooltips.length) + + val headerRow: Seq[Node] = { + sqlTableHeaders.zip(tooltips).map { case (header, tooltip) => + if (header == sortColumn) { + val headerLink = Unparsed( + 
parameterPath + + s"&$sqlStatsTableTag.sort=${URLEncoder.encode(header, UTF_8.name())}" + + s"&$sqlStatsTableTag.desc=${!desc}" + + s"&$sqlStatsTableTag.pageSize=$pageSize" + + s"#$sqlStatsTableTag") + val arrow = if (desc) "▾" else "▴" // UP or DOWN + + if (tooltip.nonEmpty) { + + + + {header} {Unparsed(arrow)} + + + + } else { + + + {header} {Unparsed(arrow)} + + + } + } else { + val headerLink = Unparsed( + parameterPath + + s"&$sqlStatsTableTag.sort=${URLEncoder.encode(header, UTF_8.name())}" + + s"&$sqlStatsTableTag.pageSize=$pageSize" + + s"#$sqlStatsTableTag") + + if(tooltip.nonEmpty) { + + + + {header} + + + + } else { + + + {header} + + + } + } + } + } + + {headerRow} + + } + + override def row(sqlStatsTableRow: SqlStatsTableRow): Seq[Node] = { + val info = sqlStatsTableRow.executionInfo + val startTime = info.startTimestamp + val executionTime = sqlStatsTableRow.executionTime + val duration = sqlStatsTableRow.duration + + def jobLinks(jobData: Seq[String]): Seq[Node] = { + jobData.map { jobId => + [{jobId.toString}] + } + } + + + + {info.userName} + + + {jobLinks(sqlStatsTableRow.jobId)} + + + {info.groupId} + + + {UIUtils.formatDate(startTime)} + + + {if (info.finishTimestamp > 0) formatDate(info.finishTimestamp)} + + + {if (info.closeTimestamp > 0) formatDate(info.closeTimestamp)} + + + + {formatDurationVerbose(executionTime)} + + + {formatDurationVerbose(duration)} + + + + {info.statement} + + + + {info.state} + + {errorMessageCell(sqlStatsTableRow.detail)} + + } + private def errorMessageCell(errorMessage: String): Seq[Node] = { val isMultiline = errorMessage.indexOf('\n') >= 0 @@ -124,73 +375,236 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" } else { errorMessage }) - val details = if (isMultiline) { - // scalastyle:off - - + details - ++ - - // scalastyle:on - } else { - "" - } - {errorSummary}{details} + val details = detailsUINode(isMultiline, errorMessage) + + {errorSummary}{details} + } - /** Generate stats 
of batch sessions of the thrift server program */ - private def generateSessionStatsTable(request: HttpServletRequest): Seq[Node] = { - val sessionList = listener.getSessionList - val numBatches = sessionList.size - val table = if (numBatches > 0) { - val dataRows = sessionList.sortBy(_.startTimestamp).reverse - val headerRow = Seq("User", "IP", "Session ID", "Start Time", "Finish Time", "Duration", - "Total Execute") - def generateDataRow(session: SessionInfo): Seq[Node] = { - val sessionLink = "%s/%s/session/?id=%s".format( - UIUtils.prependBaseUri(request, parent.basePath), parent.prefix, session.sessionId) - - {session.userName} - {session.ip} - {session.sessionId} - {formatDate(session.startTimestamp)} - {if (session.finishTimestamp > 0) formatDate(session.finishTimestamp)} - {formatDurationOption(Some(session.totalTime))} - {session.totalExecution.toString} - - } - Some(UIUtils.listingTable(headerRow, generateDataRow, dataRows, true, None, Seq(null), false)) - } else { - None - } + private def jobURL(request: HttpServletRequest, jobId: String): String = + "%s/jobs/job/?id=%s".format(UIUtils.prependBaseUri(request, parent.basePath), jobId) +} - val content = -
    Session Statistics ({numBatches})
    ++ -
    -
      - {table.getOrElse("No statistics have been generated yet.")} -
    -
    +private[ui] class SessionStatsPagedTable( + request: HttpServletRequest, + parent: ThriftServerTab, + data: Seq[SessionInfo], + subPath: String, + basePath: String, + parameterOtherTable: Iterable[String], + sessionStatsTableTag: String, + pageSize: Int, + sortColumn: String, + desc: Boolean) extends PagedTable[SessionInfo] { - content + override val dataSource = new SessionStatsTableDataSource(data, pageSize, sortColumn, desc) + + private val parameterPath = s"$basePath/$subPath/?${parameterOtherTable.mkString("&")}" + + override def tableId: String = sessionStatsTableTag + + override def tableCssClass: String = + "table table-bordered table-condensed table-striped " + + "table-head-clickable table-cell-width-limited" + + override def pageLink(page: Int): String = { + val encodedSortColumn = URLEncoder.encode(sortColumn, UTF_8.name()) + parameterPath + + s"&$pageNumberFormField=$page" + + s"&$sessionStatsTableTag.sort=$encodedSortColumn" + + s"&$sessionStatsTableTag.desc=$desc" + + s"&$pageSizeFormField=$pageSize" } + override def pageSizeFormField: String = s"$sessionStatsTableTag.pageSize" - /** - * Returns a human-readable string representing a duration such as "5 second 35 ms" - */ - private def formatDurationOption(msOption: Option[Long]): String = { - msOption.map(formatDurationVerbose).getOrElse(emptyCell) + override def pageNumberFormField: String = s"$sessionStatsTableTag.page" + + override def goButtonFormPath: String = { + val encodedSortColumn = URLEncoder.encode(sortColumn, UTF_8.name()) + s"$parameterPath&$sessionStatsTableTag.sort=$encodedSortColumn&$sessionStatsTableTag.desc=$desc" } - /** Generate HTML table from string data */ - private def listingTable(headers: Seq[String], data: Seq[Seq[String]]) = { - def generateDataRow(data: Seq[String]): Seq[Node] = { - {data.map(d => {d})} + override def headers: Seq[Node] = { + val sessionTableHeaders = + Seq("User", "IP", "Session ID", "Start Time", "Finish Time", "Duration", "Total Execute") + + 
val tooltips = Seq(None, None, None, None, None, Some(THRIFT_SESSION_DURATION), + Some(THRIFT_SESSION_TOTAL_EXECUTE)) + assert(sessionTableHeaders.length == tooltips.length) + val colWidthAttr = s"${100.toDouble / sessionTableHeaders.size}%" + + val headerRow: Seq[Node] = { + sessionTableHeaders.zip(tooltips).map { case (header, tooltip) => + if (header == sortColumn) { + val headerLink = Unparsed( + parameterPath + + s"&$sessionStatsTableTag.sort=${URLEncoder.encode(header, UTF_8.name())}" + + s"&$sessionStatsTableTag.desc=${!desc}" + + s"&$sessionStatsTableTag.pageSize=$pageSize" + + s"#$sessionStatsTableTag") + val arrow = if (desc) "▾" else "▴" // UP or DOWN + + + { + if (tooltip.nonEmpty) { + + {header} {Unparsed(arrow)} + + } else { + + {header} {Unparsed(arrow)} + + } + } + + + + } else { + val headerLink = Unparsed( + parameterPath + + s"&$sessionStatsTableTag.sort=${URLEncoder.encode(header, UTF_8.name())}" + + s"&$sessionStatsTableTag.pageSize=$pageSize" + + s"#$sessionStatsTableTag") + + + + { + if (tooltip.nonEmpty) { + + {header} + + } else { + {header} + } + } + + + } + } } - UIUtils.listingTable(headers, generateDataRow, data, fixedWidth = true) + + {headerRow} + + } + + override def row(session: SessionInfo): Seq[Node] = { + val sessionLink = "%s/%s/session/?id=%s".format( + UIUtils.prependBaseUri(request, parent.basePath), parent.prefix, session.sessionId) + + {session.userName} + {session.ip} + {session.sessionId} + {formatDate(session.startTimestamp)} + {if (session.finishTimestamp > 0) formatDate(session.finishTimestamp)} + {formatDurationVerbose(session.totalTime)} + {session.totalExecution.toString} + } } + private[ui] class SqlStatsTableRow( + val jobId: Seq[String], + val duration: Long, + val executionTime: Long, + val executionInfo: ExecutionInfo, + val detail: String) + + private[ui] class SqlStatsTableDataSource( + info: Seq[ExecutionInfo], + pageSize: Int, + sortColumn: String, + desc: Boolean) extends 
PagedDataSource[SqlStatsTableRow](pageSize) { + + // Convert ExecutionInfo to SqlStatsTableRow which contains the final contents to show in + // the table so that we can avoid creating duplicate contents during sorting the data + private val data = info.map(sqlStatsTableRow).sorted(ordering(sortColumn, desc)) + + private var _slicedStartTime: Set[Long] = null + + override def dataSize: Int = data.size + + override def sliceData(from: Int, to: Int): Seq[SqlStatsTableRow] = { + val r = data.slice(from, to) + _slicedStartTime = r.map(_.executionInfo.startTimestamp).toSet + r + } + + private def sqlStatsTableRow(executionInfo: ExecutionInfo): SqlStatsTableRow = { + val duration = executionInfo.totalTime(executionInfo.closeTimestamp) + val executionTime = executionInfo.totalTime(executionInfo.finishTimestamp) + val detail = Option(executionInfo.detail).filter(!_.isEmpty) + .getOrElse(executionInfo.executePlan) + val jobId = executionInfo.jobId.toSeq.sorted + + new SqlStatsTableRow(jobId, duration, executionTime, executionInfo, detail) + + } + + /** + * Return Ordering according to sortColumn and desc. 
+ */ + private def ordering(sortColumn: String, desc: Boolean): Ordering[SqlStatsTableRow] = { + val ordering: Ordering[SqlStatsTableRow] = sortColumn match { + case "User" => Ordering.by(_.executionInfo.userName) + case "JobID" => Ordering by (_.jobId.headOption) + case "GroupID" => Ordering.by(_.executionInfo.groupId) + case "Start Time" => Ordering.by(_.executionInfo.startTimestamp) + case "Finish Time" => Ordering.by(_.executionInfo.finishTimestamp) + case "Close Time" => Ordering.by(_.executionInfo.closeTimestamp) + case "Execution Time" => Ordering.by(_.executionTime) + case "Duration" => Ordering.by(_.duration) + case "Statement" => Ordering.by(_.executionInfo.statement) + case "State" => Ordering.by(_.executionInfo.state) + case "Detail" => Ordering.by(_.detail) + case unknownColumn => throw new IllegalArgumentException(s"Unknown column: $unknownColumn") + } + if (desc) { + ordering.reverse + } else { + ordering + } + } + + } + + private[ui] class SessionStatsTableDataSource( + info: Seq[SessionInfo], + pageSize: Int, + sortColumn: String, + desc: Boolean) extends PagedDataSource[SessionInfo](pageSize) { + + // Sorting SessionInfo data + private val data = info.sorted(ordering(sortColumn, desc)) + + private var _slicedStartTime: Set[Long] = null + + override def dataSize: Int = data.size + + override def sliceData(from: Int, to: Int): Seq[SessionInfo] = { + val r = data.slice(from, to) + _slicedStartTime = r.map(_.startTimestamp).toSet + r + } + + /** + * Return Ordering according to sortColumn and desc. 
+ */ + private def ordering(sortColumn: String, desc: Boolean): Ordering[SessionInfo] = { + val ordering: Ordering[SessionInfo] = sortColumn match { + case "User" => Ordering.by(_.userName) + case "IP" => Ordering.by(_.ip) + case "Session ID" => Ordering.by(_.sessionId) + case "Start Time" => Ordering by (_.startTimestamp) + case "Finish Time" => Ordering.by(_.finishTimestamp) + case "Duration" => Ordering.by(_.totalTime) + case "Total Execute" => Ordering.by(_.totalExecution) + case unknownColumn => throw new IllegalArgumentException(s"Unknown column: $unknownColumn") + } + if (desc) { + ordering.reverse + } else { + ordering + } + } + } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala index 81df1304085e8..c46c3d6b68a43 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala @@ -17,34 +17,29 @@ package org.apache.spark.sql.hive.thriftserver.ui -import java.util.Calendar import javax.servlet.http.HttpServletRequest +import scala.collection.JavaConverters._ import scala.xml.Node -import org.apache.commons.text.StringEscapeUtils - import org.apache.spark.internal.Logging -import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.{ExecutionInfo, ExecutionState} import org.apache.spark.ui._ import org.apache.spark.ui.UIUtils._ +import org.apache.spark.util.Utils /** Page for Spark Web UI that shows statistics of jobs running in the thrift server */ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) extends WebUIPage("session") with Logging { - - private val listener = parent.listener - private val startTime = Calendar.getInstance().getTime() - private val emptyCell = "-" + val store = 
parent.store + private val startTime = parent.startTime /** Render the page */ def render(request: HttpServletRequest): Seq[Node] = { val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") - val content = - listener.synchronized { // make sure all parts in this page are consistent - val sessionStat = listener.getSession(parameterId).getOrElse(null) + val content = store.synchronized { // make sure all parts in this page are consistent + val sessionStat = store.getSession(parameterId).getOrElse(null) require(sessionStat != null, "Invalid sessionID[" + parameterId + "]") generateBasicStats() ++ @@ -75,92 +70,72 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) /** Generate stats of batch statements of the thrift server program */ private def generateSQLStatsTable(request: HttpServletRequest, sessionID: String): Seq[Node] = { - val executionList = listener.getExecutionList + val executionList = store.getExecutionList .filter(_.sessionId == sessionID) val numStatement = executionList.size val table = if (numStatement > 0) { - val headerRow = Seq("User", "JobID", "GroupID", "Start Time", "Finish Time", "Close Time", - "Execution Time", "Duration", "Statement", "State", "Detail") - val dataRows = executionList.sortBy(_.startTimestamp).reverse - - def generateDataRow(info: ExecutionInfo): Seq[Node] = { - val jobLink = info.jobId.map { id: String => - - [{id}] - + + val sqlTableTag = "sqlsessionstat" + + val parameterOtherTable = request.getParameterMap().asScala + .filterNot(_._1.startsWith(sqlTableTag)) + .map { case (name, vals) => + name + "=" + vals(0) } - val detail = Option(info.detail).filter(!_.isEmpty).getOrElse(info.executePlan) - - {info.userName} - - {jobLink} - - {info.groupId} - {formatDate(info.startTimestamp)} - {formatDate(info.finishTimestamp)} - {formatDate(info.closeTimestamp)} - {formatDurationOption(Some(info.totalTime(info.finishTimestamp)))} - 
{formatDurationOption(Some(info.totalTime(info.closeTimestamp)))} - {info.statement} - {info.state} - {errorMessageCell(detail)} - - } - Some(UIUtils.listingTable(headerRow, generateDataRow, - dataRows, false, None, Seq(null), false)) + val parameterSqlTablePage = request.getParameter(s"$sqlTableTag.page") + val parameterSqlTableSortColumn = request.getParameter(s"$sqlTableTag.sort") + val parameterSqlTableSortDesc = request.getParameter(s"$sqlTableTag.desc") + val parameterSqlPageSize = request.getParameter(s"$sqlTableTag.pageSize") + + val sqlTablePage = Option(parameterSqlTablePage).map(_.toInt).getOrElse(1) + val sqlTableSortColumn = Option(parameterSqlTableSortColumn).map { sortColumn => + UIUtils.decodeURLParameter(sortColumn) + }.getOrElse("Start Time") + val sqlTableSortDesc = Option(parameterSqlTableSortDesc).map(_.toBoolean).getOrElse( + // New executions should be shown above old executions by default. + sqlTableSortColumn == "Start Time" + ) + val sqlTablePageSize = Option(parameterSqlPageSize).map(_.toInt).getOrElse(100) + + try { + Some(new SqlStatsPagedTable( + request, + parent, + executionList, + "sqlserver/session", + UIUtils.prependBaseUri(request, parent.basePath), + parameterOtherTable, + sqlTableTag, + pageSize = sqlTablePageSize, + sortColumn = sqlTableSortColumn, + desc = sqlTableSortDesc + ).table(sqlTablePage)) + } catch { + case e@(_: IllegalArgumentException | _: IndexOutOfBoundsException) => + Some(
    +

    Error while rendering job table:

    +
    +              {Utils.exceptionString(e)}
    +            
    +
    ) + } } else { None } - val content = -
    SQL Statistics
    ++ -
    -
      - {table.getOrElse("No statistics have been generated yet.")} -
    + +

    + + SQL Statistics +

    +
    ++ +
    + {table.getOrElse("No statistics have been generated yet.")}
    content } - - private def errorMessageCell(errorMessage: String): Seq[Node] = { - val isMultiline = errorMessage.indexOf('\n') >= 0 - val errorSummary = StringEscapeUtils.escapeHtml4( - if (isMultiline) { - errorMessage.substring(0, errorMessage.indexOf('\n')) - } else { - errorMessage - }) - val details = if (isMultiline) { - // scalastyle:off - - + details - ++ - - // scalastyle:on - } else { - "" - } - {errorSummary}{details} - } - - /** - * Returns a human-readable string representing a duration such as "5 second 35 ms" - */ - private def formatDurationOption(msOption: Option[Long]): String = { - msOption.map(formatDurationVerbose).getOrElse(emptyCell) - } - - /** Generate HTML table from string data */ - private def listingTable(headers: Seq[String], data: Seq[Seq[String]]) = { - def generateDataRow(data: Seq[String]): Seq[Node] = { - {data.map(d => {d})} - } - UIUtils.listingTable(headers, generateDataRow, data, fixedWidth = true) - } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala index db2066009b351..6d783b1c555a7 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala @@ -19,28 +19,25 @@ package org.apache.spark.sql.hive.thriftserver.ui import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 -import org.apache.spark.sql.hive.thriftserver.ui.ThriftServerTab._ import org.apache.spark.ui.{SparkUI, SparkUITab} /** * Spark Web UI tab that shows statistics of jobs running in the thrift server. * This assumes the given SparkContext has enabled its SparkUI. 
*/ -private[thriftserver] class ThriftServerTab(sparkContext: SparkContext) - extends SparkUITab(getSparkUI(sparkContext), "sqlserver") with Logging { - +private[thriftserver] class ThriftServerTab( + val store: HiveThriftServer2AppStatusStore, + sparkUI: SparkUI) extends SparkUITab(sparkUI, "sqlserver") with Logging { override val name = "JDBC/ODBC Server" - val parent = getSparkUI(sparkContext) - val listener = HiveThriftServer2.listener + val parent = sparkUI + val startTime = sparkUI.store.applicationInfo().attempts.head.startTime attachPage(new ThriftServerPage(this)) attachPage(new ThriftServerSessionPage(this)) parent.attachTab(this) - - def detach() { - getSparkUI(sparkContext).detachTab(this) + def detach(): Unit = { + sparkUI.detachTab(this) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateControlMicroBatchStream.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ToolTips.scala similarity index 56% rename from sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateControlMicroBatchStream.scala rename to sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ToolTips.scala index 6a66f52c8f732..56ab766f4aabd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateControlMicroBatchStream.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ToolTips.scala @@ -15,17 +15,25 @@ * limitations under the License. */ -package org.apache.spark.sql.execution.streaming.sources +package org.apache.spark.sql.hive.thriftserver.ui -import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchStream, Offset} +private[ui] object ToolTips { + val THRIFT_SERVER_FINISH_TIME = + "Execution finish time, before fetching the results" -// A special `MicroBatchStream` that can get latestOffset with a start offset. 
-trait RateControlMicroBatchStream extends MicroBatchStream { + val THRIFT_SERVER_CLOSE_TIME = + "Operation close time after fetching the results" - override def latestOffset(): Offset = { - throw new IllegalAccessException( - "latestOffset should not be called for RateControlMicroBatchReadSupport") - } + val THRIFT_SERVER_EXECUTION = + "Difference between start time and finish time" + + val THRIFT_SERVER_DURATION = + "Difference between start time and close time" + + val THRIFT_SESSION_TOTAL_EXECUTE = + "Number of operations submitted in this session" + + val THRIFT_SESSION_DURATION = + "Elapsed time since session start, or until closed if the session was closed" - def latestOffset(start: Offset): Offset } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6e042ac41d9da..6609701be0ede 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -27,12 +27,11 @@ import scala.concurrent.Promise import scala.concurrent.duration._ import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging -import org.apache.spark.sql.hive.test.HiveTestUtils +import org.apache.spark.sql.hive.test.HiveTestJars import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -165,7 +164,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") runCliWithin(3.minute)( - "CREATE TABLE hive_test(key INT, val STRING);" + "CREATE TABLE hive_test(key INT, val 
STRING) USING hive;" -> "", "SHOW TABLES;" -> "hive_test", @@ -202,7 +201,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { } test("Commands using SerDe provided in --jars") { - val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath + val jarFile = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath val dataFilePath = Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") @@ -212,14 +211,14 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; """.stripMargin -> "", - "CREATE TABLE sourceTable (key INT, val STRING);" + "CREATE TABLE sourceTable (key INT, val STRING) USING hive;" -> "", s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTable;" -> "", "INSERT INTO TABLE t1 SELECT key, val FROM sourceTable;" -> "", - "SELECT count(key) FROM t1;" - -> "5", + "SELECT collect_list(array(val)) FROM t1;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", "DROP TABLE t1;" -> "", "DROP TABLE sourceTable;" @@ -227,6 +226,32 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } + test("SPARK-29022: Commands using SerDe provided in --hive.aux.jars.path") { + val dataFilePath = + Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + val hiveContribJar = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath + runCliWithin( + 3.minute, + Seq("--conf", s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( + """CREATE TABLE addJarWithHiveAux(key string, val string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + """.stripMargin + -> "", + "CREATE TABLE sourceTableForWithHiveAux (key INT, val STRING) USING hive;" + -> "", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithHiveAux;" + -> "", + "INSERT INTO TABLE addJarWithHiveAux SELECT key, val FROM sourceTableForWithHiveAux;" 
+ -> "", + "SELECT collect_list(array(val)) FROM addJarWithHiveAux;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", + "DROP TABLE addJarWithHiveAux;" + -> "", + "DROP TABLE sourceTableForWithHiveAux;" + -> "" + ) + } + test("SPARK-11188 Analysis error reporting") { runCliWithin(timeout = 2.minute, errorResponses = Seq("AnalysisException"))( @@ -297,12 +322,82 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { } test("Support hive.aux.jars.path") { - val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath + val hiveContribJar = HiveTestJars.getHiveContribJar().getCanonicalPath runCliWithin( 1.minute, Seq("--conf", s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( - s"CREATE TEMPORARY FUNCTION example_max AS '${classOf[UDAFExampleMax].getName}';" -> "", - "SELECT example_max(1);" -> "1" + "CREATE TEMPORARY FUNCTION example_format AS " + + "'org.apache.hadoop.hive.contrib.udf.example.UDFExampleFormat';" -> "", + "SELECT example_format('%o', 93);" -> "135" + ) + } + + test("SPARK-28840 test --jars command") { + val jarFile = new File("../../sql/hive/src/test/resources/SPARK-21101-1.0.jar").getCanonicalPath + runCliWithin( + 1.minute, + Seq("--jars", s"$jarFile"))( + "CREATE TEMPORARY FUNCTION testjar AS" + + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", + "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> "TEST-SPARK-TEST-jar\t28840" + ) + } + + test("SPARK-28840 test --jars and hive.aux.jars.path command") { + val jarFile = new File("../../sql/hive/src/test/resources/SPARK-21101-1.0.jar").getCanonicalPath + val hiveContribJar = HiveTestJars.getHiveContribJar().getCanonicalPath + runCliWithin( + 1.minute, + Seq("--jars", s"$jarFile", "--conf", + s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( + "CREATE TEMPORARY FUNCTION testjar AS" + + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", + "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> 
"TEST-SPARK-TEST-jar\t28840", + "CREATE TEMPORARY FUNCTION example_max AS " + + "'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax';" -> "", + "SELECT concat_ws(',', 'First', example_max(1234321), 'Third');" -> "First,1234321,Third" + ) + } + + test("SPARK-29022 Commands using SerDe provided in ADD JAR sql") { + val dataFilePath = + Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + val hiveContribJar = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath + runCliWithin( + 3.minute)( + s"ADD JAR ${hiveContribJar};" -> "", + """CREATE TABLE addJarWithSQL(key string, val string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + """.stripMargin + -> "", + "CREATE TABLE sourceTableForWithSQL(key INT, val STRING) USING hive;" + -> "", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithSQL;" + -> "", + "INSERT INTO TABLE addJarWithSQL SELECT key, val FROM sourceTableForWithSQL;" + -> "", + "SELECT collect_list(array(val)) FROM addJarWithSQL;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", + "DROP TABLE addJarWithSQL;" + -> "", + "DROP TABLE sourceTableForWithSQL;" + -> "" + ) + } + + test("SPARK-26321 Should not split semicolon within quoted string literals") { + runCliWithin(3.minute)( + """select 'Test1', "^;^";""" -> "Test1\t^;^", + """select 'Test2', "\";";""" -> "Test2\t\";", + """select 'Test3', "\';";""" -> "Test3\t';", + "select concat('Test4', ';');" -> "Test4;" + ) + } + + test("Pad Decimal numbers with trailing zeros to the scale of the column") { + runCliWithin(1.minute)( + "SELECT CAST(1 AS DECIMAL(38, 18));" + -> "1.000000000000000000" ) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala new file mode 100644 index 0000000000000..d056b3b2153cf --- /dev/null +++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * These classes in this package are intentionally placed to the outer package of spark, + * because IsolatedClientLoader leverages Spark classloader for shared classess including + * spark package, and the test should fail if Spark initializes these listeners with + * IsolatedClientLoader. 
+ */ +package test.custom.listener + +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.streaming.StreamingQueryListener +import org.apache.spark.sql.util.QueryExecutionListener + +class DummyQueryExecutionListener extends QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {} + override def onFailure(funcName: String, qe: QueryExecution, error: Throwable): Unit = {} +} + +class DummyStreamingQueryListener extends StreamingQueryListener { + override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = {} + override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = {} + override def onQueryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = {} +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index b7185db2f2ae7..84eed7b2eda22 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -23,6 +23,7 @@ import java.nio.charset.StandardCharsets import java.sql.{Date, DriverManager, SQLException, Statement} import java.util.{Locale, UUID} +import scala.collection.JavaConverters._ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future, Promise} @@ -34,7 +35,7 @@ import com.google.common.io.Files import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.jdbc.HiveDriver import org.apache.hive.service.auth.PlainSaslHelper -import org.apache.hive.service.cli.{FetchOrientation, FetchType, GetInfoType} +import org.apache.hive.service.cli.{FetchOrientation, FetchType, GetInfoType, 
RowSet} import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket @@ -43,7 +44,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.hive.test.HiveTestUtils +import org.apache.spark.sql.hive.test.HiveTestJars import org.apache.spark.sql.internal.StaticSQLConf.HIVE_THRIFT_SERVER_SINGLESESSION import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -100,7 +101,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { withJdbcStatement("test_16563") { statement => val queries = Seq( - "CREATE TABLE test_16563(key INT, val STRING)", + "CREATE TABLE test_16563(key INT, val STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_16563") queries.foreach(statement.execute) @@ -144,10 +145,17 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { def executeTest(hiveList: String): Unit = { hiveList.split(";").foreach{ m => val kv = m.split("=") - // select "${a}"; ---> avalue - val resultSet = statement.executeQuery("select \"${" + kv(0) + "}\"") + val k = kv(0) + val v = kv(1) + val modValue = s"${v}_MOD_VALUE" + // select '${a}'; ---> avalue + val resultSet = statement.executeQuery(s"select '$${$k}'") resultSet.next() - assert(resultSet.getString(1) === kv(1)) + assert(resultSet.getString(1) === v) + statement.executeQuery(s"set $k=$modValue") + val modResultSet = statement.executeQuery(s"select '$${$k}'") + modResultSet.next() + assert(modResultSet.getString(1) === s"$modValue") } } } @@ -157,7 +165,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { withJdbcStatement("test") { statement => val queries = Seq( "SET spark.sql.shuffle.partitions=3", - "CREATE TABLE test(key INT, 
val STRING)", + "CREATE TABLE test(key INT, val STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test", "CACHE TABLE test") @@ -183,7 +191,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { test("SPARK-3004 regression: result set containing NULL") { withJdbcStatement("test_null") { statement => val queries = Seq( - "CREATE TABLE test_null(key INT, val STRING)", + "CREATE TABLE test_null(key INT, val STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKvWithNull}' OVERWRITE INTO TABLE test_null") queries.foreach(statement.execute) @@ -203,7 +211,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { test("SPARK-4292 regression: result set iterator issue") { withJdbcStatement("test_4292") { statement => val queries = Seq( - "CREATE TABLE test_4292(key INT, val STRING)", + "CREATE TABLE test_4292(key INT, val STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_4292") queries.foreach(statement.execute) @@ -220,7 +228,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { test("SPARK-4309 regression: Date type support") { withJdbcStatement("test_date") { statement => val queries = Seq( - "CREATE TABLE test_date(key INT, value STRING)", + "CREATE TABLE test_date(key INT, value STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_date") queries.foreach(statement.execute) @@ -237,7 +245,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { test("SPARK-4407 regression: Complex type support") { withJdbcStatement("test_map") { statement => val queries = Seq( - "CREATE TABLE test_map(key INT, value STRING)", + "CREATE TABLE test_map(key INT, value STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) @@ -260,7 +268,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { test("SPARK-12143 
regression: Binary type support") { withJdbcStatement("test_binary") { statement => val queries = Seq( - "CREATE TABLE test_binary(key INT, value STRING)", + "CREATE TABLE test_binary(key INT, value STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_binary") queries.foreach(statement.execute) @@ -286,7 +294,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { { statement => val queries = Seq( - "CREATE TABLE test_map(key INT, value STRING)", + "CREATE TABLE test_map(key INT, value STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map", "CACHE TABLE test_table AS SELECT key FROM test_map ORDER BY key DESC", "CREATE DATABASE db1") @@ -485,7 +493,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { withMultipleConnectionJdbcStatement("smallKV", "addJar")( { statement => - val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath + val jarFile = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath statement.executeQuery(s"ADD JAR $jarFile") }, @@ -493,7 +501,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { { statement => val queries = Seq( - "CREATE TABLE smallKV(key INT, val STRING)", + "CREATE TABLE smallKV(key INT, val STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE smallKV", """CREATE TABLE addJar(key string) |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' @@ -590,7 +598,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { val dataPath = "../hive/src/test/resources/data/files/kv1.txt" Seq( - "CREATE TABLE test_udtf(key INT, value STRING)", + "CREATE TABLE test_udtf(key INT, value STRING) USING hive", s"LOAD DATA LOCAL INPATH '$dataPath' OVERWRITE INTO TABLE test_udtf" ).foreach(statement.execute) @@ -662,6 +670,107 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { assert(rs.getBigDecimal(1) === new 
java.math.BigDecimal("1.000000000000000000")) } } + + test("Support interval type") { + withJdbcStatement() { statement => + val rs = statement.executeQuery("SELECT interval 3 months 1 hours") + assert(rs.next()) + assert(rs.getString(1) === "3 months 1 hours") + } + // Invalid interval value + withJdbcStatement() { statement => + val e = intercept[SQLException] { + statement.executeQuery("SELECT interval 3 months 1 hou") + } + assert(e.getMessage.contains("org.apache.spark.sql.catalyst.parser.ParseException")) + } + } + + test("ThriftCLIService FetchResults FETCH_FIRST, FETCH_NEXT, FETCH_PRIOR") { + def checkResult(rows: RowSet, start: Long, end: Long): Unit = { + assert(rows.getStartOffset() == start) + assert(rows.numRows() == end - start) + rows.iterator.asScala.zip((start until end).iterator).foreach { case (row, v) => + assert(row(0).asInstanceOf[Long] === v) + } + } + + withCLIServiceClient { client => + val user = System.getProperty("user.name") + val sessionHandle = client.openSession(user, "") + + val confOverlay = new java.util.HashMap[java.lang.String, java.lang.String] + val operationHandle = client.executeStatement( + sessionHandle, + "SELECT * FROM range(10)", + confOverlay) // 10 rows result with sequence 0, 1, 2, ..., 9 + var rows: RowSet = null + + // Fetch 5 rows with FETCH_NEXT + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_NEXT, 5, FetchType.QUERY_OUTPUT) + checkResult(rows, 0, 5) // fetched [0, 5) + + // Fetch another 2 rows with FETCH_NEXT + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_NEXT, 2, FetchType.QUERY_OUTPUT) + checkResult(rows, 5, 7) // fetched [5, 7) + + // FETCH_PRIOR 3 rows + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_PRIOR, 3, FetchType.QUERY_OUTPUT) + checkResult(rows, 2, 5) // fetched [2, 5) + + // FETCH_PRIOR again will scroll back to 0, and then the returned result + // may overlap the results of previous FETCH_PRIOR + rows = client.fetchResults( + 
operationHandle, FetchOrientation.FETCH_PRIOR, 3, FetchType.QUERY_OUTPUT) + checkResult(rows, 0, 3) // fetched [0, 3) + + // FETCH_PRIOR again will stay at 0 + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_PRIOR, 4, FetchType.QUERY_OUTPUT) + checkResult(rows, 0, 4) // fetched [0, 4) + + // FETCH_NEXT will continue moving forward from offset 4 + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_NEXT, 10, FetchType.QUERY_OUTPUT) + checkResult(rows, 4, 10) // fetched [4, 10) until the end of results + + // FETCH_NEXT is at end of results + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_NEXT, 5, FetchType.QUERY_OUTPUT) + checkResult(rows, 10, 10) // fetched empty [10, 10) (at end of results) + + // FETCH_NEXT is at end of results again + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_NEXT, 2, FetchType.QUERY_OUTPUT) + checkResult(rows, 10, 10) // fetched empty [10, 10) (at end of results) + + // FETCH_PRIOR 1 row yet again + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_PRIOR, 1, FetchType.QUERY_OUTPUT) + checkResult(rows, 9, 10) // fetched [9, 10) + + // FETCH_NEXT will return 0 rows yet again + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_NEXT, 5, FetchType.QUERY_OUTPUT) + checkResult(rows, 10, 10) // fetched empty [10, 10) (at end of results) + + // FETCH_FIRST restarts from the first row + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_FIRST, 3, FetchType.QUERY_OUTPUT) + checkResult(rows, 0, 3) // fetched [0, 3) + + // Fetch till the end of rows with FETCH_NEXT + rows = client.fetchResults( + operationHandle, FetchOrientation.FETCH_NEXT, 1000, FetchType.QUERY_OUTPUT) + checkResult(rows, 3, 10) // fetched [3, 10) + + client.closeOperation(operationHandle) + client.closeSession(sessionHandle) + } + } } class SingleSessionSuite extends HiveThriftJdbcTest { @@ -681,6 +790,8 @@ class SingleSessionSuite extends
HiveThriftJdbcTest { Seq( "SET foo=bar", s"ADD JAR $jarURL", + "CREATE TABLE test_udtf(key INT, value STRING) USING hive", + s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_udtf", s"""CREATE TEMPORARY FUNCTION udtf_count2 |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' """.stripMargin @@ -707,6 +818,16 @@ class SingleSessionSuite extends HiveThriftJdbcTest { assert(rs2.next()) assert(rs2.getString(1) === "Usage: N/A.") + + val rs3 = statement.executeQuery( + "SELECT key, cc FROM test_udtf LATERAL VIEW udtf_count2(value) dd AS cc") + assert(rs3.next()) + assert(rs3.getInt(1) === 165) + assert(rs3.getInt(2) === 5) + + assert(rs3.next()) + assert(rs3.getInt(1) === 165) + assert(rs3.getInt(2) === 5) } finally { statement.executeQuery("DROP TEMPORARY FUNCTION udtf_count2") } @@ -770,7 +891,7 @@ class HiveThriftHttpServerSuite extends HiveThriftJdbcTest { withJdbcStatement("test") { statement => val queries = Seq( "SET spark.sql.shuffle.partitions=3", - "CREATE TABLE test(key INT, val STRING)", + "CREATE TABLE test(key INT, val STRING) USING hive", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test", "CACHE TABLE test") @@ -820,7 +941,7 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { s"jdbc:hive2://localhost:$serverPort/?${hiveConfList}#${hiveVarList}" } - def withMultipleConnectionJdbcStatement(tableNames: String*)(fs: (Statement => Unit)*) { + def withMultipleConnectionJdbcStatement(tableNames: String*)(fs: (Statement => Unit)*): Unit = { val user = System.getProperty("user.name") val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } val statements = connections.map(_.createStatement()) @@ -841,7 +962,7 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { } } - def withDatabase(dbNames: String*)(fs: (Statement => Unit)*) { + def withDatabase(dbNames: String*)(fs: (Statement => Unit)*): Unit = { val user = System.getProperty("user.name") val 
connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } val statements = connections.map(_.createStatement()) @@ -857,7 +978,7 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { } } - def withJdbcStatement(tableNames: String*)(f: Statement => Unit) { + def withJdbcStatement(tableNames: String*)(f: Statement => Unit): Unit = { withMultipleConnectionJdbcStatement(tableNames: _*)(f) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala new file mode 100644 index 0000000000000..ce610098156f3 --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.thriftserver + +import java.sql.{DriverManager, Statement} + +import scala.concurrent.duration._ +import scala.util.{Random, Try} + +import org.apache.hadoop.hive.conf.HiveConf.ConfVars + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SharedSparkSession + +trait SharedThriftServer extends SharedSparkSession { + + private var hiveServer2: HiveThriftServer2 = _ + + override def beforeAll(): Unit = { + super.beforeAll() + // Chooses a random port between 10000 and 19999 + var listeningPort = 10000 + Random.nextInt(10000) + + // Retries up to 3 times with different port numbers if the server fails to start + (1 to 3).foldLeft(Try(startThriftServer(listeningPort, 0))) { case (started, attempt) => + started.orElse { + listeningPort += 1 + Try(startThriftServer(listeningPort, attempt)) + } + }.recover { + case cause: Throwable => + throw cause + }.get + logInfo("HiveThriftServer2 started successfully") + } + + override def afterAll(): Unit = { + try { + hiveServer2.stop() + } finally { + super.afterAll() + } + } + + protected def withJdbcStatement(fs: (Statement => Unit)*): Unit = { + val user = System.getProperty("user.name") + + val serverPort = hiveServer2.getHiveConf.get(ConfVars.HIVE_SERVER2_THRIFT_PORT.varname) + val connections = + fs.map { _ => DriverManager.getConnection(s"jdbc:hive2://localhost:$serverPort", user, "") } + val statements = connections.map(_.createStatement()) + + try { + statements.zip(fs).foreach { case (s, f) => f(s) } + } finally { + statements.foreach(_.close()) + connections.foreach(_.close()) + } + } + + private def startThriftServer(port: Int, attempt: Int): Unit = { + logInfo(s"Trying to start HiveThriftServer2: port=$port, attempt=$attempt") + val sqlContext = spark.newSession().sqlContext + sqlContext.setConf(ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, port.toString) + hiveServer2 = HiveThriftServer2.startWithContext(sqlContext) + + // Wait for thrift server to be ready 
to serve the query, via executing simple query + // till the query succeeds. See SPARK-30345 for more details. + eventually(timeout(30.seconds), interval(1.seconds)) { + withJdbcStatement { _.execute("SELECT 1") } + } + } +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 21870ffd463ec..f7ee3e0a46cd1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -231,4 +231,20 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { assert(!rs.next()) } } + + test("GetTypeInfo Thrift API") { + def checkResult(rs: ResultSet, typeNames: Seq[String]): Unit = { + for (i <- typeNames.indices) { + assert(rs.next()) + assert(rs.getString("TYPE_NAME") === typeNames(i)) + } + // Make sure there are no more elements + assert(!rs.next()) + } + + withJdbcStatement() { statement => + val metaData = statement.getConnection.getMetaData + checkResult(metaData.getTypeInfo, ThriftserverShimUtils.supportedType().map(_.getName)) + } + } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala new file mode 100644 index 0000000000000..ffd1fc48f19fe --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver + +import test.custom.listener.{DummyQueryExecutionListener, DummyStreamingQueryListener} + +import org.apache.spark.SparkFunSuite +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.hive.HiveUtils.{HIVE_METASTORE_JARS, HIVE_METASTORE_VERSION} +import org.apache.spark.sql.hive.test.TestHiveContext +import org.apache.spark.sql.internal.StaticSQLConf.{QUERY_EXECUTION_LISTENERS, STREAMING_QUERY_LISTENERS, WAREHOUSE_PATH} + +class SparkSQLEnvSuite extends SparkFunSuite { + test("SPARK-29604 external listeners should be initialized with Spark classloader") { + withSystemProperties( + QUERY_EXECUTION_LISTENERS.key -> classOf[DummyQueryExecutionListener].getCanonicalName, + STREAMING_QUERY_LISTENERS.key -> classOf[DummyStreamingQueryListener].getCanonicalName, + WAREHOUSE_PATH.key -> TestHiveContext.makeWarehouseDir().toURI.getPath, + // The issue occured from "maven" and list of custom jars, but providing list of custom + // jars to initialize HiveClient isn't trivial, so just use "maven". 
+ HIVE_METASTORE_JARS.key -> "maven", + HIVE_METASTORE_VERSION.key -> null, + SparkLauncher.SPARK_MASTER -> "local[2]", + "spark.app.name" -> "testApp") { + + try { + SparkSQLEnv.init() + + val session = SparkSession.getActiveSession + assert(session.isDefined) + assert(session.get.listenerManager.listListeners() + .exists(_.isInstanceOf[DummyQueryExecutionListener])) + assert(session.get.streams.listListeners() + .exists(_.isInstanceOf[DummyStreamingQueryListener])) + } finally { + SparkSQLEnv.stop() + } + } + } + + private def withSystemProperties(pairs: (String, String)*)(f: => Unit): Unit = { + def setProperties(properties: Seq[(String, String)]): Unit = { + properties.foreach { case (key, value) => + if (value != null) { + System.setProperty(key, value) + } else { + System.clearProperty(key) + } + } + } + + val oldValues = pairs.map { kv => kv._1 -> System.getProperty(kv._1) } + try { + setProperties(pairs) + f + } finally { + setProperties(oldValues) + } + } +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index f198372a4c998..a63b5dac0aac3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -261,10 +261,10 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } } - // We do not fully support interval type - ignore(s"$version get interval type") { + test(s"$version get interval type") { testExecuteStatementWithProtocolVersion(version, "SELECT interval '1' year '2' day") { rs => assert(rs.next()) + assert(rs.getString(1) === "1 years 2 days") } } diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 1f7b3feae47b5..d9ac9ab441f0c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -18,17 +18,16 @@ package org.apache.spark.sql.hive.thriftserver import java.io.File -import java.sql.{DriverManager, SQLException, Statement, Timestamp} -import java.util.Locale +import java.sql.{SQLException, Statement, Timestamp} +import java.util.{Locale, MissingFormatArgumentException} -import scala.util.{Random, Try} import scala.util.control.NonFatal -import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hive.service.cli.HiveSQLException -import org.scalatest.Ignore +import org.apache.commons.lang3.exception.ExceptionUtils -import org.apache.spark.sql.{AnalysisException, SQLQueryTestSuite} +import org.apache.spark.SparkException +import org.apache.spark.sql.SQLQueryTestSuite +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.internal.SQLConf @@ -36,88 +35,67 @@ import org.apache.spark.sql.types._ /** * Re-run all the tests in SQLQueryTestSuite via Thrift Server. - * Note that this TestSuite does not support maven. + * + * To run the entire test suite: + * {{{ + * build/sbt "hive-thriftserver/test-only *ThriftServerQueryTestSuite" -Phive-thriftserver + * }}} + * + * This test suite won't generate golden files. To re-generate golden files for entire suite, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite" + * }}} * * TODO: * 1. Support UDF testing. * 2. 
Support DESC command. * 3. Support SHOW command. */ -@Ignore -class ThriftServerQueryTestSuite extends SQLQueryTestSuite { - - private var hiveServer2: HiveThriftServer2 = _ - - override def beforeEach(): Unit = { - // Chooses a random port between 10000 and 19999 - var listeningPort = 10000 + Random.nextInt(10000) - - // Retries up to 3 times with different port numbers if the server fails to start - (1 to 3).foldLeft(Try(startThriftServer(listeningPort, 0))) { case (started, attempt) => - started.orElse { - listeningPort += 1 - Try(startThriftServer(listeningPort, attempt)) - } - }.recover { - case cause: Throwable => - throw cause - }.get - logInfo("HiveThriftServer2 started successfully") - } - - override def afterEach(): Unit = { - hiveServer2.stop() - } - - override val isTestWithConfigSets = false +class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServer { /** List of test cases to ignore, in lower cases. */ - override def blackList: Set[String] = Set( - "blacklist.sql", // Do NOT remove this one. It is here to test the blacklist functionality. 
+ override def blackList: Set[String] = super.blackList ++ Set( // Missing UDF - "pgSQL/boolean.sql", - "pgSQL/case.sql", + "postgreSQL/boolean.sql", + "postgreSQL/case.sql", // SPARK-28624 "date.sql", - // SPARK-28619 - "pgSQL/aggregates_part1.sql", - "group-by.sql", // SPARK-28620 - "pgSQL/float4.sql", + "postgreSQL/float4.sql", // SPARK-28636 "decimalArithmeticOperations.sql", "literals.sql", "subquery/scalar-subquery/scalar-subquery-predicate.sql", "subquery/in-subquery/in-limit.sql", + "subquery/in-subquery/in-group-by.sql", "subquery/in-subquery/simple-in.sql", "subquery/in-subquery/in-order-by.sql", - "subquery/in-subquery/in-set-operations.sql", - // SPARK-28637 - "cast.sql", - "ansi/interval.sql" + "subquery/in-subquery/in-set-operations.sql" ) override def runQueries( queries: Seq[String], testCase: TestCase, - configSet: Option[Seq[(String, String)]]): Unit = { + configSet: Seq[(String, String)]): Unit = { // We do not test with configSet. withJdbcStatement { statement => loadTestData(statement) + configSet.foreach { case (k, v) => + statement.execute(s"SET $k = $v") + } + testCase match { - case _: PgSQLTest => - // PostgreSQL enabled cartesian product by default. - statement.execute(s"SET ${SQLConf.CROSS_JOINS_ENABLED.key} = true") - statement.execute(s"SET ${SQLConf.ANSI_SQL_PARSER.key} = true") - statement.execute(s"SET ${SQLConf.PREFER_INTEGRAL_DIVISION.key} = true") + case _: PgSQLTest | _: AnsiTest => + statement.execute(s"SET ${SQLConf.ANSI_ENABLED.key} = true") case _ => + statement.execute(s"SET ${SQLConf.ANSI_ENABLED.key} = false") } // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryOutput] = queries.map { sql => - val output = getNormalizedResult(statement, sql) + val (_, output) = handleExceptions(getNormalizedResult(statement, sql)) // We might need to do some query canonicalization in the future. 
QueryOutput( sql = sql, @@ -128,7 +106,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { // Read back the golden file. val expectedOutputs: Seq[QueryOutput] = { val goldenOutput = fileToString(new File(testCase.resultFile)) - val segments = goldenOutput.split("-- !query.+\n") + val segments = goldenOutput.split("-- !query.*\n") // each query has 3 segments, plus the header assert(segments.size == outputs.size * 3 + 1, @@ -136,8 +114,9 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { "Try regenerate the result files.") Seq.tabulate(outputs.size) { i => val sql = segments(i * 3 + 1).trim + val schema = segments(i * 3 + 2).trim val originalOut = segments(i * 3 + 3) - val output = if (isNeedSort(sql)) { + val output = if (schema != emptySchema && isNeedSort(sql)) { originalOut.split("\n").sorted.mkString("\n") } else { originalOut @@ -166,19 +145,48 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { || d.sql.toUpperCase(Locale.ROOT).startsWith("DESC\n") || d.sql.toUpperCase(Locale.ROOT).startsWith("DESCRIBE ") || d.sql.toUpperCase(Locale.ROOT).startsWith("DESCRIBE\n") => + // Skip show command, see HiveResult.hiveResultString case s if s.sql.toUpperCase(Locale.ROOT).startsWith("SHOW ") || s.sql.toUpperCase(Locale.ROOT).startsWith("SHOW\n") => - // AnalysisException should exactly match. 
+ + case _ if output.output.startsWith(classOf[NoSuchTableException].getPackage.getName) => + assert(expected.output.startsWith(classOf[NoSuchTableException].getPackage.getName), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + + case _ if output.output.startsWith(classOf[SparkException].getName) && + output.output.contains("overflow") => + assert(expected.output.contains(classOf[ArithmeticException].getName) && + expected.output.contains("overflow"), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + + case _ if output.output.startsWith(classOf[RuntimeException].getName) => + assert(expected.output.contains("Exception"), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + + case _ if output.output.startsWith(classOf[ArithmeticException].getName) && + output.output.contains("causes overflow") => + assert(expected.output.contains(classOf[ArithmeticException].getName) && + expected.output.contains("causes overflow"), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + + case _ if output.output.startsWith(classOf[MissingFormatArgumentException].getName) && + output.output.contains("Format specifier") => + assert(expected.output.contains(classOf[MissingFormatArgumentException].getName) && + expected.output.contains("Format specifier"), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + // SQLException should not exactly match. We only assert the result contains Exception. 
case _ if output.output.startsWith(classOf[SQLException].getName) => assert(expected.output.contains("Exception"), s"Exception did not match for query #$i\n${expected.sql}, " + s"expected: ${expected.output}, but got: ${output.output}") - // HiveSQLException is usually a feature that our ThriftServer cannot support. - // Please add SQL to blackList. - case _ if output.output.startsWith(classOf[HiveSQLException].getName) => - assert(false, s"${output.output} for query #$i\n${expected.sql}") + case _ => assertResult(expected.output, s"Result did not match for query #$i\n${expected.sql}") { output.output @@ -201,7 +209,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { } } - override def listTestCases(): Seq[TestCase] = { + override lazy val listTestCases: Seq[TestCase] = { listFilesRecursively(new File(inputFilePath)).flatMap { file => val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".out" val absPath = file.getAbsolutePath @@ -209,8 +217,10 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) { Seq.empty - } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}pgSQL")) { + } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) { PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil + } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) { + AnsiTestCase(testCaseName, absPath, resultFile) :: Nil } else { RegularTestCase(testCaseName, absPath, resultFile) :: Nil } @@ -225,54 +235,30 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { } } - private def getNormalizedResult(statement: Statement, sql: String): Seq[String] = { - try { - val rs = statement.executeQuery(sql) - val cols = rs.getMetaData.getColumnCount - val buildStr = () => (for (i <- 1 to cols) yield { - getHiveResult(rs.getObject(i)) - }).mkString("\t") - - val answer = 
Iterator.continually(rs.next()).takeWhile(identity).map(_ => buildStr()).toSeq - .map(replaceNotIncludedMsg) - if (isNeedSort(sql)) { - answer.sorted - } else { - answer + /** ThriftServer wraps the root exception, so it needs to be extracted. */ + override def handleExceptions(result: => (String, Seq[String])): (String, Seq[String]) = { + super.handleExceptions { + try { + result + } catch { + case NonFatal(e) => throw ExceptionUtils.getRootCause(e) } - } catch { - case a: AnalysisException => - // Do not output the logical plan tree which contains expression IDs. - // Also implement a crude way of masking expression IDs in the error message - // with a generic pattern "###". - val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage - Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x")).sorted - case NonFatal(e) => - // If there is an exception, put the exception class followed by the message. - Seq(e.getClass.getName, e.getMessage) } } - private def startThriftServer(port: Int, attempt: Int): Unit = { - logInfo(s"Trying to start HiveThriftServer2: port=$port, attempt=$attempt") - val sqlContext = spark.newSession().sqlContext - sqlContext.setConf(ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, port.toString) - hiveServer2 = HiveThriftServer2.startWithContext(sqlContext) - } - - private def withJdbcStatement(fs: (Statement => Unit)*) { - val user = System.getProperty("user.name") - - val serverPort = hiveServer2.getHiveConf.get(ConfVars.HIVE_SERVER2_THRIFT_PORT.varname) - val connections = - fs.map { _ => DriverManager.getConnection(s"jdbc:hive2://localhost:$serverPort", user, "") } - val statements = connections.map(_.createStatement()) - - try { - statements.zip(fs).foreach { case (s, f) => f(s) } - } finally { - statements.foreach(_.close()) - connections.foreach(_.close()) + private def getNormalizedResult(statement: Statement, sql: String): (String, Seq[String]) = { + val rs = statement.executeQuery(sql) + val cols = rs.getMetaData.getColumnCount + 
val buildStr = () => (for (i <- 1 to cols) yield { + getHiveResult(rs.getObject(i)) + }).mkString("\t") + + val answer = Iterator.continually(rs.next()).takeWhile(identity).map(_ => buildStr()).toSeq + .map(replaceNotIncludedMsg) + if (isNeedSort(sql)) { + ("", answer.sorted) + } else { + ("", answer) } } @@ -337,7 +323,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { upperCase.startsWith("SELECT ") || upperCase.startsWith("SELECT\n") || upperCase.startsWith("WITH ") || upperCase.startsWith("WITH\n") || upperCase.startsWith("VALUES ") || upperCase.startsWith("VALUES\n") || - // pgSQL/union.sql + // postgreSQL/union.sql upperCase.startsWith("(") } @@ -350,7 +336,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { case t: Timestamp => HiveResult.toHiveString((t, TimestampType)) case d: java.math.BigDecimal => - HiveResult.toHiveString((d, DecimalType.fromBigDecimal(d))) + HiveResult.toHiveString((d, DecimalType.fromDecimal(Decimal(d)))) case bin: Array[Byte] => HiveResult.toHiveString((bin, BinaryType)) case other => diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala new file mode 100644 index 0000000000000..3e1fce78ae71c --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver + +class ThriftServerWithSparkContextSuite extends SharedThriftServer { + + test("SPARK-29911: Uncache cached tables when session closed") { + val cacheManager = spark.sharedState.cacheManager + val globalTempDB = spark.sharedState.globalTempViewManager.database + withJdbcStatement { statement => + statement.execute("CACHE TABLE tempTbl AS SELECT 1") + } + // the cached data of local temporary view should be uncached + assert(cacheManager.isEmpty) + try { + withJdbcStatement { statement => + statement.execute("CREATE GLOBAL TEMP VIEW globalTempTbl AS SELECT 1, 2") + statement.execute(s"CACHE TABLE $globalTempDB.globalTempTbl") + } + // the cached data of global temporary view shouldn't be uncached + assert(!cacheManager.isEmpty) + } finally { + withJdbcStatement { statement => + statement.execute(s"UNCACHE TABLE IF EXISTS $globalTempDB.globalTempTbl") + } + assert(cacheManager.isEmpty) + } + } +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala index 47cf4f104d204..7f731f3d05e51 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala @@ -24,8 +24,8 @@ import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, 
Matchers} import org.scalatest.concurrent.Eventually._ -import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ +import org.scalatestplus.selenium.WebBrowser import org.apache.spark.ui.SparkUICssErrorHandler diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala new file mode 100644 index 0000000000000..075032fa5d099 --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.thriftserver.ui + +import java.util.Properties + +import org.mockito.Mockito.{mock, RETURNS_SMART_NULLS} +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.internal.config.Status.{ASYNC_TRACKING_ENABLED, LIVE_ENTITY_UPDATE_PERIOD} +import org.apache.spark.scheduler.SparkListenerJobStart +import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.status.ElementTrackingStore +import org.apache.spark.util.kvstore.InMemoryStore + +class HiveThriftServer2ListenerSuite extends SparkFunSuite with BeforeAndAfter { + + private var kvstore: ElementTrackingStore = _ + + after { + if (kvstore != null) { + kvstore.close() + kvstore = null + } + } + + Seq(true, false).foreach { live => + test(s"listener events should store successfully (live = $live)") { + val (statusStore: HiveThriftServer2AppStatusStore, + listener: HiveThriftServer2Listener) = createAppStatusStore(live) + + listener.onOtherEvent(SparkListenerThriftServerSessionCreated("localhost", "sessionId", + "user", System.currentTimeMillis())) + listener.onOtherEvent(SparkListenerThriftServerOperationStart("id", "sessionId", + "dummy query", "groupId", System.currentTimeMillis(), "user")) + listener.onOtherEvent(SparkListenerThriftServerOperationParsed("id", "dummy plan")) + listener.onJobStart(SparkListenerJobStart( + 0, + System.currentTimeMillis(), + Nil, + createProperties)) + listener.onOtherEvent(SparkListenerThriftServerOperationFinish("id", + System.currentTimeMillis())) + listener.onOtherEvent(SparkListenerThriftServerOperationClosed("id", + System.currentTimeMillis())) + + if (live) { + assert(statusStore.getOnlineSessionNum === 1) + } + + listener.onOtherEvent(SparkListenerThriftServerSessionClosed("sessionId", + System.currentTimeMillis())) + + if (!live) { + // To update history store + kvstore.close(false) + } + 
assert(statusStore.getOnlineSessionNum === 0) + assert(statusStore.getExecutionList.size === 1) + + val storeExecData = statusStore.getExecutionList.head + + assert(storeExecData.execId === "id") + assert(storeExecData.sessionId === "sessionId") + assert(storeExecData.executePlan === "dummy plan") + assert(storeExecData.jobId === Seq("0")) + assert(listener.noLiveData()) + } + } + + Seq(true, false).foreach { live => + test(s"cleanup session if exceeds the threshold (live = $live)") { + val (statusStore: HiveThriftServer2AppStatusStore, + listener: HiveThriftServer2Listener) = createAppStatusStore(true) + var time = 0 + listener.onOtherEvent(SparkListenerThriftServerSessionCreated("localhost", "sessionId1", + "user", time)) + time += 1 + listener.onOtherEvent(SparkListenerThriftServerSessionCreated("localhost", "sessionId2", + "user", time)) + time += 1 + listener.onOtherEvent(SparkListenerThriftServerSessionClosed("sessionId1", time)) + time += 1 + listener.onOtherEvent(SparkListenerThriftServerSessionClosed("sessionId2", time)) + listener.onOtherEvent(SparkListenerThriftServerSessionCreated("localhost", "sessionId3", + "user", time)) + time += 1 + listener.onOtherEvent(SparkListenerThriftServerSessionClosed("sessionId3", time)) + + if (!live) { + kvstore.close(false) + } + assert(statusStore.getOnlineSessionNum === 0) + assert(statusStore.getSessionCount === 1) + assert(statusStore.getSession("sessionId1") === None) + assert(listener.noLiveData()) + } + } + + test("update execution info when jobstart event come after execution end event") { + val (statusStore: HiveThriftServer2AppStatusStore, + listener: HiveThriftServer2Listener) = createAppStatusStore(true) + + listener.onOtherEvent(SparkListenerThriftServerSessionCreated("localhost", "sessionId", "user", + System.currentTimeMillis())) + listener.onOtherEvent(SparkListenerThriftServerOperationStart("id", "sessionId", "dummy query", + "groupId", System.currentTimeMillis(), "user")) + 
listener.onOtherEvent(SparkListenerThriftServerOperationParsed("id", "dummy plan")) + listener.onOtherEvent(SparkListenerThriftServerOperationFinish("id", + System.currentTimeMillis())) + listener.onOtherEvent(SparkListenerThriftServerOperationClosed("id", + System.currentTimeMillis())) + listener.onJobStart(SparkListenerJobStart( + 0, + System.currentTimeMillis(), + Nil, + createProperties)) + listener.onOtherEvent(SparkListenerThriftServerSessionClosed("sessionId", + System.currentTimeMillis())) + val exec = statusStore.getExecution("id") + assert(exec.isDefined) + assert(exec.get.jobId === Seq("0")) + assert(listener.noLiveData()) + } + + private def createProperties: Properties = { + val properties = new Properties() + properties.setProperty(SparkContext.SPARK_JOB_GROUP_ID, "groupId") + properties + } + + private def createAppStatusStore(live: Boolean) = { + val sparkConf = new SparkConf() + sparkConf.set(ASYNC_TRACKING_ENABLED, false) + .set(SQLConf.THRIFTSERVER_UI_SESSION_LIMIT, 1) + .set(LIVE_ENTITY_UPDATE_PERIOD, 0L) + kvstore = new ElementTrackingStore(new InMemoryStore, sparkConf) + if (live) { + val server = mock(classOf[HiveThriftServer2], RETURNS_SMART_NULLS) + val listener = new HiveThriftServer2Listener(kvstore, sparkConf, Some(server)) + (new HiveThriftServer2AppStatusStore(kvstore, Some(listener)), listener) + } else { + (new HiveThriftServer2AppStatusStore(kvstore), + new HiveThriftServer2Listener(kvstore, sparkConf, None, false)) + } + } +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPageSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPageSuite.scala new file mode 100644 index 0000000000000..9f3c2957a182d --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPageSuite.scala @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver.ui + +import java.util.{Calendar, Locale} +import javax.servlet.http.HttpServletRequest + +import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.scheduler.SparkListenerJobStart +import org.apache.spark.sql.hive.thriftserver._ +import org.apache.spark.status.ElementTrackingStore +import org.apache.spark.util.kvstore.InMemoryStore + + +class ThriftServerPageSuite extends SparkFunSuite with BeforeAndAfter { + + private var kvstore: ElementTrackingStore = _ + + after { + if (kvstore != null) { + kvstore.close() + kvstore = null + } + } + + /** + * Run a dummy session and return the store + */ + private def getStatusStore: HiveThriftServer2AppStatusStore = { + kvstore = new ElementTrackingStore(new InMemoryStore, new SparkConf()) + val server = mock(classOf[HiveThriftServer2], RETURNS_SMART_NULLS) + val sparkConf = new SparkConf + + val listener = new HiveThriftServer2Listener(kvstore, sparkConf, Some(server)) + val statusStore = new HiveThriftServer2AppStatusStore(kvstore, Some(listener)) + + listener.onOtherEvent(SparkListenerThriftServerSessionCreated("localhost", "sessionid", 
"user", + System.currentTimeMillis())) + listener.onOtherEvent(SparkListenerThriftServerOperationStart("id", "sessionid", + "dummy query", "groupid", System.currentTimeMillis(), "user")) + listener.onOtherEvent(SparkListenerThriftServerOperationParsed("id", "dummy plan")) + listener.onOtherEvent(SparkListenerJobStart(0, System.currentTimeMillis(), Seq())) + listener.onOtherEvent(SparkListenerThriftServerOperationFinish("id", + System.currentTimeMillis())) + listener.onOtherEvent(SparkListenerThriftServerOperationClosed("id", + System.currentTimeMillis())) + listener.onOtherEvent(SparkListenerThriftServerSessionClosed("sessionid", + System.currentTimeMillis())) + + statusStore + } + + test("thriftserver page should load successfully") { + val store = getStatusStore + + val request = mock(classOf[HttpServletRequest]) + val tab = mock(classOf[ThriftServerTab], RETURNS_SMART_NULLS) + when(tab.startTime).thenReturn(Calendar.getInstance().getTime) + when(tab.store).thenReturn(store) + when(tab.appName).thenReturn("testing") + when(tab.headerTabs).thenReturn(Seq.empty) + val page = new ThriftServerPage(tab) + val html = page.render(request).toString().toLowerCase(Locale.ROOT) + + // session statistics and sql statistics tables should load successfully + assert(html.contains("session statistics (1)")) + assert(html.contains("sql statistics (1)")) + assert(html.contains("dummy query")) + assert(html.contains("dummy plan")) + + // Pagination support + assert(html.contains("")) + + // Hiding table support + assert(html.contains("class=\"collapse-aggregated-sessionstat" + + " collapse-table\" onclick=\"collapsetable")) + } + + test("thriftserver session page should load successfully") { + val store = getStatusStore + + val request = mock(classOf[HttpServletRequest]) + when(request.getParameter("id")).thenReturn("sessionid") + val tab = mock(classOf[ThriftServerTab], RETURNS_SMART_NULLS) + when(tab.startTime).thenReturn(Calendar.getInstance().getTime) + 
when(tab.store).thenReturn(store) + when(tab.appName).thenReturn("testing") + when(tab.headerTabs).thenReturn(Seq.empty) + val page = new ThriftServerSessionPage(tab) + val html = page.render(request).toString().toLowerCase(Locale.ROOT) + + // session sql statistics table should load successfully + assert(html.contains("sql statistics")) + assert(html.contains("user")) + assert(html.contains("groupid")) + + // Pagination support + assert(html.contains("")) + + // Hiding table support + assert(html.contains("collapse-aggregated-sqlsessionstat collapse-table\"" + + " onclick=\"collapsetable")) + } +} + diff --git a/sql/hive-thriftserver/v1.2.1/if/TCLIService.thrift b/sql/hive-thriftserver/v1.2/if/TCLIService.thrift similarity index 99% rename from sql/hive-thriftserver/v1.2.1/if/TCLIService.thrift rename to sql/hive-thriftserver/v1.2/if/TCLIService.thrift index 7cd6fa37cec37..225e319737811 100644 --- a/sql/hive-thriftserver/v1.2.1/if/TCLIService.thrift +++ b/sql/hive-thriftserver/v1.2/if/TCLIService.thrift @@ -1028,7 +1028,6 @@ enum TFetchOrientation { FETCH_NEXT, // Get the previous rowset. The fetch offset is ignored. - // NOT SUPPORTED FETCH_PRIOR, // Return the rowset at the given fetch offset relative @@ -1056,8 +1055,8 @@ struct TFetchResultsReq { // Operation from which to fetch results. 1: required TOperationHandle operationHandle - // The fetch orientation. For V1 this must be either - // FETCH_NEXT or FETCH_FIRST. Defaults to FETCH_NEXT. + // The fetch orientation. This must be either + // FETCH_NEXT, FETCH_PRIOR or FETCH_FIRST. Defaults to FETCH_NEXT. 
2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT // Max number of rows that should be returned in diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java similarity index 100% rename from 
sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java 
rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java rename to 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java 
similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java rename to 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java similarity index 
100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java rename to 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java rename to 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java similarity index 100% rename from 
sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java similarity index 100% rename from 
sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java diff --git a/sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java rename to sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/AbstractService.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/AbstractService.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/CompositeService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/CompositeService.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java similarity index 97% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/CookieSigner.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java index ee51c24351c3d..f2a80c9d5ffbc 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/CookieSigner.java +++ b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java @@ -81,7 +81,7 @@ public String verifyAndExtract(String signedStr) { if (LOG.isDebugEnabled()) { LOG.debug("Signature generated for " + rawValue + " inside verify is " + currentSignature); } - if (!originalSignature.equals(currentSignature)) { + if (!MessageDigest.isEqual(originalSignature.getBytes(), currentSignature.getBytes())) { throw new IllegalArgumentException("Invalid sign, original = " + originalSignature + " current = " + currentSignature); } diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/ServiceOperations.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java similarity index 
100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/ServiceOperations.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/ServiceUtils.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/ServiceUtils.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/CLIService.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/Column.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/Column.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ColumnValue.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ColumnValue.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/FetchOrientation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/FetchOrientation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/GetInfoType.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java similarity index 100% rename from 
sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/GetInfoType.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/GetInfoValue.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/GetInfoValue.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/Handle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/Handle.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/HiveSQLException.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/HiveSQLException.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ICLIService.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/ICLIService.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/OperationHandle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/OperationHandle.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/OperationState.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/OperationState.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/OperationType.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/OperationType.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java rename to 
sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/RowBasedSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/RowBasedSet.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/RowSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/RowSet.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/RowSetFactory.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/RowSetFactory.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/SessionHandle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/SessionHandle.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/TableSchema.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java similarity index 100% rename from 
sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/TableSchema.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/Type.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/Type.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java similarity index 99% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java index 0f72071d7e7d1..3e81f8afbd85f 100644 --- 
a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java +++ b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java @@ -73,7 +73,7 @@ public class GetTypeInfoOperation extends MetadataOperation { .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, "Usually 2 or 10"); - private final RowSet rowSet; + protected final RowSet rowSet; protected GetTypeInfoOperation(HiveSession parentSession) { super(parentSession, OperationType.GET_TYPE_INFO); diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/Operation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java similarity index 98% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/Operation.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java index 19153b654b08a..51bb28748d9e2 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/Operation.java +++ b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java @@ -58,7 +58,10 @@ public abstract class Operation { private long lastAccessTime; protected static final EnumSet DEFAULT_FETCH_ORIENTATION_SET = - EnumSet.of(FetchOrientation.FETCH_NEXT,FetchOrientation.FETCH_FIRST); + EnumSet.of( + FetchOrientation.FETCH_NEXT, + FetchOrientation.FETCH_FIRST, + FetchOrientation.FETCH_PRIOR); protected Operation(HiveSession parentSession, OperationType opType, boolean runInBackground) { this.parentSession = parentSession; diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java similarity index 99% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java rename to 
sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java index 18652f17aa926..c7726f1fac07a 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.io.Serializable; -import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; import java.security.PrivilegedExceptionAction; import java.sql.SQLException; diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSession.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSession.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java diff --git 
a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/session/SessionManager.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java similarity index 89% rename from 
sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/server/HiveServer2.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java index a30be2bc06b9e..95233996cbbcb 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/server/HiveServer2.java +++ b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java @@ -31,8 +31,6 @@ import org.apache.commons.cli.ParseException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.LogUtils; -import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hive.common.util.HiveStringUtils; @@ -153,25 +151,13 @@ private static void startHiveServer2() throws Throwable { public static void main(String[] args) { HiveConf.setLoadHiveServer2Config(true); - try { - ServerOptionsProcessor oproc = new ServerOptionsProcessor("hiveserver2"); - ServerOptionsProcessorResponse oprocResponse = oproc.parse(args); + ServerOptionsProcessor oproc = new ServerOptionsProcessor("hiveserver2"); + ServerOptionsProcessorResponse oprocResponse = oproc.parse(args); - // NOTE: It is critical to do this here so that log4j is reinitialized - // before any of the other core hive classes are loaded - String initLog4jMessage = LogUtils.initHiveLog4j(); - LOG.debug(initLog4jMessage); - HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); + HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); - // Log debug message from "oproc" after log4j initialize properly - LOG.debug(oproc.getDebugMessage().toString()); - - // Call the executor which will execute the appropriate command based on the parsed options - oprocResponse.getServerOptionsExecutor().execute(); - } catch (LogInitializationException e) { - LOG.error("Error 
initializing log: " + e.getMessage(), e); - System.exit(-1); - } + // Call the executor which will execute the appropriate command based on the parsed options + oprocResponse.getServerOptionsExecutor().execute(); } /** diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java similarity index 100% rename from sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java rename to sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java diff --git a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala similarity index 89% rename from sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala rename to sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index 87c0f8f6a571a..fbfc698ecb4bf 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.hive.thriftserver import org.apache.commons.logging.LogFactory -import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type} +import org.apache.hive.service.cli.Type._ import org.apache.hive.service.cli.thrift.TProtocolVersion._ /** @@ -51,10 +51,12 @@ private[thriftserver] object ThriftserverShimUtils { private[thriftserver] def toJavaSQLType(s: String): Int = 
Type.getType(s).toJavaSQLType - private[thriftserver] def addToClassPath( - loader: ClassLoader, - auxJars: Array[String]): ClassLoader = { - Utilities.addToClassPath(loader, auxJars) + private[thriftserver] def supportedType(): Seq[Type] = { + Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, + TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, + FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, + DATE_TYPE, TIMESTAMP_TYPE, + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) } private[thriftserver] val testedProtocolVersions = Seq( diff --git a/sql/hive-thriftserver/v2.3.5/if/TCLIService.thrift b/sql/hive-thriftserver/v2.3/if/TCLIService.thrift similarity index 99% rename from sql/hive-thriftserver/v2.3.5/if/TCLIService.thrift rename to sql/hive-thriftserver/v2.3/if/TCLIService.thrift index 824b04919073a..9026cd25df5b3 100644 --- a/sql/hive-thriftserver/v2.3.5/if/TCLIService.thrift +++ b/sql/hive-thriftserver/v2.3/if/TCLIService.thrift @@ -1105,7 +1105,6 @@ enum TFetchOrientation { FETCH_NEXT, // Get the previous rowset. The fetch offset is ignored. - // NOT SUPPORTED FETCH_PRIOR, // Return the rowset at the given fetch offset relative @@ -1133,8 +1132,8 @@ struct TFetchResultsReq { // Operation from which to fetch results. 1: required TOperationHandle operationHandle - // The fetch orientation. For V1 this must be either - // FETCH_NEXT or FETCH_FIRST. Defaults to FETCH_NEXT. + // The fetch orientation. This must be either + // FETCH_NEXT, FETCH_PRIOR or FETCH_FIRST. Defaults to FETCH_NEXT. 
2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT // Max number of rows that should be returned in diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java 
b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java 
rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java 
b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java rename to 
sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java diff --git 
a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java 
similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java diff --git 
a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java 
b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java similarity index 100% rename 
from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java rename to 
sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java 
b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java rename to 
sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java diff --git 
a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java 
b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java 
b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java rename to 
sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java 
b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java rename to sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/AbstractService.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/AbstractService.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/AbstractService.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/AbstractService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/CompositeService.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CompositeService.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/CompositeService.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CompositeService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CookieSigner.java similarity index 97% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/CookieSigner.java rename to 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CookieSigner.java index 9c8bd563268bc..593abd2e153a0 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/CookieSigner.java +++ b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CookieSigner.java @@ -81,7 +81,7 @@ public String verifyAndExtract(String signedStr) { if (LOG.isDebugEnabled()) { LOG.debug("Signature generated for " + rawValue + " inside verify is " + currentSignature); } - if (!originalSignature.equals(currentSignature)) { + if (!MessageDigest.isEqual(originalSignature.getBytes(), currentSignature.getBytes())) { throw new IllegalArgumentException("Invalid sign, original = " + originalSignature + " current = " + currentSignature); } diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/ServiceOperations.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceOperations.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/ServiceOperations.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceOperations.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/ServiceUtils.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceUtils.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/ServiceUtils.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceUtils.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java 
diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/CLIService.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/CLIService.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/CLIService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ColumnValue.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ColumnValue.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/FetchOrientation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/FetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/FetchOrientation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/FetchOrientation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/GetInfoType.java 
b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoType.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/GetInfoType.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoType.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/GetInfoValue.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/GetInfoValue.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoValue.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/Handle.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/Handle.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/Handle.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/Handle.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/HiveSQLException.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HiveSQLException.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/HiveSQLException.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HiveSQLException.java diff --git 
a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ICLIService.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ICLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/ICLIService.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ICLIService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/OperationHandle.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/OperationHandle.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationHandle.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/OperationState.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationState.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/OperationState.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationState.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/OperationType.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationType.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/OperationType.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationType.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/RowBasedSet.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/RowBasedSet.java rename to 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowBasedSet.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/RowSet.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/RowSet.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSet.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/RowSetFactory.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSetFactory.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/RowSetFactory.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSetFactory.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/SessionHandle.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/SessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/SessionHandle.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/SessionHandle.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/TableSchema.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TableSchema.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/TableSchema.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TableSchema.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java rename to 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java rename to 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java similarity index 99% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java rename to 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java index 9612eb145638c..0f57a72e2a1ce 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java +++ b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java @@ -73,7 +73,7 @@ public class GetTypeInfoOperation extends MetadataOperation { .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, "Usually 2 or 10"); - private final RowSet rowSet; + protected final RowSet rowSet; protected GetTypeInfoOperation(HiveSession parentSession) { super(parentSession, OperationType.GET_TYPE_INFO); diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/Operation.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/Operation.java similarity index 98% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/Operation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/Operation.java index 788fcdee282ae..f26c715add987 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/Operation.java +++ b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/Operation.java @@ -64,7 +64,10 @@ public abstract class Operation { protected final QueryState queryState; protected static final EnumSet DEFAULT_FETCH_ORIENTATION_SET = - EnumSet.of(FetchOrientation.FETCH_NEXT,FetchOrientation.FETCH_FIRST); + EnumSet.of( + FetchOrientation.FETCH_NEXT, + FetchOrientation.FETCH_FIRST, + FetchOrientation.FETCH_PRIOR); protected Operation(HiveSession parentSession, OperationType opType) { this(parentSession, null, opType); diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java 
b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java similarity index 99% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java index ac5392cf42dbf..e2ac1ea78c1ab 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.io.Serializable; -import java.io.UnsupportedEncodingException; import java.security.PrivilegedExceptionAction; import java.sql.SQLException; import java.util.ArrayList; diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSession.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSession.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSession.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSession.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/SessionManager.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/session/SessionManager.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/SessionManager.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java rename to 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/HiveServer2.java similarity index 90% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/server/HiveServer2.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/HiveServer2.java index ae74641ef6805..b7da4e8fdf3f7 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/server/HiveServer2.java +++ b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/HiveServer2.java @@ -30,8 +30,6 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.hadoop.hive.common.JvmPauseMonitor; -import org.apache.hadoop.hive.common.LogUtils; -import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; import 
org.apache.hadoop.hive.conf.HiveConf; import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.service.CompositeService; @@ -158,25 +156,13 @@ private static void startHiveServer2() throws Throwable { public static void main(String[] args) { HiveConf.setLoadHiveServer2Config(true); - try { - ServerOptionsProcessor oproc = new ServerOptionsProcessor("hiveserver2"); - ServerOptionsProcessorResponse oprocResponse = oproc.parse(args); + ServerOptionsProcessor oproc = new ServerOptionsProcessor("hiveserver2"); + ServerOptionsProcessorResponse oprocResponse = oproc.parse(args); - // NOTE: It is critical to do this here so that log4j is reinitialized - // before any of the other core hive classes are loaded - String initLog4jMessage = LogUtils.initHiveLog4j(); - LOG.debug(initLog4jMessage); - HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); + HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); - // Log debug message from "oproc" after log4j initialize properly - LOG.debug(oproc.getDebugMessage().toString()); - - // Call the executor which will execute the appropriate command based on the parsed options - oprocResponse.getServerOptionsExecutor().execute(); - } catch (LogInitializationException e) { - LOG.error("Error initializing log: " + e.getMessage(), e); - System.exit(-1); - } + // Call the executor which will execute the appropriate command based on the parsed options + oprocResponse.getServerOptionsExecutor().execute(); } /** diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java b/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java similarity index 100% rename from sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java rename to sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java diff --git 
a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v2.3/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala similarity index 88% rename from sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala rename to sql/hive-thriftserver/v2.3/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index 124c9937c0fca..850382fe2bfd7 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v2.3/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -17,13 +17,9 @@ package org.apache.spark.sql.hive.thriftserver -import java.security.AccessController - -import scala.collection.JavaConverters._ - -import org.apache.hadoop.hive.ql.exec.AddToClassPathAction import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.thrift.Type +import org.apache.hadoop.hive.serde2.thrift.Type._ import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema} import org.apache.hive.service.rpc.thrift.TProtocolVersion._ import org.slf4j.LoggerFactory @@ -56,11 +52,12 @@ private[thriftserver] object ThriftserverShimUtils { private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType - private[thriftserver] def addToClassPath( - loader: ClassLoader, - auxJars: Array[String]): ClassLoader = { - val addAction = new AddToClassPathAction(loader, auxJars.toList.asJava) - AccessController.doPrivileged(addAction) + private[thriftserver] def supportedType(): Seq[Type] = { + Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, + TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, + FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, + DATE_TYPE, TIMESTAMP_TYPE, + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) } private[thriftserver] val 
testedProtocolVersions = Seq( diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk11-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..4a8058766319f --- /dev/null +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk11-results.txt @@ -0,0 +1,45 @@ +================================================================================================ +Hive UDAF vs Spark AF +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +hive udaf w/o group by 6492 7169 388 0.0 99066.1 1.0X +spark af w/o group by 58 88 24 1.1 890.2 111.3X +hive udaf w/ group by 4864 4888 33 0.0 74221.0 1.3X +spark af w/ group by w/o fallback 60 67 7 1.1 912.9 108.5X +spark af w/ group by w/ fallback 154 164 27 0.4 2348.2 42.2X + + +================================================================================================ +ObjectHashAggregateExec vs SortAggregateExec - typed_count +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +object agg v.s. 
sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 51728 51728 0 2.0 493.3 1.0X +object agg w/ group by w/o fallback 10174 10218 34 10.3 97.0 5.1X +object agg w/ group by w/ fallback 29341 29537 277 3.6 279.8 1.8X +sort agg w/o group by 7541 7577 28 13.9 71.9 6.9X +object agg w/o group by w/o fallback 5574 5620 38 18.8 53.2 9.3X + + +================================================================================================ +ObjectHashAggregateExec vs SortAggregateExec - percentile_approx +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 900 925 14 2.3 429.0 1.0X +object agg w/ group by w/o fallback 597 633 14 3.5 284.6 1.5X +object agg w/ group by w/ fallback 905 923 10 2.3 431.6 1.0X +sort agg w/o group by 611 631 10 3.4 291.4 1.5X +object agg w/o group by w/o fallback 559 576 11 3.8 266.5 1.6X + + diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt index f3044da972497..8c58a5a5fdf0b 100644 --- a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt @@ -2,44 +2,44 @@ Hive UDAF vs Spark AF ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 
1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -hive udaf vs spark af: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -hive udaf w/o group by 6370 / 6400 0.0 97193.6 1.0X -spark af w/o group by 54 / 63 1.2 820.8 118.4X -hive udaf w/ group by 4492 / 4507 0.0 68539.5 1.4X -spark af w/ group by w/o fallback 58 / 64 1.1 881.7 110.2X -spark af w/ group by w/ fallback 136 / 142 0.5 2075.0 46.8X +hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +hive udaf w/o group by 7014 7206 120 0.0 107031.0 1.0X +spark af w/o group by 47 59 11 1.4 716.9 149.3X +hive udaf w/ group by 4811 4831 28 0.0 73409.1 1.5X +spark af w/ group by w/o fallback 50 56 7 1.3 762.9 140.3X +spark af w/ group by w/ fallback 126 130 8 0.5 1916.6 55.8X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - typed_count ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -object agg v.s. sort agg: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -sort agg w/ group by 41500 / 41630 2.5 395.8 1.0X -object agg w/ group by w/o fallback 10075 / 10122 10.4 96.1 4.1X -object agg w/ group by w/ fallback 28131 / 28205 3.7 268.3 1.5X -sort agg w/o group by 6182 / 6221 17.0 59.0 6.7X -object agg w/o group by w/o fallback 5435 / 5468 19.3 51.8 7.6X +object agg v.s. 
sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 42969 43306 476 2.4 409.8 1.0X +object agg w/ group by w/o fallback 9744 9844 145 10.8 92.9 4.4X +object agg w/ group by w/ fallback 26814 26960 206 3.9 255.7 1.6X +sort agg w/o group by 6278 6330 57 16.7 59.9 6.8X +object agg w/o group by w/o fallback 5433 5478 60 19.3 51.8 7.9X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - percentile_approx ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -object agg v.s. sort agg: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -sort agg w/ group by 970 / 1025 2.2 462.5 1.0X -object agg w/ group by w/o fallback 772 / 798 2.7 368.1 1.3X -object agg w/ group by w/ fallback 1013 / 1044 2.1 483.1 1.0X -sort agg w/o group by 751 / 781 2.8 358.0 1.3X -object agg w/o group by w/o fallback 772 / 814 2.7 368.0 1.3X +object agg v.s. 
sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 756 773 9 2.8 360.3 1.0X +object agg w/ group by w/o fallback 548 560 7 3.8 261.3 1.4X +object agg w/ group by w/ fallback 759 773 7 2.8 362.0 1.0X +sort agg w/o group by 471 483 13 4.4 224.8 1.6X +object agg w/o group by w/o fallback 471 482 12 4.5 224.7 1.6X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..d516d3369ad05 --- /dev/null +++ b/sql/hive/benchmarks/OrcReadBenchmark-jdk11-results.txt @@ -0,0 +1,156 @@ +================================================================================================ +SQL Single Numeric Column Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1750 1872 173 9.0 111.2 1.0X +Native ORC Vectorized 433 499 68 36.3 27.5 4.0X +Hive built-in ORC 2540 2575 49 6.2 161.5 0.7X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1979 2001 31 7.9 125.8 1.0X +Native ORC Vectorized 261 303 42 60.3 16.6 7.6X +Hive built-in ORC 2559 2583 34 6.1 162.7 0.8X + +OpenJDK 64-Bit 
Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2094 2158 91 7.5 133.2 1.0X +Native ORC Vectorized 309 361 41 50.8 19.7 6.8X +Hive built-in ORC 2649 2744 135 5.9 168.4 0.8X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2256 2271 22 7.0 143.4 1.0X +Native ORC Vectorized 511 518 11 30.8 32.5 4.4X +Hive built-in ORC 2867 2880 19 5.5 182.3 0.8X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2270 2325 78 6.9 144.3 1.0X +Native ORC Vectorized 502 508 5 31.3 31.9 4.5X +Hive built-in ORC 2862 2880 24 5.5 182.0 0.8X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2376 2426 71 6.6 151.0 1.0X +Native ORC Vectorized 609 616 8 25.8 38.7 3.9X +Hive built-in ORC 2979 2991 17 5.3 189.4 0.8X + + 
+================================================================================================ +Int and String Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 4112 4232 170 2.6 392.1 1.0X +Native ORC Vectorized 2199 2223 35 4.8 209.7 1.9X +Hive built-in ORC 5150 5238 123 2.0 491.2 0.8X + + +================================================================================================ +Partitioned Table Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column - Native ORC MR 2398 2435 53 6.6 152.4 1.0X +Data column - Native ORC Vectorized 458 482 26 34.3 29.1 5.2X +Data column - Hive built-in ORC 3126 3171 64 5.0 198.8 0.8X +Partition column - Native ORC MR 1639 1680 58 9.6 104.2 1.5X +Partition column - Native ORC Vectorized 105 119 11 149.6 6.7 22.8X +Partition column - Hive built-in ORC 2223 2229 8 7.1 141.4 1.1X +Both columns - Native ORC MR 2588 2608 28 6.1 164.5 0.9X +Both columns - Native ORC Vectorized 489 522 49 32.2 31.1 4.9X +Both columns - Hive built-in ORC 3258 3292 48 4.8 207.1 0.7X + + +================================================================================================ +Repeated String Scan 
+================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1991 2028 52 5.3 189.9 1.0X +Native ORC Vectorized 392 398 8 26.7 37.4 5.1X +Hive built-in ORC 2810 2816 8 3.7 268.0 0.7X + + +================================================================================================ +String with Nulls Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3638 3647 13 2.9 346.9 1.0X +Native ORC Vectorized 1171 1181 14 9.0 111.7 3.1X +Hive built-in ORC 4847 4871 34 2.2 462.2 0.8X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3280 3283 5 3.2 312.8 1.0X +Native ORC Vectorized 1199 1206 10 8.7 114.4 2.7X +Hive built-in ORC 4263 4273 14 2.5 406.5 0.8X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1935 1950 21 5.4 184.6 1.0X +Native ORC Vectorized 451 459 10 23.2 43.1 4.3X +Hive built-in ORC 2542 2552 14 4.1 242.4 0.8X + + +================================================================================================ +Single Column Scan From Wide Columns +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 270 292 23 3.9 257.2 1.0X +Native ORC Vectorized 143 155 12 7.3 136.2 1.9X +Hive built-in ORC 1593 1627 48 0.7 1519.1 0.2X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 369 386 17 2.8 351.5 1.0X +Native ORC Vectorized 218 231 15 4.8 208.3 1.7X +Hive built-in ORC 3092 3101 12 0.3 2949.1 0.1X + +OpenJDK 64-Bit Server VM 11.0.5+10-post-Ubuntu-0ubuntu1.118.04 on Linux 4.15.0-1044-aws +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 498 531 33 2.1 475.0 1.0X +Native ORC Vectorized 360 376 18 2.9 342.9 1.4X +Hive built-in 
ORC 4786 4786 1 0.2 4564.1 0.1X + + diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt index caa78b9a8f102..c7d6c976192b2 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -2,155 +2,155 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1725 / 1759 9.1 109.7 1.0X -Native ORC Vectorized 272 / 316 57.8 17.3 6.3X -Hive built-in ORC 1970 / 1987 8.0 125.3 0.9X +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1844 1851 10 8.5 117.2 1.0X +Native ORC Vectorized 284 312 36 55.5 18.0 6.5X +Hive built-in ORC 2380 2380 1 6.6 151.3 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1633 / 1672 9.6 103.8 1.0X -Native ORC Vectorized 238 / 255 66.0 15.1 6.9X -Hive built-in ORC 2293 / 2305 6.9 145.8 0.7X +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1999 2031 45 7.9 127.1 1.0X +Native ORC Vectorized 252 264 15 62.5 16.0 7.9X +Hive built-in ORC 2483 2509 37 6.3 157.9 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1677 / 1699 9.4 106.6 1.0X -Native ORC Vectorized 325 / 342 48.3 20.7 5.2X -Hive built-in ORC 2561 / 2569 6.1 162.8 0.7X +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2134 2135 2 7.4 135.7 1.0X +Native ORC Vectorized 329 351 34 47.8 20.9 6.5X +Hive built-in ORC 2672 2716 61 5.9 169.9 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1791 / 1795 8.8 113.9 1.0X -Native ORC Vectorized 400 / 408 39.3 25.4 4.5X -Hive built-in ORC 2713 / 2720 5.8 172.5 0.7X +SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2172 2247 105 7.2 138.1 1.0X +Native ORC Vectorized 407 427 23 38.7 25.9 5.3X +Hive built-in ORC 
2806 2822 22 5.6 178.4 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1791 / 1805 8.8 113.8 1.0X -Native ORC Vectorized 433 / 438 36.3 27.5 4.1X -Hive built-in ORC 2690 / 2803 5.8 171.0 0.7X +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2187 2200 19 7.2 139.0 1.0X +Native ORC Vectorized 451 457 5 34.9 28.7 4.8X +Hive built-in ORC 2886 2938 73 5.4 183.5 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1911 / 1930 8.2 121.5 1.0X -Native ORC Vectorized 543 / 552 29.0 34.5 3.5X -Hive built-in ORC 2967 / 3065 5.3 188.6 0.6X +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2313 2319 9 6.8 147.1 1.0X +Native ORC Vectorized 554 562 7 28.4 35.2 4.2X +Hive built-in ORC 2927 2933 8 5.4 186.1 0.8X ================================================================================================ Int and String Scan 
================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 4160 / 4188 2.5 396.7 1.0X -Native ORC Vectorized 2405 / 2406 4.4 229.4 1.7X -Hive built-in ORC 5514 / 5562 1.9 525.9 0.8X +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 4162 4294 186 2.5 397.0 1.0X +Native ORC Vectorized 2236 2258 32 4.7 213.2 1.9X +Hive built-in ORC 5054 5135 114 2.1 482.0 0.8X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Data column - Native ORC MR 1863 / 1867 8.4 118.4 1.0X -Data column - Native ORC Vectorized 411 / 418 38.2 26.2 4.5X -Data column - Hive built-in ORC 3297 / 3308 4.8 209.6 0.6X -Partition column - Native ORC MR 1505 / 1506 10.4 95.7 1.2X -Partition column - Native ORC Vectorized 80 / 93 195.6 5.1 23.2X -Partition column - Hive built-in ORC 1960 / 1979 8.0 124.6 1.0X -Both columns - Native ORC MR 2076 / 2090 7.6 132.0 0.9X -Both 
columns - Native ORC Vectorized 450 / 463 34.9 28.6 4.1X -Both columns - Hive built-in ORC 3528 / 3548 4.5 224.3 0.5X +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column - Native ORC MR 2436 2447 16 6.5 154.8 1.0X +Data column - Native ORC Vectorized 421 443 35 37.4 26.8 5.8X +Data column - Hive built-in ORC 3007 3026 27 5.2 191.2 0.8X +Partition column - Native ORC MR 1603 1630 39 9.8 101.9 1.5X +Partition column - Native ORC Vectorized 84 96 15 186.7 5.4 28.9X +Partition column - Hive built-in ORC 2174 2187 18 7.2 138.2 1.1X +Both columns - Native ORC MR 2609 2645 51 6.0 165.9 0.9X +Both columns - Native ORC Vectorized 460 470 9 34.2 29.3 5.3X +Both columns - Hive built-in ORC 3094 3099 8 5.1 196.7 0.8X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1727 / 1733 6.1 164.7 1.0X -Native ORC Vectorized 375 / 379 28.0 35.7 4.6X -Hive built-in ORC 2665 / 2666 3.9 254.2 0.6X +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2036 2046 13 5.1 194.2 1.0X +Native ORC Vectorized 366 386 18 28.6 34.9 5.6X +Hive built-in ORC 2683 2686 4 3.9 255.9 0.8X 
================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3324 / 3325 3.2 317.0 1.0X -Native ORC Vectorized 1085 / 1106 9.7 103.4 3.1X -Hive built-in ORC 5272 / 5299 2.0 502.8 0.6X +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3614 3643 40 2.9 344.7 1.0X +Native ORC Vectorized 1072 1087 22 9.8 102.2 3.4X +Hive built-in ORC 4625 4636 15 2.3 441.1 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (50.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3045 / 3046 3.4 290.4 1.0X -Native ORC Vectorized 1248 / 1260 8.4 119.0 2.4X -Hive built-in ORC 3989 / 3999 2.6 380.4 0.8X +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3347 3376 42 3.1 319.2 1.0X +Native ORC Vectorized 1220 1225 7 8.6 116.3 2.7X +Hive built-in ORC 4168 4184 23 2.5 397.5 0.8X 
-OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (95.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1692 / 1694 6.2 161.3 1.0X -Native ORC Vectorized 471 / 493 22.3 44.9 3.6X -Hive built-in ORC 2398 / 2411 4.4 228.7 0.7X +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1851 1862 16 5.7 176.5 1.0X +Native ORC Vectorized 466 471 7 22.5 44.4 4.0X +Hive built-in ORC 2523 2529 8 4.2 240.6 0.7X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1371 / 1379 0.8 1307.5 1.0X -Native ORC Vectorized 121 / 135 8.6 115.8 11.3X -Hive built-in ORC 521 / 561 2.0 497.1 2.6X +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 250 264 15 4.2 238.1 1.0X +Native ORC Vectorized 121 138 24 8.7 115.5 2.1X +Hive built-in ORC 1761 1792 43 0.6 
1679.3 0.1X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 2711 / 2767 0.4 2585.5 1.0X -Native ORC Vectorized 210 / 232 5.0 200.5 12.9X -Hive built-in ORC 764 / 775 1.4 728.3 3.5X +Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 319 341 17 3.3 304.5 1.0X +Native ORC Vectorized 188 222 50 5.6 178.8 1.7X +Hive built-in ORC 3492 3508 24 0.3 3329.8 0.1X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_232-8u232-b09-0ubuntu1~18.04.1-b09 on Linux 4.15.0-1044-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3979 / 3988 0.3 3794.4 1.0X -Native ORC Vectorized 357 / 366 2.9 340.2 11.2X -Hive built-in ORC 1091 / 1095 1.0 1040.5 3.6X +Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 443 456 12 2.4 422.9 1.0X +Native ORC Vectorized 306 321 23 3.4 292.0 1.4X +Hive built-in ORC 5295 5312 24 0.2 5049.9 0.1X diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index e7ff3a5f4be2b..29825e5116ef9 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy /** * Runs the test cases that are included in the hive distribution. @@ -41,12 +42,13 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled private val originalSessionLocalTimeZone = TestHive.conf.sessionLocalTimeZone + private val originalCreateHiveTable = TestHive.conf.createHiveTableByDefaultEnabled def testCases: Seq[(String, File)] = { hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) } - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() TestHive.setCacheTables(true) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) @@ -59,13 +61,16 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) // Ensures that cross joins are enabled so that we can test them TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true) + // Ensures that the table insertion behaivor is consistent with Hive + TestHive.setConf(SQLConf.STORE_ASSIGNMENT_POLICY, StoreAssignmentPolicy.LEGACY.toString) // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests // (timestamp_*) 
TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, "America/Los_Angeles") + TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED, true) RuleExecutor.resetMetrics() } - override def afterAll() { + override def afterAll(): Unit = { try { TestHive.setCacheTables(false) TimeZone.setDefault(originalTimeZone) @@ -74,6 +79,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, originalSessionLocalTimeZone) + TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED, + originalCreateHiveTable) // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala index c7d953a731b9b..ed23f65815917 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala @@ -37,7 +37,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte private val originalLocale = Locale.getDefault private val testTempDir = Utils.createTempDir() - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() TestHive.setCacheTables(true) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) @@ -58,7 +58,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte | p_size INT, | p_container STRING, | p_retailprice DOUBLE, - | p_comment STRING) + | p_comment STRING) 
USING hive """.stripMargin) val testData1 = TestHive.getHiveFile("data/files/part_tiny.txt").getCanonicalPath sql( @@ -100,7 +100,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte sql("set mapreduce.jobtracker.address=local") } - override def afterAll() { + override def afterAll(): Unit = { try { TestHive.setCacheTables(false) TimeZone.setDefault(originalTimeZone) @@ -751,7 +751,7 @@ class HiveWindowFunctionQueryFileSuite private val originalLocale = Locale.getDefault private val testTempDir = Utils.createTempDir() - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() TestHive.setCacheTables(true) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) @@ -769,7 +769,7 @@ class HiveWindowFunctionQueryFileSuite // sql("set mapreduce.jobtracker.address=local") } - override def afterAll() { + override def afterAll(): Unit = { try { TestHive.setCacheTables(false) TimeZone.setDefault(originalTimeZone) diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index d37f0c8573659..c37582386347b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -88,12 +88,11 @@ ${protobuf.version} --> - ${hive.group} hive-exec @@ -105,22 +104,24 @@ ${hive.group} - hive-contrib + hive-serde + ${hive.serde.scope} + + + ${hive.group} + hive-shims + ${hive.shims.scope} - ${hive.group}.hcatalog - hive-hcatalog-core + org.apache.hive + hive-llap-common + ${hive.llap.scope} + + + org.apache.hive + hive-llap-client + ${hive.llap.scope} - org.apache.avro @@ -216,31 +217,6 @@ - - hadoop-3.2 - - - ${hive.group} - hive-common - - - ${hive.group} - hive-serde - - - ${hive.group} - hive-shims - - - org.apache.hive - hive-llap-common - - - org.apache.hive - hive-llap-client - - - @@ -252,7 +228,7 @@ scalatest-maven-plugin - -da -Xmx4g -XX:ReservedCodeCacheSize=${CodeCacheSize} + -da -Xmx4g -XX:ReservedCodeCacheSize=${CodeCacheSize} -Dio.netty.tryReflectionSetAccessible=true diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala deleted file mode 100644 index 02a5117f005e8..0000000000000 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import org.apache.spark.SparkContext -import org.apache.spark.api.java.JavaSparkContext -import org.apache.spark.internal.Logging -import org.apache.spark.sql.{SparkSession, SQLContext} - - -/** - * An instance of the Spark SQL execution engine that integrates with data stored in Hive. - * Configuration for Hive is read from hive-site.xml on the classpath. 
- */ -@deprecated("Use SparkSession.builder.enableHiveSupport instead", "2.0.0") -class HiveContext private[hive](_sparkSession: SparkSession) - extends SQLContext(_sparkSession) with Logging { - - self => - - def this(sc: SparkContext) = { - this(SparkSession.builder().sparkContext(HiveUtils.withHiveExternalCatalog(sc)).getOrCreate()) - } - - def this(sc: JavaSparkContext) = this(sc.sc) - - /** - * Returns a new HiveContext as new session, which will have separated SQLConf, UDF/UDAF, - * temporary tables and SessionState, but sharing the same CacheManager, IsolatedClientLoader - * and Hive client (both of execution and metadata) with existing HiveContext. - */ - override def newSession(): HiveContext = { - new HiveContext(sparkSession.newSession()) - } - - /** - * Invalidate and refresh all the cached the metadata of the given table. For performance reasons, - * Spark SQL or the external data source library it uses might cache certain metadata about a - * table, such as the location of blocks. When those change outside of Spark SQL, users should - * call this function to invalidate the cache. 
- * - * @since 1.3.0 - */ - def refreshTable(tableName: String): Unit = { - sparkSession.catalog.refreshTable(tableName) - } - -} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 03874d005a6e6..ca292f65efeee 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -40,8 +40,8 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.ColumnStat import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient @@ -635,12 +635,16 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat k.startsWith(CREATED_SPARK_VERSION) } val newTableProps = propsFromOldTable ++ tableDefinition.properties + partitionProviderProp + + // // Add old table's owner if we need to restore + val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner) val newDef = tableDefinition.copy( storage = newStorage, schema = oldTableDef.schema, partitionColumnNames = oldTableDef.partitionColumnNames, bucketSpec = oldTableDef.bucketSpec, - properties = newTableProps) + properties = newTableProps, + owner = owner) client.alterTable(newDef) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 33b5bcefd853f..0cd9b3641bd4a 100644 
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -305,12 +305,17 @@ private[hive] trait HiveInspectors { withNullSafe(o => getByteWritable(o)) case _: ByteObjectInspector => withNullSafe(o => o.asInstanceOf[java.lang.Byte]) - case _: JavaHiveVarcharObjectInspector => + // To spark HiveVarchar and HiveChar are same as string + case _: HiveVarcharObjectInspector if x.preferWritable() => + withNullSafe(o => getStringWritable(o)) + case _: HiveVarcharObjectInspector => withNullSafe { o => val s = o.asInstanceOf[UTF8String].toString new HiveVarchar(s, s.length) } - case _: JavaHiveCharObjectInspector => + case _: HiveCharObjectInspector if x.preferWritable() => + withNullSafe(o => getStringWritable(o)) + case _: HiveCharObjectInspector => withNullSafe { o => val s = o.asInstanceOf[UTF8String].toString new HiveChar(s, s.length) @@ -787,6 +792,9 @@ private[hive] trait HiveInspectors { ObjectInspectorFactory.getStandardStructObjectInspector( java.util.Arrays.asList(fields.map(f => f.name) : _*), java.util.Arrays.asList(fields.map(f => toInspector(f.dataType)) : _*)) + case _: UserDefinedType[_] => + val sqlType = dataType.asInstanceOf[UserDefinedType[_]].sqlType + toInspector(sqlType) } /** @@ -849,6 +857,8 @@ private[hive] trait HiveInspectors { } case Literal(_, dt: StructType) => toInspector(dt) + case Literal(_, dt: UserDefinedType[_]) => + toInspector(dt.sqlType) // We will enumerate all of the possible constant expressions, throw exception if we missed case Literal(_, dt) => sys.error(s"Hive doesn't support the constant type [$dt].") // ideally, we don't test the foldable here(but in optimizer), however, some of the diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 5ad2caba07fc0..2981e391c0439 100644 --- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.internal.Logging -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -257,8 +257,20 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } // The inferred schema may have different field names as the table schema, we should respect // it, but also respect the exprId in table relation output. - assert(result.output.length == relation.output.length && - result.output.zip(relation.output).forall { case (a1, a2) => a1.dataType == a2.dataType }) + if (result.output.length != relation.output.length) { + throw new AnalysisException( + s"Converted table has ${result.output.length} columns, " + + s"but source Hive table has ${relation.output.length} columns. " + + s"Set ${HiveUtils.CONVERT_METASTORE_PARQUET.key} to false, " + + s"or recreate table ${relation.tableMeta.identifier} to workaround.") + } + if (!result.output.zip(relation.output).forall { + case (a1, a2) => a1.dataType == a2.dataType }) { + throw new AnalysisException( + s"Column in converted table has different data type with source Hive table's. 
" + + s"Set ${HiveUtils.CONVERT_METASTORE_PARQUET.key} to false, " + + s"or recreate table ${relation.tableMeta.identifier} to workaround.") + } val newOutput = result.output.zip(relation.output).map { case (a1, a2) => a1.withExprId(a2.exprId) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 3f0a9f222feb2..bc7760c982aab 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -66,49 +66,52 @@ private[sql] class HiveSessionCatalog( name: String, clazz: Class[_], input: Seq[Expression]): Expression = { - - Try(super.makeFunctionExpression(name, clazz, input)).getOrElse { - var udfExpr: Option[Expression] = None - try { - // When we instantiate hive UDF wrapper class, we may throw exception if the input - // expressions don't satisfy the hive UDF, such as type mismatch, input number - // mismatch, etc. Here we catch the exception and throw AnalysisException instead. - if (classOf[UDF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[GenericUDF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) { - udfExpr = Some(HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[UDAF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveUDAFFunction( - name, - new HiveFunctionWrapper(clazz.getName), - input, - isUDAFBridgeRequired = true)) - udfExpr.get.dataType // Force it to check input data types. 
- } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.asInstanceOf[HiveGenericUDTF].elementSchema // Force it to check data types. + // Current thread context classloader may not be the one loaded the class. Need to switch + // context classloader to initialize instance properly. + Utils.withContextClassLoader(clazz.getClassLoader) { + Try(super.makeFunctionExpression(name, clazz, input)).getOrElse { + var udfExpr: Option[Expression] = None + try { + // When we instantiate hive UDF wrapper class, we may throw exception if the input + // expressions don't satisfy the hive UDF, such as type mismatch, input number + // mismatch, etc. Here we catch the exception and throw AnalysisException instead. + if (classOf[UDF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[GenericUDF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) { + udfExpr = Some(HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[UDAF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveUDAFFunction( + name, + new HiveFunctionWrapper(clazz.getName), + input, + isUDAFBridgeRequired = true)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.asInstanceOf[HiveGenericUDTF].elementSchema // Force it to check data types. 
+ } + } catch { + case NonFatal(e) => + val noHandlerMsg = s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}': $e" + val errorMsg = + if (classOf[GenericUDTF].isAssignableFrom(clazz)) { + s"$noHandlerMsg\nPlease make sure your function overrides " + + "`public StructObjectInspector initialize(ObjectInspector[] args)`." + } else { + noHandlerMsg + } + val analysisException = new AnalysisException(errorMsg) + analysisException.setStackTrace(e.getStackTrace) + throw analysisException + } + udfExpr.getOrElse { + throw new AnalysisException(s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}'") } - } catch { - case NonFatal(e) => - val noHandlerMsg = s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}': $e" - val errorMsg = - if (classOf[GenericUDTF].isAssignableFrom(clazz)) { - s"$noHandlerMsg\nPlease make sure your function overrides " + - "`public StructObjectInspector initialize(ObjectInspector[] args)`." - } else { - noHandlerMsg - } - val analysisException = new AnalysisException(errorMsg) - analysisException.setStackTrace(e.getStackTrace) - throw analysisException - } - udfExpr.getOrElse { - throw new AnalysisException(s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}'") } } } @@ -117,7 +120,7 @@ private[sql] class HiveSessionCatalog( try { lookupFunction0(name, children) } catch { - case NonFatal(_) => + case NonFatal(_) if children.exists(_.dataType.isInstanceOf[DecimalType]) => // SPARK-16228 ExternalCatalog may recognize `double`-type only. 
val newChildren = children.map { child => if (child.dataType.isInstanceOf[DecimalType]) Cast(child, DoubleType) else child diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 188aedc3640b8..b117c582a3e6e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -19,15 +19,17 @@ package org.apache.spark.sql.hive import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.analysis.Analyzer +import org.apache.spark.sql.catalyst.analysis.{Analyzer, ResolveSessionCatalog} import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener +import org.apache.spark.sql.catalyst.optimizer.Optimizer import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.SparkPlanner +import org.apache.spark.sql.execution.{SparkOptimizer, SparkPlanner} import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.TableCapabilityCheck import org.apache.spark.sql.hive.client.HiveClient +import org.apache.spark.sql.hive.execution.PruneHiveTablePartitions import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLoader, SessionState} /** @@ -67,13 +69,13 @@ class HiveSessionStateBuilder(session: SparkSession, parentState: Option[Session /** * A logical query plan `Analyzer` with rules specific to Hive. 
*/ - override protected def analyzer: Analyzer = new Analyzer(catalog, conf) { + override protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = new ResolveHiveSerdeTable(session) +: new FindDataSourceTable(session) +: new ResolveSQLOnFile(session) +: new FallBackFileSourceV2(session) +: - DataSourceResolution(conf, this.catalogManager) +: + new ResolveSessionCatalog(catalogManager, conf, catalog.isView) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = @@ -93,11 +95,25 @@ class HiveSessionStateBuilder(session: SparkSession, parentState: Option[Session customCheckRules } + /** + * Logical query plan optimizer that takes into account Hive. + */ + override protected def optimizer: Optimizer = { + new SparkOptimizer(catalogManager, catalog, experimentalMethods) { + override def postHocOptimizationBatches: Seq[Batch] = Seq( + Batch("Prune Hive Table Partitions", Once, new PruneHiveTablePartitions(session)) + ) + + override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = + super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules + } + } + /** * Planner that takes into account Hive-specific strategies. 
*/ override protected def planner: SparkPlanner = { - new SparkPlanner(session.sparkContext, conf, experimentalMethods) with HiveStrategies { + new SparkPlanner(session, conf, experimentalMethods) with HiveStrategies { override val sparkSession: SparkSession = session override def extraPlanningStrategies: Seq[Strategy] = diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index be4a0c175b6dc..3beef6b1df457 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -51,7 +51,7 @@ private[hive] object HiveShim { /* * This function in hive-0.13 become private, but we have to do this to work around hive bug */ - private def appendReadColumnNames(conf: Configuration, cols: Seq[String]) { + private def appendReadColumnNames(conf: Configuration, cols: Seq[String]): Unit = { val old: String = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "") val result: StringBuilder = new StringBuilder(old) var first: Boolean = old.isEmpty @@ -70,7 +70,7 @@ private[hive] object HiveShim { /* * Cannot use ColumnProjectionUtils.appendReadColumns directly, if ids is null */ - def appendReadColumns(conf: Configuration, ids: Seq[Integer], names: Seq[String]) { + def appendReadColumns(conf: Configuration, ids: Seq[Integer], names: Seq[String]): Unit = { if (ids != null) { ColumnProjectionUtils.appendReadColumns(conf, ids.asJava) } @@ -201,7 +201,7 @@ private[hive] object HiveShim { } } - def writeExternal(out: java.io.ObjectOutput) { + def writeExternal(out: java.io.ObjectOutput): Unit = { // output the function name out.writeUTF(functionClassName) @@ -220,7 +220,7 @@ private[hive] object HiveShim { } } - def readExternal(in: java.io.ObjectInput) { + def readExternal(in: java.io.ObjectInput): Unit = { // read the function name functionClassName = in.readUTF() @@ -279,25 +279,25 @@ private[hive] 
object HiveShim { var compressType: String = _ var destTableId: Int = _ - def setCompressed(compressed: Boolean) { + def setCompressed(compressed: Boolean): Unit = { this.compressed = compressed } def getDirName(): String = dir - def setDestTableId(destTableId: Int) { + def setDestTableId(destTableId: Int): Unit = { this.destTableId = destTableId } - def setTableInfo(tableInfo: TableDesc) { + def setTableInfo(tableInfo: TableDesc): Unit = { this.tableInfo = tableInfo } - def setCompressCodec(intermediateCompressorCodec: String) { + def setCompressCodec(intermediateCompressorCodec: String): Unit = { compressCodec = intermediateCompressorCodec } - def setCompressType(intermediateCompressType: String) { + def setCompressType(intermediateCompressType: String): Unit = { compressType = intermediateCompressType } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 18feb98519fbe..b9c98f4ea15e9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -26,8 +26,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning._ -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoTable, LogicalPlan, - ScriptTransformation, Statistics} +import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, ScriptTransformation, Statistics} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} @@ -143,9 +142,9 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty => 
hiveTableWithStats(relation) - // handles InsertIntoTable specially as the table in InsertIntoTable is not added in its + // handles InsertIntoStatement specially as the table in InsertIntoStatement is not added in its // children, hence not matched directly by previous HiveTableRelation case. - case i @ InsertIntoTable(relation: HiveTableRelation, _, _, _, _) + case i @ InsertIntoStatement(relation: HiveTableRelation, _, _, _, _) if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty => i.copy(table = hiveTableWithStats(relation)) } @@ -159,7 +158,7 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { */ object HiveAnalysis extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case InsertIntoTable(r: HiveTableRelation, partSpec, query, overwrite, ifPartitionNotExists) + case InsertIntoStatement(r: HiveTableRelation, partSpec, query, overwrite, ifPartitionNotExists) if DDLUtils.isHiveTable(r.tableMeta) => InsertIntoHiveTable(r.tableMeta, partSpec, query, overwrite, ifPartitionNotExists, query.output.map(_.name)) @@ -207,11 +206,12 @@ case class RelationConversions( override def apply(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { // Write path - case InsertIntoTable(r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists) + case InsertIntoStatement( + r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists) if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && (!r.isPartitioned || SQLConf.get.getConf(HiveUtils.CONVERT_INSERTING_PARTITIONED_TABLE)) && isConvertible(r) => - InsertIntoTable(metastoreCatalog.convert(r), partition, + InsertIntoStatement(metastoreCatalog.convert(r), partition, query, overwrite, ifPartitionNotExists) // Read path @@ -252,7 +252,7 @@ private[hive] trait HiveStrategies { */ object HiveTableScans extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case 
PhysicalOperation(projectList, predicates, relation: HiveTableRelation) => + case ScanOperation(projectList, predicates, relation: HiveTableRelation) => // Filter out all predicates that only deal with partition keys, these are given to the // hive table scan operator to be used for partition pruning. val partitionKeyIds = AttributeSet(relation.partitionCols) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index d5f3697ce3bf7..9c4b8a5819a33 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -476,6 +476,7 @@ private[spark] object HiveUtils extends Logging { // Configuration. But it happens before SparkContext initialized, we need to take them from // system properties in the form of regular hadoop configurations. SparkHadoopUtil.get.appendSparkHadoopConfigs(sys.props.toMap, propMap) + SparkHadoopUtil.get.appendSparkHiveConfigs(sys.props.toMap, propMap) propMap.toMap } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index 3f9925e73705e..4d18eb6289418 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -83,7 +83,7 @@ class HadoopTableReader( sparkSession.sparkContext.defaultMinPartitions) } - SparkHadoopUtil.get.appendS3AndSparkHadoopConfigurations( + SparkHadoopUtil.get.appendS3AndSparkHadoopHiveConfigurations( sparkSession.sparkContext.conf, hadoopConf) private val _broadcastedHadoopConf = @@ -132,7 +132,9 @@ class HadoopTableReader( val deserializedHadoopRDD = hadoopRDD.mapPartitions { iter => val hconf = broadcastedHadoopConf.value.value val deserializer = deserializerClass.getConstructor().newInstance() - deserializer.initialize(hconf, 
localTableDesc.getProperties) + DeserializerLock.synchronized { + deserializer.initialize(hconf, localTableDesc.getProperties) + } HadoopTableReader.fillObject(iter, deserializer, attrsWithIndex, mutableRow, deserializer) } @@ -170,7 +172,7 @@ class HadoopTableReader( val pathPatternSet = collection.mutable.Set[String]() partitionToDeserializer.filter { case (partition, partDeserializer) => - def updateExistPathSetByPathPattern(pathPatternStr: String) { + def updateExistPathSetByPathPattern(pathPatternStr: String): Unit = { val pathPattern = new Path(pathPatternStr) val fs = pathPattern.getFileSystem(hadoopConf) val matches = fs.globStatus(pathPattern) @@ -252,10 +254,14 @@ class HadoopTableReader( partProps.asScala.foreach { case (key, value) => props.setProperty(key, value) } - deserializer.initialize(hconf, props) + DeserializerLock.synchronized { + deserializer.initialize(hconf, props) + } // get the table deserializer val tableSerDe = localTableDesc.getDeserializerClass.getConstructor().newInstance() - tableSerDe.initialize(hconf, localTableDesc.getProperties) + DeserializerLock.synchronized { + tableSerDe.initialize(hconf, tableProperties) + } // fill the non partition key attributes HadoopTableReader.fillObject(iter, deserializer, nonPartitionKeyAttrs, @@ -352,7 +358,7 @@ private[hive] object HiveTableUtil { // that calls Hive.get() which tries to access metastore, but it's not valid in runtime // it would be fixed in next version of hive but till then, we should use this instead def configureJobPropertiesForStorageHandler( - tableDesc: TableDesc, conf: Configuration, input: Boolean) { + tableDesc: TableDesc, conf: Configuration, input: Boolean): Unit = { val property = tableDesc.getProperties.getProperty(META_TABLE_STORAGE) val storageHandler = org.apache.hadoop.hive.ql.metadata.HiveUtils.getStorageHandler(conf, property) @@ -370,12 +376,23 @@ private[hive] object HiveTableUtil { } } +/** + * Object to synchronize on when calling 
org.apache.hadoop.hive.serde2.Deserializer#initialize. + * + * [SPARK-17398] org.apache.hive.hcatalog.data.JsonSerDe#initialize calls the non-thread-safe + * HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector, the results of which are + * returned by JsonSerDe#getObjectInspector. + * To protect against this bug in Hive (HIVE-15773/HIVE-21752), we synchronize on this object + * when calling initialize on Deserializer instances that could be JsonSerDe instances. + */ +private[hive] object DeserializerLock + private[hive] object HadoopTableReader extends HiveInspectors with Logging { /** * Curried. After given an argument for 'path', the resulting JobConf => Unit closure is used to * instantiate a HadoopRDD. */ - def initializeLocalJobConfFunc(path: String, tableDesc: TableDesc)(jobConf: JobConf) { + def initializeLocalJobConfFunc(path: String, tableDesc: TableDesc)(jobConf: JobConf): Unit = { FileInputFormat.setInputPaths(jobConf, Seq[Path](new Path(path)): _*) if (tableDesc != null) { HiveTableUtil.configureJobPropertiesForStorageHandler(tableDesc, jobConf, true) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala index cb015d7301c19..e31dffa4795c5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala @@ -111,8 +111,8 @@ private[hive] trait HiveClient { * TODO(cloud-fan): it's a little hacky to introduce the schema table properties here in * `HiveClient`, but we don't have a cleaner solution now. */ - def alterTableDataSchema( - dbName: String, tableName: String, newDataSchema: StructType, schemaProps: Map[String, String]) + def alterTableDataSchema(dbName: String, tableName: String, newDataSchema: StructType, + schemaProps: Map[String, String]): Unit /** Creates a new database with the given name. 
*/ def createDatabase(database: CatalogDatabase, ignoreIfExists: Boolean): Unit @@ -292,4 +292,6 @@ private[hive] trait HiveClient { /** Used for testing only. Removes all metadata from this instance of Hive. */ def reset(): Unit + /** Returns the user name which is used as owner for Hive table. */ + def userName: String } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 5b2eeb2cf34c0..b5c5f0e9381bc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -32,8 +32,7 @@ import org.apache.hadoop.hive.common.StatsSetupConst import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.metastore.{IMetaStoreClient, TableType => HiveTableType} -import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, Table => MetaStoreApiTable} -import org.apache.hadoop.hive.metastore.api.{FieldSchema, Order, SerDeInfo, StorageDescriptor} +import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, Table => MetaStoreApiTable, _} import org.apache.hadoop.hive.ql.Driver import org.apache.hadoop.hive.ql.metadata.{Hive, HiveException, Partition => HivePartition, Table => HiveTable} import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC @@ -42,6 +41,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe +import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.internal.Logging @@ -53,11 +53,13 @@ import org.apache.spark.sql.catalyst.catalog._ import 
org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} +import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ import org.apache.spark.sql.execution.QueryExecutionException -import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.client.HiveClientImpl._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} @@ -161,7 +163,7 @@ private[hive] class HiveClientImpl( // HiveConf is a Hadoop Configuration, which has a field of classLoader and // the initial value will be the current thread's context class loader // (i.e. initClassLoader at here). - // We call initialConf.setClassLoader(initClassLoader) at here to make + // We call hiveConf.setClassLoader(initClassLoader) at here to make // this action explicit. hiveConf.setClassLoader(initClassLoader) @@ -175,14 +177,15 @@ private[hive] class HiveClientImpl( // has hive-site.xml. So, HiveConf will use that to override its default values. // 2: we set all spark confs to this hiveConf. // 3: we set all entries in config to this hiveConf. 
- (hadoopConf.iterator().asScala.map(kv => kv.getKey -> kv.getValue) - ++ sparkConf.getAll.toMap ++ extraConfig).foreach { case (k, v) => + val confMap = (hadoopConf.iterator().asScala.map(kv => kv.getKey -> kv.getValue) ++ + sparkConf.getAll.toMap ++ extraConfig).toMap + confMap.foreach { case (k, v) => hiveConf.set(k, v) } + SQLConf.get.redactOptions(confMap).foreach { case (k, v) => logDebug( s""" |Applying Hadoop/Hive/Spark and extra properties to Hive Conf: - |$k=${if (k.toLowerCase(Locale.ROOT).contains("password")) "xxx" else v} + |$k=$v """.stripMargin) - hiveConf.set(k, v) } // Disable CBO because we removed the Calcite dependency. hiveConf.setBoolean("hive.cbo.enable", false) @@ -190,6 +193,13 @@ private[hive] class HiveClientImpl( if (clientLoader.cachedHive != null) { Hive.set(clientLoader.cachedHive.asInstanceOf[Hive]) } + // Hive 2.3 will set UDFClassLoader to hiveConf when initializing SessionState + // since HIVE-11878, and ADDJarCommand will add jars to clientLoader.classLoader. + // For this reason we cannot load the jars added by ADDJarCommand because of class loader + // got changed. We reset it to clientLoader.ClassLoader here. 
+ if (HiveUtils.isHive23) { + state.getConf.setClassLoader(clientLoader.classLoader) + } SessionState.start(state) state.out = new PrintStream(outputBuffer, true, UTF_8.name()) state.err = new PrintStream(outputBuffer, true, UTF_8.name()) @@ -221,7 +231,7 @@ private[hive] class HiveClientImpl( hiveConf } - private val userName = conf.getUser + override val userName = UserGroupInformation.getCurrentUser.getShortUserName override def getConf(key: String, defaultValue: String): String = { conf.get(key, defaultValue) @@ -345,13 +355,8 @@ private[hive] class HiveClientImpl( override def createDatabase( database: CatalogDatabase, ignoreIfExists: Boolean): Unit = withHiveState { - client.createDatabase( - new HiveDatabase( - database.name, - database.description, - CatalogUtils.URIToString(database.locationUri), - Option(database.properties).map(_.asJava).orNull), - ignoreIfExists) + val hiveDb = toHiveDatabase(database, Some(userName)) + client.createDatabase(hiveDb, ignoreIfExists) } override def dropDatabase( @@ -362,22 +367,41 @@ private[hive] class HiveClientImpl( } override def alterDatabase(database: CatalogDatabase): Unit = withHiveState { - client.alterDatabase( + if (!getDatabase(database.name).locationUri.equals(database.locationUri)) { + // SPARK-29260: Enable supported versions once it support altering database location. 
+ if (!(version.equals(hive.v3_0) || version.equals(hive.v3_1))) { + throw new AnalysisException( + s"Hive ${version.fullVersion} does not support altering database location") + } + } + val hiveDb = toHiveDatabase(database) + client.alterDatabase(database.name, hiveDb) + } + + private def toHiveDatabase( + database: CatalogDatabase, userName: Option[String] = None): HiveDatabase = { + val props = database.properties + val hiveDb = new HiveDatabase( database.name, - new HiveDatabase( - database.name, - database.description, - CatalogUtils.URIToString(database.locationUri), - Option(database.properties).map(_.asJava).orNull)) + database.description, + CatalogUtils.URIToString(database.locationUri), + (props -- Seq(PROP_OWNER)).asJava) + props.get(PROP_OWNER).orElse(userName).foreach { ownerName => + shim.setDatabaseOwnerName(hiveDb, ownerName) + } + hiveDb } override def getDatabase(dbName: String): CatalogDatabase = withHiveState { Option(client.getDatabase(dbName)).map { d => + val paras = Option(d.getParameters).map(_.asScala.toMap).getOrElse(Map()) ++ + Map(PROP_OWNER -> shim.getDatabaseOwnerName(d)) + CatalogDatabase( name = d.getName, description = Option(d.getDescription).getOrElse(""), locationUri = CatalogUtils.stringToURI(d.getLocationUri), - properties = Option(d.getParameters).map(_.asScala.toMap).orNull) + properties = paras) }.getOrElse(throw new NoSuchDatabaseException(dbName)) } @@ -423,8 +447,13 @@ private[hive] class HiveClientImpl( private def convertHiveTableToCatalogTable(h: HiveTable): CatalogTable = { // Note: Hive separates partition columns and the schema, but for us the // partition columns are part of the schema - val cols = h.getCols.asScala.map(fromHiveColumn) - val partCols = h.getPartCols.asScala.map(fromHiveColumn) + val (cols, partCols) = try { + (h.getCols.asScala.map(fromHiveColumn), h.getPartCols.asScala.map(fromHiveColumn)) + } catch { + case ex: SparkException => + throw new SparkException( + s"${ex.getMessage}, db: 
${h.getDbName}, table: ${h.getTableName}", ex) + } val schema = StructType(cols ++ partCols) val bucketSpec = if (h.getNumBuckets > 0) { @@ -965,7 +994,8 @@ private[hive] object HiveClientImpl { CatalystSqlParser.parseDataType(hc.getType) } catch { case e: ParseException => - throw new SparkException("Cannot recognize hive type string: " + hc.getType, e) + throw new SparkException( + s"Cannot recognize hive type string: ${hc.getType}, column: ${hc.getName}", e) } } @@ -1021,7 +1051,7 @@ private[hive] object HiveClientImpl { } hiveTable.setFields(schema.asJava) hiveTable.setPartCols(partCols.asJava) - userName.foreach(hiveTable.setOwner) + Option(table.owner).filter(_.nonEmpty).orElse(userName).foreach(hiveTable.setOwner) hiveTable.setCreateTime(MILLISECONDS.toSeconds(table.createTime).toInt) hiveTable.setLastAccessTime(MILLISECONDS.toSeconds(table.lastAccessTime).toInt) table.storage.locationUri.map(CatalogUtils.URIToString).foreach { loc => @@ -1042,7 +1072,7 @@ private[hive] object HiveClientImpl { } table.bucketSpec match { - case Some(bucketSpec) if DDLUtils.isHiveTable(table) => + case Some(bucketSpec) if !HiveExternalCatalog.isDatasourceTable(table) => hiveTable.setNumBuckets(bucketSpec.numBuckets) hiveTable.setBucketCols(bucketSpec.bucketColumnNames.toList.asJava) @@ -1155,9 +1185,10 @@ private[hive] object HiveClientImpl { * Note that this statistics could be overridden by Spark's statistics if that's available. 
*/ private def readHiveStats(properties: Map[String, String]): Option[CatalogStatistics] = { - val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).map(BigInt(_)) - val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).map(BigInt(_)) - val rowCount = properties.get(StatsSetupConst.ROW_COUNT).map(BigInt(_)) + val totalSize = properties.get(StatsSetupConst.TOTAL_SIZE).filter(_.nonEmpty).map(BigInt(_)) + val rawDataSize = properties.get(StatsSetupConst.RAW_DATA_SIZE).filter(_.nonEmpty) + .map(BigInt(_)) + val rowCount = properties.get(StatsSetupConst.ROW_COUNT).filter(_.nonEmpty).map(BigInt(_)) // NOTE: getting `totalSize` directly from params is kind of hacky, but this should be // relatively cheap if parameters for the table are populated into the metastore. // Currently, only totalSize, rawDataSize, and rowCount are used to build the field `stats` diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 586fbbefade46..50ce536a160c8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -29,8 +29,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.metastore.IMetaStoreClient -import org.apache.hadoop.hive.metastore.api.{EnvironmentContext, Function => HiveFunction, FunctionType} -import org.apache.hadoop.hive.metastore.api.{MetaException, PrincipalType, ResourceType, ResourceUri} +import org.apache.hadoop.hive.metastore.api.{Database, EnvironmentContext, Function => HiveFunction, FunctionType, MetaException, PrincipalType, ResourceType, ResourceUri} import org.apache.hadoop.hive.ql.Driver import org.apache.hadoop.hive.ql.io.AcidUtils import org.apache.hadoop.hive.ql.metadata.{Hive, HiveException, Partition, Table} @@ -154,6 +153,10 @@ 
private[client] sealed abstract class Shim { deleteData: Boolean, purge: Boolean): Unit + def getDatabaseOwnerName(db: Database): String + + def setDatabaseOwnerName(db: Database, owner: String): Unit + protected def findStaticMethod(klass: Class[_], name: String, args: Class[_]*): Method = { val method = findMethod(klass, name, args: _*) require(Modifier.isStatic(method.getModifiers()), @@ -456,6 +459,10 @@ private[client] class Shim_v0_12 extends Shim with Logging { def listFunctions(hive: Hive, db: String, pattern: String): Seq[String] = { Seq.empty[String] } + + override def getDatabaseOwnerName(db: Database): String = "" + + override def setDatabaseOwnerName(db: Database, owner: String): Unit = {} } private[client] class Shim_v0_13 extends Shim_v0_12 { @@ -493,6 +500,17 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { "getResults", classOf[JList[Object]]) + private lazy val getDatabaseOwnerNameMethod = + findMethod( + classOf[Database], + "getOwnerName") + + private lazy val setDatabaseOwnerNameMethod = + findMethod( + classOf[Database], + "setOwnerName", + classOf[String]) + override def setCurrentSessionState(state: SessionState): Unit = setCurrentSessionStateMethod.invoke(null, state) @@ -666,7 +684,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } } - object NonVarcharAttribute { + object SupportedAttribute { // hive varchar is treated as catalyst string, but hive varchar can't be pushed down. 
private val varcharKeys = table.getPartitionKeys.asScala .filter(col => col.getType.startsWith(serdeConstants.VARCHAR_TYPE_NAME) || @@ -676,8 +694,10 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { def unapply(attr: Attribute): Option[String] = { if (varcharKeys.contains(attr.name)) { None - } else { + } else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType) { Some(attr.name) + } else { + None } } } @@ -700,20 +720,20 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } def convert(expr: Expression): Option[String] = expr match { - case In(ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiterals(values)) + case In(ExtractAttribute(SupportedAttribute(name)), ExtractableLiterals(values)) if useAdvanced => Some(convertInToOr(name, values)) - case InSet(ExtractAttribute(NonVarcharAttribute(name)), ExtractableValues(values)) + case InSet(ExtractAttribute(SupportedAttribute(name)), ExtractableValues(values)) if useAdvanced => Some(convertInToOr(name, values)) case op @ SpecialBinaryComparison( - ExtractAttribute(NonVarcharAttribute(name)), ExtractableLiteral(value)) => + ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value)) => Some(s"$name ${op.symbol} $value") case op @ SpecialBinaryComparison( - ExtractableLiteral(value), ExtractAttribute(NonVarcharAttribute(name))) => + ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name))) => Some(s"$value ${op.symbol} $name") case And(expr1, expr2) if useAdvanced => @@ -809,6 +829,13 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } } + override def getDatabaseOwnerName(db: Database): String = { + Option(getDatabaseOwnerNameMethod.invoke(db)).map(_.asInstanceOf[String]).getOrElse("") + } + + override def setDatabaseOwnerName(db: Database, owner: String): Unit = { + setDatabaseOwnerNameMethod.invoke(db, owner) + } } private[client] class Shim_v0_14 extends Shim_v0_13 { diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 6f60bb7c9c74d..5da7b70cfc7aa 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -36,6 +36,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.{MutableURLClassLoader, Utils} /** Factory for `IsolatedClientLoader` with specific versions of hive. */ @@ -60,9 +61,10 @@ private[hive] object IsolatedClientLoader extends Logging { val files = if (resolvedVersions.contains((resolvedVersion, hadoopVersion))) { resolvedVersions((resolvedVersion, hadoopVersion)) } else { + val remoteRepos = sparkConf.get(SQLConf.ADDITIONAL_REMOTE_REPOSITORIES) val (downloadedFiles, actualHadoopVersion) = try { - (downloadVersion(resolvedVersion, hadoopVersion, ivyPath), hadoopVersion) + (downloadVersion(resolvedVersion, hadoopVersion, ivyPath, remoteRepos), hadoopVersion) } catch { case e: RuntimeException if e.getMessage.contains("hadoop") => // If the error message contains hadoop, it is probably because the hadoop @@ -74,7 +76,8 @@ private[hive] object IsolatedClientLoader extends Logging { "It is recommended to set jars used by Hive metastore client through " + "spark.sql.hive.metastore.jars in the production environment.") _sharesHadoopClasses = false - (downloadVersion(resolvedVersion, fallbackVersion, ivyPath), fallbackVersion) + (downloadVersion( + resolvedVersion, fallbackVersion, ivyPath, remoteRepos), fallbackVersion) } resolvedVersions.put((resolvedVersion, actualHadoopVersion), downloadedFiles) resolvedVersions((resolvedVersion, 
actualHadoopVersion)) @@ -112,7 +115,8 @@ private[hive] object IsolatedClientLoader extends Logging { private def downloadVersion( version: HiveVersion, hadoopVersion: String, - ivyPath: Option[String]): Seq[URL] = { + ivyPath: Option[String], + remoteRepos: String): Seq[URL] = { val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ @@ -123,7 +127,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.resolveMavenCoordinates( hiveArtifacts.mkString(","), SparkSubmitUtils.buildIvySettings( - Some("http://www.datanucleus.org/downloads/maven2"), + Some(remoteRepos), ivyPath), exclusions = version.exclusions) } @@ -158,7 +162,7 @@ private[hive] object IsolatedClientLoader extends Logging { * @param execJars A collection of jar files that must include hive and hadoop. * @param config A set of options that will be added to the HiveConf of the constructed client. * @param isolationOn When true, custom versions of barrier classes will be constructed. Must be - * true unless loading the version of hive that is on Sparks classloader. + * true unless loading the version of hive that is on Spark's classloader. * @param sharesHadoopClasses When true, we will share Hadoop classes between Spark and * @param baseClassLoader The spark classloader that is used to load shared classes. 
*/ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala index 27071075b4165..c51c521cacba0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala @@ -130,10 +130,15 @@ class HiveOutputWriter( new Path(path), Reporter.NULL) + /** + * Since SPARK-30201 ObjectInspectorCopyOption.JAVA change to ObjectInspectorCopyOption.DEFAULT. + * The reason is DEFAULT option can convert `UTF8String` to `Text` with bytes and + * we can compatible with non UTF-8 code bytes during write. + */ private val standardOI = ObjectInspectorUtils .getStandardObjectInspector( tableDesc.getDeserializer(jobConf).getObjectInspector, - ObjectInspectorCopyOption.JAVA) + ObjectInspectorCopyOption.DEFAULT) .asInstanceOf[StructObjectInspector] private val fieldOIs = diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala index 5b00e2ebafa43..4dccacef337e9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScanExec.scala @@ -146,7 +146,7 @@ case class HiveTableScanExec( * @param partitions All partitions of the relation. * @return Partitions that are involved in the query plan. 
*/ - private[hive] def prunePartitions(partitions: Seq[HivePartition]) = { + private[hive] def prunePartitions(partitions: Seq[HivePartition]): Seq[HivePartition] = { boundPruningPred match { case None => partitions case Some(shouldKeep) => partitions.filter { part => @@ -162,18 +162,36 @@ case class HiveTableScanExec( } } + @transient lazy val prunedPartitions: Seq[HivePartition] = { + if (relation.prunedPartitions.nonEmpty) { + val hivePartitions = + relation.prunedPartitions.get.map(HiveClientImpl.toHivePartition(_, hiveQlTable)) + if (partitionPruningPred.forall(!ExecSubqueryExpression.hasSubquery(_))) { + hivePartitions + } else { + prunePartitions(hivePartitions) + } + } else { + if (sparkSession.sessionState.conf.metastorePartitionPruning && + partitionPruningPred.nonEmpty) { + rawPartitions + } else { + prunePartitions(rawPartitions) + } + } + } + // exposed for tests - @transient lazy val rawPartitions = { + @transient lazy val rawPartitions: Seq[HivePartition] = { val prunedPartitions = if (sparkSession.sessionState.conf.metastorePartitionPruning && - partitionPruningPred.size > 0) { + partitionPruningPred.nonEmpty) { // Retrieve the original attributes based on expression ID so that capitalization matches. 
val normalizedFilters = partitionPruningPred.map(_.transform { case a: AttributeReference => originalAttributes(a) }) - sparkSession.sessionState.catalog.listPartitionsByFilter( - relation.tableMeta.identifier, - normalizedFilters) + sparkSession.sessionState.catalog + .listPartitionsByFilter(relation.tableMeta.identifier, normalizedFilters) } else { sparkSession.sessionState.catalog.listPartitions(relation.tableMeta.identifier) } @@ -189,7 +207,7 @@ case class HiveTableScanExec( } } else { Utils.withDummyCallSite(sqlContext.sparkContext) { - hadoopReader.makeRDDForPartitionedTable(prunePartitions(rawPartitions)) + hadoopReader.makeRDDForPartitionedTable(prunedPartitions) } } val numOutputRows = longMetric("numOutputRows") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index ee1734b1f232c..801be64702519 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.spark.SparkException import org.apache.spark.sql.{AnalysisException, Row, SparkSession} -import org.apache.spark.sql.catalyst.catalog.{CatalogTable, ExternalCatalog} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, ExternalCatalog, ExternalCatalogUtils} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan @@ -199,7 +199,7 @@ case class InsertIntoHiveTable( attr.withName(name.toLowerCase(Locale.ROOT)) } - saveAsHiveFile( + val writtenParts = saveAsHiveFile( sparkSession = sparkSession, plan = child, hadoopConf = hadoopConf, @@ -209,6 +209,42 @@ case class InsertIntoHiveTable( if (partition.nonEmpty) { if 
(numDynamicPartitions > 0) { + if (overwrite && table.tableType == CatalogTableType.EXTERNAL) { + // SPARK-29295: When insert overwrite to a Hive external table partition, if the + // partition does not exist, Hive will not check if the external partition directory + // exists or not before copying files. So if users drop the partition, and then do + // insert overwrite to the same partition, the partition will have both old and new + // data. We construct partition path. If the path exists, we delete it manually. + writtenParts.foreach { partPath => + val dpMap = partPath.split("/").map { part => + val splitPart = part.split("=") + assert(splitPart.size == 2, s"Invalid written partition path: $part") + ExternalCatalogUtils.unescapePathName(splitPart(0)) -> + ExternalCatalogUtils.unescapePathName(splitPart(1)) + }.toMap + + val updatedPartitionSpec = partition.map { + case (key, Some(value)) => key -> value + case (key, None) if dpMap.contains(key) => key -> dpMap(key) + case (key, _) => + throw new SparkException(s"Dynamic partition key $key is not among " + + "written partition paths.") + } + val partitionColumnNames = table.partitionColumnNames + val tablePath = new Path(table.location) + val partitionPath = ExternalCatalogUtils.generatePartitionPath(updatedPartitionSpec, + partitionColumnNames, tablePath) + + val fs = partitionPath.getFileSystem(hadoopConf) + if (fs.exists(partitionPath)) { + if (!fs.delete(partitionPath, true)) { + throw new RuntimeException( + "Cannot remove partition directory '" + partitionPath.toString) + } + } + } + } + externalCatalog.loadDynamicPartitions( db = table.database, table = table.identifier.table, @@ -230,18 +266,32 @@ case class InsertIntoHiveTable( var doHiveOverwrite = overwrite if (oldPart.isEmpty || !ifPartitionNotExists) { + // SPARK-29295: When insert overwrite to a Hive external table partition, if the + // partition does not exist, Hive will not check if the external partition directory + // exists or not before 
copying files. So if users drop the partition, and then do + // insert overwrite to the same partition, the partition will have both old and new + // data. We construct partition path. If the path exists, we delete it manually. + val partitionPath = if (oldPart.isEmpty && overwrite + && table.tableType == CatalogTableType.EXTERNAL) { + val partitionColumnNames = table.partitionColumnNames + val tablePath = new Path(table.location) + Some(ExternalCatalogUtils.generatePartitionPath(partitionSpec, + partitionColumnNames, tablePath)) + } else { + oldPart.flatMap(_.storage.locationUri.map(uri => new Path(uri))) + } + // SPARK-18107: Insert overwrite runs much slower than hive-client. // Newer Hive largely improves insert overwrite performance. As Spark uses older Hive // version and we may not want to catch up new Hive version every time. We delete the // Hive partition first and then load data file into the Hive partition. - if (oldPart.nonEmpty && overwrite) { - oldPart.get.storage.locationUri.foreach { uri => - val partitionPath = new Path(uri) - val fs = partitionPath.getFileSystem(hadoopConf) - if (fs.exists(partitionPath)) { - if (!fs.delete(partitionPath, true)) { + if (partitionPath.nonEmpty && overwrite) { + partitionPath.foreach { path => + val fs = path.getFileSystem(hadoopConf) + if (fs.exists(path)) { + if (!fs.delete(path, true)) { throw new RuntimeException( - "Cannot remove partition directory '" + partitionPath.toString) + "Cannot remove partition directory '" + path.toString) } // Don't let Hive do overwrite operation since it is slower. 
doHiveOverwrite = false diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala new file mode 100644 index 0000000000000..da6e4c52cf3a7 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution + +import org.apache.hadoop.hive.common.StatsSetupConst + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.CastSupport +import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTable, CatalogTablePartition, ExternalCatalogUtils, HiveTableRelation} +import org.apache.spark.sql.catalyst.expressions.{And, AttributeSet, Expression, ExpressionSet, SubqueryExpression} +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.internal.SQLConf + +/** + * Prune hive table partitions using partition filters on [[HiveTableRelation]]. The pruned + * partitions will be kept in [[HiveTableRelation.prunedPartitions]], and the statistics of + * the hive table relation will be updated based on pruned partitions. + * + * This rule is executed in optimization phase, so the statistics can be updated before physical + * planning, which is useful for some spark strategy, eg. + * [[org.apache.spark.sql.execution.SparkStrategies.JoinSelection]]. + * + * TODO: merge this with PruneFileSourcePartitions after we completely make hive as a data source. + */ +private[sql] class PruneHiveTablePartitions(session: SparkSession) + extends Rule[LogicalPlan] with CastSupport { + + override val conf: SQLConf = session.sessionState.conf + + /** + * Extract the partition filters from the filters on the table. 
+ */ + private def getPartitionKeyFilters( + filters: Seq[Expression], + relation: HiveTableRelation): ExpressionSet = { + val normalizedFilters = DataSourceStrategy.normalizeExprs( + filters.filter(f => f.deterministic && !SubqueryExpression.hasSubquery(f)), relation.output) + val partitionColumnSet = AttributeSet(relation.partitionCols) + ExpressionSet(normalizedFilters.filter { f => + !f.references.isEmpty && f.references.subsetOf(partitionColumnSet) + }) + } + + /** + * Prune the hive table using filters on the partitions of the table. + */ + private def prunePartitions( + relation: HiveTableRelation, + partitionFilters: ExpressionSet): Seq[CatalogTablePartition] = { + if (conf.metastorePartitionPruning) { + session.sessionState.catalog.listPartitionsByFilter( + relation.tableMeta.identifier, partitionFilters.toSeq) + } else { + ExternalCatalogUtils.prunePartitionsByFilter(relation.tableMeta, + session.sessionState.catalog.listPartitions(relation.tableMeta.identifier), + partitionFilters.toSeq, conf.sessionLocalTimeZone) + } + } + + /** + * Update the statistics of the table. 
+ */ + private def updateTableMeta( + tableMeta: CatalogTable, + prunedPartitions: Seq[CatalogTablePartition]): CatalogTable = { + val sizeOfPartitions = prunedPartitions.map { partition => + val rawDataSize = partition.parameters.get(StatsSetupConst.RAW_DATA_SIZE).map(_.toLong) + val totalSize = partition.parameters.get(StatsSetupConst.TOTAL_SIZE).map(_.toLong) + if (rawDataSize.isDefined && rawDataSize.get > 0) { + rawDataSize.get + } else if (totalSize.isDefined && totalSize.get > 0L) { + totalSize.get + } else { + 0L + } + } + if (sizeOfPartitions.forall(_ > 0)) { + val sizeInBytes = sizeOfPartitions.sum + tableMeta.copy(stats = Some(CatalogStatistics(sizeInBytes = BigInt(sizeInBytes)))) + } else { + tableMeta + } + } + + override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + case op @ PhysicalOperation(projections, filters, relation: HiveTableRelation) + if filters.nonEmpty && relation.isPartitioned && relation.prunedPartitions.isEmpty => + val partitionKeyFilters = getPartitionKeyFilters(filters, relation) + if (partitionKeyFilters.nonEmpty) { + val newPartitions = prunePartitions(relation, partitionKeyFilters) + val newTableMeta = updateTableMeta(relation.tableMeta, newPartitions) + val newRelation = relation.copy( + tableMeta = newTableMeta, prunedPartitions = Some(newPartitions)) + // Keep partition filters so that they are visible in physical planning + Project(projections, Filter(filters.reduceLeft(And), newRelation)) + } else { + op + } + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala index e12f663304e7a..40f7b4e8db7c5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformationExec.scala @@ -94,9 +94,8 @@ case class ScriptTransformationExec( // This 
new thread will consume the ScriptTransformation's input rows and write them to the // external process. That process's output will be read by this current thread. val writerThread = new ScriptTransformationWriterThread( - inputIterator, + inputIterator.map(outputProjection), input.map(_.dataType), - outputProjection, inputSerde, inputSoi, ioschema, @@ -249,16 +248,15 @@ case class ScriptTransformationExec( private class ScriptTransformationWriterThread( iter: Iterator[InternalRow], inputSchema: Seq[DataType], - outputProjection: Projection, @Nullable inputSerde: AbstractSerDe, - @Nullable inputSoi: ObjectInspector, + @Nullable inputSoi: StructObjectInspector, ioschema: HiveScriptIOSchema, outputStream: OutputStream, proc: Process, stderrBuffer: CircularBuffer, taskContext: TaskContext, conf: Configuration - ) extends Thread("Thread-ScriptTransformation-Feed") with Logging { + ) extends Thread("Thread-ScriptTransformation-Feed") with HiveInspectors with Logging { setDaemon(true) @@ -278,8 +276,8 @@ private class ScriptTransformationWriterThread( var threwException: Boolean = true val len = inputSchema.length try { - iter.map(outputProjection).foreach { row => - if (inputSerde == null) { + if (inputSerde == null) { + iter.foreach { row => val data = if (len == 0) { ioschema.inputRowFormatMap("TOK_TABLEROWFORMATLINES") } else { @@ -295,10 +293,21 @@ private class ScriptTransformationWriterThread( sb.toString() } outputStream.write(data.getBytes(StandardCharsets.UTF_8)) - } else { - val writable = inputSerde.serialize( - row.asInstanceOf[GenericInternalRow].values, inputSoi) + } + } else { + // Convert Spark InternalRows to hive data via `HiveInspectors.wrapperFor`. 
+ val hiveData = new Array[Any](inputSchema.length) + val fieldOIs = inputSoi.getAllStructFieldRefs.asScala.map(_.getFieldObjectInspector).toArray + val wrappers = fieldOIs.zip(inputSchema).map { case (f, dt) => wrapperFor(f, dt) } + + iter.foreach { row => + var i = 0 + while (i < fieldOIs.length) { + hiveData(i) = if (row.isNullAt(i)) null else wrappers(i)(row.get(i, inputSchema(i))) + i += 1 + } + val writable = inputSerde.serialize(hiveData, inputSoi) if (scriptInputWriter != null) { scriptInputWriter.write(writable) } else { @@ -374,14 +383,13 @@ case class HiveScriptIOSchema ( val outputRowFormatMap = outputRowFormat.toMap.withDefault((k) => defaultFormat(k)) - def initInputSerDe(input: Seq[Expression]): Option[(AbstractSerDe, ObjectInspector)] = { + def initInputSerDe(input: Seq[Expression]): Option[(AbstractSerDe, StructObjectInspector)] = { inputSerdeClass.map { serdeClass => val (columns, columnTypes) = parseAttrs(input) val serde = initSerDe(serdeClass, columns, columnTypes, inputSerdeProps) val fieldObjectInspectors = columnTypes.map(toInspector) val objectInspector = ObjectInspectorFactory .getStandardStructObjectInspector(columns.asJava, fieldObjectInspectors.asJava) - .asInstanceOf[ObjectInspector] (serde, objectInspector) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index d78fc9da9f8a5..05d608a2016a5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -247,7 +247,7 @@ private[hive] case class HiveGenericUDTF( protected class UDTFCollector extends Collector { var collected = new ArrayBuffer[InternalRow] - override def collect(input: java.lang.Object) { + override def collect(input: java.lang.Object): Unit = { // We need to clone the input here because implementations of // GenericUDTF reuse the same object. 
Luckily they are always an array, so // it is easy to clone. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala index db074361ef03c..14276c9b583f2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/package.scala @@ -23,8 +23,5 @@ package org.apache.spark.sql * - Using HiveQL to express queries. * - Reading metadata from the Hive Metastore using HiveSerDes. * - Hive UDFs, UDAs, UDTs - * - * Users that would like access to this functionality should create a - * [[hive.HiveContext HiveContext]] instead of a [[SQLContext]]. */ package object hive diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java index 636ce10da3734..2b532389bafb6 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java @@ -22,7 +22,6 @@ import java.util.List; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -39,10 +38,7 @@ public class JavaDataFrameSuite { Dataset df; private static void checkAnswer(Dataset actual, List expected) { - String errorMessage = QueryTest$.MODULE$.checkAnswer(actual, expected); - if (errorMessage != null) { - Assert.fail(errorMessage); - } + QueryTest$.MODULE$.checkAnswer(actual, expected); } @Before diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java index 25bd4d0017bd8..d433386a6c19a 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java @@ -27,7 +27,6 @@ import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -38,9 +37,6 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.hive.test.TestHive$; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.catalyst.TableIdentifier; import org.apache.spark.util.Utils; @@ -54,13 +50,6 @@ public class JavaMetastoreDataSourcesSuite { FileSystem fs; Dataset df; - private static void checkAnswer(Dataset actual, List expected) { - String errorMessage = QueryTest$.MODULE$.checkAnswer(actual, expected); - if (errorMessage != null) { - Assert.fail(errorMessage); - } - } - @Before public void setUp() throws IOException { sqlContext = TestHive$.MODULE$; @@ -94,57 +83,6 @@ public void tearDown() throws IOException { } } - @Test - public void saveExternalTableAndQueryIt() { - Map options = new HashMap<>(); - options.put("path", path.toString()); - df.write() - .format("org.apache.spark.sql.json") - .mode(SaveMode.Append) - .options(options) - .saveAsTable("javaSavedTable"); - - checkAnswer( - sqlContext.sql("SELECT * FROM javaSavedTable"), - df.collectAsList()); - - Dataset loadedDF = - sqlContext.createExternalTable("externalTable", "org.apache.spark.sql.json", options); - - checkAnswer(loadedDF, df.collectAsList()); - checkAnswer( - sqlContext.sql("SELECT * FROM externalTable"), - df.collectAsList()); - } - - @Test - public void saveExternalTableWithSchemaAndQueryIt() { - Map options = new HashMap<>(); - options.put("path", path.toString()); - df.write() - .format("org.apache.spark.sql.json") - .mode(SaveMode.Append) - .options(options) - .saveAsTable("javaSavedTable"); - - checkAnswer( - sqlContext.sql("SELECT * FROM javaSavedTable"), - df.collectAsList()); - - List fields = 
new ArrayList<>(); - fields.add(DataTypes.createStructField("b", DataTypes.StringType, true)); - StructType schema = DataTypes.createStructType(fields); - Dataset loadedDF = - sqlContext.createExternalTable("externalTable", "org.apache.spark.sql.json", schema, options); - - checkAnswer( - loadedDF, - sqlContext.sql("SELECT b FROM javaSavedTable").collectAsList()); - checkAnswer( - sqlContext.sql("SELECT * FROM externalTable"), - sqlContext.sql("SELECT b FROM javaSavedTable").collectAsList()); - } - @Test public void saveTableAndQueryIt() { Map options = new HashMap<>(); @@ -154,7 +92,7 @@ public void saveTableAndQueryIt() { .options(options) .saveAsTable("javaSavedTable"); - checkAnswer( + QueryTest$.MODULE$.checkAnswer( sqlContext.sql("SELECT * FROM javaSavedTable"), df.collectAsList()); } diff --git a/sql/hive/src/test/noclasspath/README b/sql/hive/src/test/noclasspath/README new file mode 100644 index 0000000000000..8ce1b0bd09668 --- /dev/null +++ b/sql/hive/src/test/noclasspath/README @@ -0,0 +1 @@ +Place files which are being used as resources of tests but shouldn't be added to classpath. 
\ No newline at end of file diff --git a/sql/hive/src/test/noclasspath/TestUDTF-spark-26560.jar b/sql/hive/src/test/noclasspath/TestUDTF-spark-26560.jar new file mode 100644 index 0000000000000..b73b17d5c7880 Binary files /dev/null and b/sql/hive/src/test/noclasspath/TestUDTF-spark-26560.jar differ diff --git a/sql/hive/src/test/resources/golden/Partition pruning - with filter containing non-deterministic condition - query test-0-56a1c59bd13c2a83a91eb0ec658fcecc b/sql/hive/src/test/resources/golden/Partition pruning - with filter containing non-deterministic condition - query test-0-56a1c59bd13c2a83a91eb0ec658fcecc new file mode 100644 index 0000000000000..0fe6b905e7781 --- /dev/null +++ b/sql/hive/src/test/resources/golden/Partition pruning - with filter containing non-deterministic condition - query test-0-56a1c59bd13c2a83a91eb0ec658fcecc @@ -0,0 +1,500 @@ +val_238 11 +val_86 11 +val_311 11 +val_27 11 +val_165 11 +val_409 11 +val_255 11 +val_278 11 +val_98 11 +val_484 11 +val_265 11 +val_193 11 +val_401 11 +val_150 11 +val_273 11 +val_224 11 +val_369 11 +val_66 11 +val_128 11 +val_213 11 +val_146 11 +val_406 11 +val_429 11 +val_374 11 +val_152 11 +val_469 11 +val_145 11 +val_495 11 +val_37 11 +val_327 11 +val_281 11 +val_277 11 +val_209 11 +val_15 11 +val_82 11 +val_403 11 +val_166 11 +val_417 11 +val_430 11 +val_252 11 +val_292 11 +val_219 11 +val_287 11 +val_153 11 +val_193 11 +val_338 11 +val_446 11 +val_459 11 +val_394 11 +val_237 11 +val_482 11 +val_174 11 +val_413 11 +val_494 11 +val_207 11 +val_199 11 +val_466 11 +val_208 11 +val_174 11 +val_399 11 +val_396 11 +val_247 11 +val_417 11 +val_489 11 +val_162 11 +val_377 11 +val_397 11 +val_309 11 +val_365 11 +val_266 11 +val_439 11 +val_342 11 +val_367 11 +val_325 11 +val_167 11 +val_195 11 +val_475 11 +val_17 11 +val_113 11 +val_155 11 +val_203 11 +val_339 11 +val_0 11 +val_455 11 +val_128 11 +val_311 11 +val_316 11 +val_57 11 +val_302 11 +val_205 11 +val_149 11 +val_438 11 +val_345 11 +val_129 11 +val_170 11 
+val_20 11 +val_489 11 +val_157 11 +val_378 11 +val_221 11 +val_92 11 +val_111 11 +val_47 11 +val_72 11 +val_4 11 +val_280 11 +val_35 11 +val_427 11 +val_277 11 +val_208 11 +val_356 11 +val_399 11 +val_169 11 +val_382 11 +val_498 11 +val_125 11 +val_386 11 +val_437 11 +val_469 11 +val_192 11 +val_286 11 +val_187 11 +val_176 11 +val_54 11 +val_459 11 +val_51 11 +val_138 11 +val_103 11 +val_239 11 +val_213 11 +val_216 11 +val_430 11 +val_278 11 +val_176 11 +val_289 11 +val_221 11 +val_65 11 +val_318 11 +val_332 11 +val_311 11 +val_275 11 +val_137 11 +val_241 11 +val_83 11 +val_333 11 +val_180 11 +val_284 11 +val_12 11 +val_230 11 +val_181 11 +val_67 11 +val_260 11 +val_404 11 +val_384 11 +val_489 11 +val_353 11 +val_373 11 +val_272 11 +val_138 11 +val_217 11 +val_84 11 +val_348 11 +val_466 11 +val_58 11 +val_8 11 +val_411 11 +val_230 11 +val_208 11 +val_348 11 +val_24 11 +val_463 11 +val_431 11 +val_179 11 +val_172 11 +val_42 11 +val_129 11 +val_158 11 +val_119 11 +val_496 11 +val_0 11 +val_322 11 +val_197 11 +val_468 11 +val_393 11 +val_454 11 +val_100 11 +val_298 11 +val_199 11 +val_191 11 +val_418 11 +val_96 11 +val_26 11 +val_165 11 +val_327 11 +val_230 11 +val_205 11 +val_120 11 +val_131 11 +val_51 11 +val_404 11 +val_43 11 +val_436 11 +val_156 11 +val_469 11 +val_468 11 +val_308 11 +val_95 11 +val_196 11 +val_288 11 +val_481 11 +val_457 11 +val_98 11 +val_282 11 +val_197 11 +val_187 11 +val_318 11 +val_318 11 +val_409 11 +val_470 11 +val_137 11 +val_369 11 +val_316 11 +val_169 11 +val_413 11 +val_85 11 +val_77 11 +val_0 11 +val_490 11 +val_87 11 +val_364 11 +val_179 11 +val_118 11 +val_134 11 +val_395 11 +val_282 11 +val_138 11 +val_238 11 +val_419 11 +val_15 11 +val_118 11 +val_72 11 +val_90 11 +val_307 11 +val_19 11 +val_435 11 +val_10 11 +val_277 11 +val_273 11 +val_306 11 +val_224 11 +val_309 11 +val_389 11 +val_327 11 +val_242 11 +val_369 11 +val_392 11 +val_272 11 +val_331 11 +val_401 11 +val_242 11 +val_452 11 +val_177 11 +val_226 11 +val_5 11 +val_497 
11 +val_402 11 +val_396 11 +val_317 11 +val_395 11 +val_58 11 +val_35 11 +val_336 11 +val_95 11 +val_11 11 +val_168 11 +val_34 11 +val_229 11 +val_233 11 +val_143 11 +val_472 11 +val_322 11 +val_498 11 +val_160 11 +val_195 11 +val_42 11 +val_321 11 +val_430 11 +val_119 11 +val_489 11 +val_458 11 +val_78 11 +val_76 11 +val_41 11 +val_223 11 +val_492 11 +val_149 11 +val_449 11 +val_218 11 +val_228 11 +val_138 11 +val_453 11 +val_30 11 +val_209 11 +val_64 11 +val_468 11 +val_76 11 +val_74 11 +val_342 11 +val_69 11 +val_230 11 +val_33 11 +val_368 11 +val_103 11 +val_296 11 +val_113 11 +val_216 11 +val_367 11 +val_344 11 +val_167 11 +val_274 11 +val_219 11 +val_239 11 +val_485 11 +val_116 11 +val_223 11 +val_256 11 +val_263 11 +val_70 11 +val_487 11 +val_480 11 +val_401 11 +val_288 11 +val_191 11 +val_5 11 +val_244 11 +val_438 11 +val_128 11 +val_467 11 +val_432 11 +val_202 11 +val_316 11 +val_229 11 +val_469 11 +val_463 11 +val_280 11 +val_2 11 +val_35 11 +val_283 11 +val_331 11 +val_235 11 +val_80 11 +val_44 11 +val_193 11 +val_321 11 +val_335 11 +val_104 11 +val_466 11 +val_366 11 +val_175 11 +val_403 11 +val_483 11 +val_53 11 +val_105 11 +val_257 11 +val_406 11 +val_409 11 +val_190 11 +val_406 11 +val_401 11 +val_114 11 +val_258 11 +val_90 11 +val_203 11 +val_262 11 +val_348 11 +val_424 11 +val_12 11 +val_396 11 +val_201 11 +val_217 11 +val_164 11 +val_431 11 +val_454 11 +val_478 11 +val_298 11 +val_125 11 +val_431 11 +val_164 11 +val_424 11 +val_187 11 +val_382 11 +val_5 11 +val_70 11 +val_397 11 +val_480 11 +val_291 11 +val_24 11 +val_351 11 +val_255 11 +val_104 11 +val_70 11 +val_163 11 +val_438 11 +val_119 11 +val_414 11 +val_200 11 +val_491 11 +val_237 11 +val_439 11 +val_360 11 +val_248 11 +val_479 11 +val_305 11 +val_417 11 +val_199 11 +val_444 11 +val_120 11 +val_429 11 +val_169 11 +val_443 11 +val_323 11 +val_325 11 +val_277 11 +val_230 11 +val_478 11 +val_178 11 +val_468 11 +val_310 11 +val_317 11 +val_333 11 +val_493 11 +val_460 11 +val_207 11 +val_249 11 
+val_265 11 +val_480 11 +val_83 11 +val_136 11 +val_353 11 +val_172 11 +val_214 11 +val_462 11 +val_233 11 +val_406 11 +val_133 11 +val_175 11 +val_189 11 +val_454 11 +val_375 11 +val_401 11 +val_421 11 +val_407 11 +val_384 11 +val_256 11 +val_26 11 +val_134 11 +val_67 11 +val_384 11 +val_379 11 +val_18 11 +val_462 11 +val_492 11 +val_100 11 +val_298 11 +val_9 11 +val_341 11 +val_498 11 +val_146 11 +val_458 11 +val_362 11 +val_186 11 +val_285 11 +val_348 11 +val_167 11 +val_18 11 +val_273 11 +val_183 11 +val_281 11 +val_344 11 +val_97 11 +val_469 11 +val_315 11 +val_84 11 +val_28 11 +val_37 11 +val_448 11 +val_152 11 +val_348 11 +val_307 11 +val_194 11 +val_414 11 +val_477 11 +val_222 11 +val_126 11 +val_90 11 +val_169 11 +val_403 11 +val_400 11 +val_200 11 +val_97 11 diff --git a/sql/hive/src/test/resources/golden/decimal_1_1-3-ac24b36077314acab595ada14e598e b/sql/hive/src/test/resources/golden/decimal_1_1-3-ac24b36077314acab595ada14e598e index 6944273be927c..f360c8c73ad1d 100644 --- a/sql/hive/src/test/resources/golden/decimal_1_1-3-ac24b36077314acab595ada14e598e +++ b/sql/hive/src/test/resources/golden/decimal_1_1-3-ac24b36077314acab595ada14e598e @@ -3,18 +3,18 @@ -0.3 -0.9 -0.9 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 0.1 0.2 0.3 diff --git a/sql/hive/src/test/resources/golden/decimal_1_1-4-128804f8dfe7dbb23be0498b91647ba3 b/sql/hive/src/test/resources/golden/decimal_1_1-4-128804f8dfe7dbb23be0498b91647ba3 index f4bf1446459a9..0fa3b15120f86 100644 --- a/sql/hive/src/test/resources/golden/decimal_1_1-4-128804f8dfe7dbb23be0498b91647ba3 +++ b/sql/hive/src/test/resources/golden/decimal_1_1-4-128804f8dfe7dbb23be0498b91647ba3 @@ -3,18 +3,18 @@ 0.3 0.2 0.1 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 +0.0 -0.1 -0.2 -0.3 diff --git a/sql/hive/src/test/resources/golden/decimal_4-6-693c2e345731f9b2b547c3b75218458e 
b/sql/hive/src/test/resources/golden/decimal_4-6-693c2e345731f9b2b547c3b75218458e index f59549a6e4a46..a298a84cb2c5a 100644 --- a/sql/hive/src/test/resources/golden/decimal_4-6-693c2e345731f9b2b547c3b75218458e +++ b/sql/hive/src/test/resources/golden/decimal_4-6-693c2e345731f9b2b547c3b75218458e @@ -1,38 +1,38 @@ NULL 0 --1234567890.123456789 -1234567890 --4400 4400 --1255.49 -1255 --1.122 -11 --1.12 -1 --1.12 -1 --0.333 0 --0.33 0 --0.3 0 -0 0 -0 0 -0 0 -0.01 0 -0.02 0 -0.1 0 -0.2 0 -0.3 0 -0.33 0 -0.333 0 +-1234567890.1234567890000000000000000 -1234567890 +-4400.0000000000000000000000000 4400 +-1255.4900000000000000000000000 -1255 +-1.1220000000000000000000000 -11 +-1.1200000000000000000000000 -1 +-1.1200000000000000000000000 -1 +-0.3330000000000000000000000 0 +-0.3300000000000000000000000 0 +-0.3000000000000000000000000 0 +0.0000000000000000000000000 0 +0.0000000000000000000000000 0 +0.0000000000000000000000000 0 +0.0100000000000000000000000 0 +0.0200000000000000000000000 0 +0.1000000000000000000000000 0 +0.2000000000000000000000000 0 +0.3000000000000000000000000 0 +0.3300000000000000000000000 0 +0.3330000000000000000000000 0 0.9999999999999999999999999 1 -1 1 -1 1 -1.12 1 -1.122 1 -2 2 -2 2 -3.14 3 -3.14 3 -3.14 3 -3.14 4 -10 10 -20 20 -100 100 -124 124 -125.2 125 -200 200 -1234567890.12345678 1234567890 +1.0000000000000000000000000 1 +1.0000000000000000000000000 1 +1.1200000000000000000000000 1 +1.1220000000000000000000000 1 +2.0000000000000000000000000 2 +2.0000000000000000000000000 2 +3.1400000000000000000000000 3 +3.1400000000000000000000000 3 +3.1400000000000000000000000 3 +3.1400000000000000000000000 4 +10.0000000000000000000000000 10 +20.0000000000000000000000000 20 +100.0000000000000000000000000 100 +124.0000000000000000000000000 124 +125.2000000000000000000000000 125 +200.0000000000000000000000000 200 +1234567890.1234567800000000000000000 1234567890 diff --git a/sql/hive/src/test/resources/golden/decimal_4-7-f1eb45492510cb76cf6b452121af8531 
b/sql/hive/src/test/resources/golden/decimal_4-7-f1eb45492510cb76cf6b452121af8531 index 6bada475c6d3d..60df68a2e3ab5 100644 --- a/sql/hive/src/test/resources/golden/decimal_4-7-f1eb45492510cb76cf6b452121af8531 +++ b/sql/hive/src/test/resources/golden/decimal_4-7-f1eb45492510cb76cf6b452121af8531 @@ -1,38 +1,38 @@ NULL NULL --1234567890.123456789 -3703703670.370370367 --4400 -13200 --1255.49 -3766.47 --1.122 -3.366 --1.12 -3.36 --1.12 -3.36 --0.333 -0.999 --0.33 -0.99 --0.3 -0.9 -0 0 -0 0 -0 0 -0.01 0.03 -0.02 0.06 -0.1 0.3 -0.2 0.6 -0.3 0.9 -0.33 0.99 -0.333 0.999 +-1234567890.1234567890000000000000000 -3703703670.3703703670000000000000000 +-4400.0000000000000000000000000 -13200.0000000000000000000000000 +-1255.4900000000000000000000000 -3766.4700000000000000000000000 +-1.1220000000000000000000000 -3.3660000000000000000000000 +-1.1200000000000000000000000 -3.3600000000000000000000000 +-1.1200000000000000000000000 -3.3600000000000000000000000 +-0.3330000000000000000000000 -0.9990000000000000000000000 +-0.3300000000000000000000000 -0.9900000000000000000000000 +-0.3000000000000000000000000 -0.9000000000000000000000000 +0.0000000000000000000000000 0.0000000000000000000000000 +0.0000000000000000000000000 0.0000000000000000000000000 +0.0000000000000000000000000 0.0000000000000000000000000 +0.0100000000000000000000000 0.0300000000000000000000000 +0.0200000000000000000000000 0.0600000000000000000000000 +0.1000000000000000000000000 0.3000000000000000000000000 +0.2000000000000000000000000 0.6000000000000000000000000 +0.3000000000000000000000000 0.9000000000000000000000000 +0.3300000000000000000000000 0.9900000000000000000000000 +0.3330000000000000000000000 0.9990000000000000000000000 0.9999999999999999999999999 2.9999999999999999999999997 -1 3 -1 3 -1.12 3.36 -1.122 3.366 -2 6 -2 6 -3.14 9.42 -3.14 9.42 -3.14 9.42 -3.14 9.42 -10 30 -20 60 -100 300 -124 372 -125.2 375.6 -200 600 -1234567890.12345678 3703703670.37037034 +1.0000000000000000000000000 3.0000000000000000000000000 
+1.0000000000000000000000000 3.0000000000000000000000000 +1.1200000000000000000000000 3.3600000000000000000000000 +1.1220000000000000000000000 3.3660000000000000000000000 +2.0000000000000000000000000 6.0000000000000000000000000 +2.0000000000000000000000000 6.0000000000000000000000000 +3.1400000000000000000000000 9.4200000000000000000000000 +3.1400000000000000000000000 9.4200000000000000000000000 +3.1400000000000000000000000 9.4200000000000000000000000 +3.1400000000000000000000000 9.4200000000000000000000000 +10.0000000000000000000000000 30.0000000000000000000000000 +20.0000000000000000000000000 60.0000000000000000000000000 +100.0000000000000000000000000 300.0000000000000000000000000 +124.0000000000000000000000000 372.0000000000000000000000000 +125.2000000000000000000000000 375.6000000000000000000000000 +200.0000000000000000000000000 600.0000000000000000000000000 +1234567890.1234567800000000000000000 3703703670.3703703400000000000000000 diff --git a/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429 b/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429 index 93cdc5c85645c..a26c8b7d12886 100644 --- a/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429 +++ b/sql/hive/src/test/resources/golden/serde_regex-10-c5b3ec90419a40660e5f83736241c429 @@ -1,38 +1,38 @@ NULL 0 --1234567890.123456789 -1234567890 --4400 4400 --1255.49 -1255 --1.122 -11 --1.12 -1 --1.12 -1 --0.333 0 --0.33 0 --0.3 0 -0 0 -0 0 -0 0 -0.01 0 -0.02 0 -0.1 0 -0.2 0 -0.3 0 -0.33 0 -0.333 0 -1 1 -1 1 -1 1 -1.12 1 -1.122 1 -2 2 -2 2 -3.14 3 -3.14 3 -3.14 3 -3.14 4 -10 10 -20 20 -100 100 -124 124 -125.2 125 -200 200 -1234567890.12345678 1234567890 +-1234567890.123456789000000000 -1234567890 +-4400.000000000000000000 4400 +-1255.490000000000000000 -1255 +-1.122000000000000000 -11 +-1.120000000000000000 -1 +-1.120000000000000000 -1 +-0.333000000000000000 0 +-0.330000000000000000 0 +-0.300000000000000000 0 
+0.000000000000000000 0 +0.000000000000000000 0 +0.000000000000000000 0 +0.010000000000000000 0 +0.020000000000000000 0 +0.100000000000000000 0 +0.200000000000000000 0 +0.300000000000000000 0 +0.330000000000000000 0 +0.333000000000000000 0 +1.000000000000000000 1 +1.000000000000000000 1 +1.000000000000000000 1 +1.120000000000000000 1 +1.122000000000000000 1 +2.000000000000000000 2 +2.000000000000000000 2 +3.140000000000000000 3 +3.140000000000000000 3 +3.140000000000000000 3 +3.140000000000000000 4 +10.000000000000000000 10 +20.000000000000000000 20 +100.000000000000000000 100 +124.000000000000000000 124 +125.200000000000000000 125 +200.000000000000000000 200 +1234567890.123456780000000000 1234567890 diff --git a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-2-1e88e0ba414a00195f7ebf6b8600ac04 b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-2-1e88e0ba414a00195f7ebf6b8600ac04 index 62d71abc6fc7d..33ea4edf780a6 100644 --- a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-2-1e88e0ba414a00195f7ebf6b8600ac04 +++ b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-2-1e88e0ba414a00195f7ebf6b8600ac04 @@ -3,7 +3,7 @@ 65536 32.68 65536 33.45 65536 58.86 -65536 75.7 +65536 75.70 65536 83.48 65537 NULL 65537 4.49 @@ -57,9 +57,9 @@ 65548 75.39 65548 77.24 65549 NULL -65549 13.3 +65549 13.30 65549 28.93 -65549 50.6 +65549 50.60 65549 55.04 65549 64.91 65549 76.06 @@ -70,7 +70,7 @@ 65550 33.01 65550 57.63 65550 91.38 -65550 96.9 +65550 96.90 65551 NULL 65551 39.43 65551 73.93 @@ -99,7 +99,7 @@ 65559 29.55 65559 56.06 65559 73.94 -65559 83.5 +65559 83.50 65560 NULL 65560 16.86 65560 21.81 @@ -128,7 +128,7 @@ 65565 NULL 65565 81.72 65566 NULL -65566 7.8 +65566 7.80 65567 NULL 65568 NULL 65568 21.79 @@ -136,14 +136,14 @@ 65569 NULL 65570 NULL 65570 17.09 -65570 18.2 +65570 18.20 65570 25.57 65570 45.23 -65570 76.8 +65570 76.80 65571 NULL 65571 26.64 65571 40.68 -65571 82.5 +65571 82.50 65572 NULL 
65572 22.64 65572 43.49 @@ -156,9 +156,9 @@ 65574 31.28 65574 38.54 65575 NULL -65575 17 +65575 17.00 65575 32.85 -65575 83.4 +65575 83.40 65576 NULL 65576 2.04 65576 4.88 @@ -166,7 +166,7 @@ 65577 NULL 65578 NULL 65578 16.01 -65578 41.1 +65578 41.10 65578 51.36 65578 54.35 65578 58.78 @@ -188,7 +188,7 @@ 65582 NULL 65582 1.23 65582 9.35 -65582 96.6 +65582 96.60 65583 NULL 65583 28.07 65583 50.57 @@ -218,7 +218,7 @@ 65588 98.33 65589 NULL 65589 49.49 -65589 72.3 +65589 72.30 65589 74.83 65589 94.73 65590 NULL @@ -240,8 +240,8 @@ 65595 NULL 65595 8.76 65595 67.56 -65595 72.7 -65595 89.6 +65595 72.70 +65595 89.60 65595 90.24 65596 NULL 65596 12.72 @@ -252,7 +252,7 @@ 65597 37.41 65597 69.05 65598 NULL -65598 63.3 +65598 63.30 65599 NULL 65599 0.56 65599 4.93 @@ -283,7 +283,7 @@ 65605 NULL 65606 NULL 65606 7.51 -65606 24.8 +65606 24.80 65606 57.69 65606 67.94 65606 87.16 @@ -294,9 +294,9 @@ 65607 75.86 65607 91.52 65608 NULL -65608 48.9 +65608 48.90 65608 69.42 -65608 87.9 +65608 87.90 65609 NULL 65610 NULL 65610 7.59 @@ -309,7 +309,7 @@ 65611 64.89 65612 NULL 65612 16.05 -65612 25.1 +65612 25.10 65612 52.64 65613 NULL 65614 NULL @@ -317,17 +317,17 @@ 65614 94.47 65615 NULL 65615 10.79 -65615 39.4 +65615 39.40 65615 99.88 65616 NULL -65616 75.2 +65616 75.20 65617 NULL 65617 18.51 65617 47.45 -65617 64.9 +65617 64.90 65618 NULL 65618 10.06 -65618 16.6 +65618 16.60 65618 81.99 65618 88.38 65619 NULL @@ -348,20 +348,20 @@ 65622 28.37 65622 50.08 65622 74.31 -65622 88.6 -65622 93.7 +65622 88.60 +65622 93.70 65623 NULL 65623 30.83 65623 31.22 65623 39.74 65623 48.51 65623 95.58 -65623 97.2 +65623 97.20 65624 NULL 65624 58.02 65624 65.31 65624 70.08 -65624 93.3 +65624 93.30 65625 NULL 65625 20.61 65625 42.86 @@ -377,13 +377,13 @@ 65628 NULL 65628 14.83 65628 30.43 -65628 37.8 +65628 37.80 65628 74.31 65628 83.26 65629 NULL 65629 19.33 65629 58.81 -65629 72.9 +65629 72.90 65630 NULL 65630 72.13 65631 NULL @@ -412,7 +412,7 @@ 65637 48.88 65637 93.41 65638 NULL -65638 11.2 
+65638 11.20 65638 19.13 65639 NULL 65640 NULL @@ -477,20 +477,20 @@ 65654 26.73 65654 29.85 65654 37.74 -65654 37.8 +65654 37.80 65654 53.55 65654 88.23 65655 NULL 65655 77.41 65656 NULL -65656 14 +65656 14.00 65656 14.96 65656 53.27 65656 64.44 65656 82.67 65657 NULL 65657 11.93 -65657 26.4 +65657 26.40 65657 64.39 65657 65.01 65658 NULL @@ -506,8 +506,8 @@ 65659 NULL 65659 8.95 65659 46.57 -65659 53.8 -65659 94.3 +65659 53.80 +65659 94.30 65659 94.69 65659 95.71 65659 99.87 @@ -517,7 +517,7 @@ 65661 NULL 65661 5.24 65661 8.06 -65661 26.8 +65661 26.80 65661 68.98 65662 NULL 65662 59.92 @@ -531,10 +531,10 @@ 65663 94.16 65664 NULL 65664 11.46 -65664 27.6 +65664 27.60 65664 34.71 65664 38.42 -65664 45.4 +65664 45.40 65664 55.82 65664 97.64 65665 NULL @@ -543,13 +543,13 @@ 65666 83.95 65667 NULL 65667 13.96 -65667 63.9 +65667 63.90 65667 97.87 65668 NULL 65669 NULL 65669 1.76 65669 16.95 -65669 38.6 +65669 38.60 65669 54.25 65669 93.79 65670 NULL @@ -561,12 +561,12 @@ 65671 8.65 65671 52.05 65672 NULL -65672 52.6 -65672 58.1 +65672 52.60 +65672 58.10 65672 64.09 65672 75.27 65673 NULL -65673 0.9 +65673 0.90 65673 33.27 65673 43.81 65673 87.78 @@ -576,7 +576,7 @@ 65675 24.19 65675 35.33 65675 35.78 -65675 79.9 +65675 79.90 65675 83.09 65675 87.36 65676 NULL @@ -591,19 +591,19 @@ 65677 87.67 65678 NULL 65678 8.72 -65678 33.9 +65678 33.90 65679 NULL 65679 64.15 65680 NULL 65680 1.01 65680 34.08 65680 54.11 -65680 55.3 +65680 55.30 65680 65.88 65681 NULL 65681 35.45 65681 41.57 -65681 61.3 +65681 61.30 65681 71.17 65681 75.85 65682 NULL @@ -641,7 +641,7 @@ 65691 28.47 65691 56.02 65691 58.01 -65691 69.8 +65691 69.80 65691 76.98 65692 NULL 65692 54.76 @@ -655,19 +655,19 @@ 65694 NULL 65694 58.23 65694 82.24 -65694 88.5 +65694 88.50 65695 NULL 65695 57.33 65695 59.96 65695 77.09 65696 NULL 65696 17.35 -65696 40.3 +65696 40.30 65696 54.02 65697 NULL 65697 3.18 65697 50.01 -65697 67.9 +65697 67.90 65697 86.79 65697 90.16 65698 NULL @@ -685,9 +685,9 @@ 65701 1.81 65701 6.35 
65702 NULL -65702 37.6 +65702 37.60 65702 55.68 -65702 79.5 +65702 79.50 65703 NULL 65703 37.18 65703 40.81 @@ -708,23 +708,23 @@ 65706 55.94 65706 72.87 65707 NULL -65707 76.2 +65707 76.20 65708 NULL 65708 1.29 65709 NULL 65709 5.64 65709 49.79 65710 NULL -65710 86.7 +65710 86.70 65711 NULL 65711 8.66 65711 50.26 65711 71.89 65711 78.69 -65711 96.1 +65711 96.10 65712 NULL 65712 30.27 -65712 34.7 +65712 34.70 65712 49.69 65712 53.65 65713 NULL @@ -739,11 +739,11 @@ 65715 39.62 65715 54.79 65715 81.28 -65715 89.4 +65715 89.40 65716 NULL -65716 9 +65716 9.00 65716 10.07 -65716 33.4 +65716 33.40 65716 71.53 65716 85.93 65717 NULL @@ -758,10 +758,10 @@ 65719 NULL 65719 51.13 65719 66.85 -65719 82.1 +65719 82.10 65720 NULL 65720 2.72 -65720 18.8 +65720 18.80 65720 22.34 65720 62.04 65721 NULL @@ -775,7 +775,7 @@ 65722 1.76 65722 38.82 65723 NULL -65723 39.9 +65723 39.90 65724 NULL 65724 10.52 65724 36.05 @@ -784,7 +784,7 @@ 65724 85.52 65725 NULL 65726 NULL -65726 6 +65726 6.00 65726 60.46 65727 NULL 65727 19.81 @@ -796,7 +796,7 @@ 65729 NULL 65730 NULL 65730 1.35 -65730 30.6 +65730 30.60 65730 81.44 65731 NULL 65731 24.48 @@ -810,14 +810,14 @@ 65733 20.72 65733 88.46 65733 93.45 -65733 99.8 +65733 99.80 65734 NULL 65734 31.71 65735 NULL 65735 12.67 65735 61.16 65736 NULL -65736 28.9 +65736 28.90 65736 48.54 65736 86.51 65737 NULL @@ -828,10 +828,10 @@ 65738 NULL 65738 30.94 65738 82.32 -65738 95.1 +65738 95.10 65739 NULL 65739 74.77 -65739 92.4 +65739 92.40 65740 NULL 65740 7.49 65740 58.65 @@ -840,9 +840,9 @@ 65742 6.61 65742 43.84 65743 NULL -65743 26.6 +65743 26.60 65743 52.65 -65743 62 +65743 62.00 65744 NULL 65744 46.98 65745 NULL @@ -853,11 +853,11 @@ 65746 36.74 65746 93.21 65746 97.52 -65746 98.1 +65746 98.10 65747 NULL 65747 11.16 65747 15.07 -65747 21.8 +65747 21.80 65747 39.77 65747 52.77 65747 71.87 @@ -865,7 +865,7 @@ 65748 29.49 65749 NULL 65749 15.14 -65749 45 +65749 45.00 65749 65.49 65749 73.24 65750 NULL @@ -888,12 +888,12 @@ 65755 NULL 65755 11.23 
65755 22.44 -65755 64 +65755 64.00 65755 67.54 65755 76.75 65755 81.44 65755 90.08 -65755 96.8 +65755 96.80 65756 NULL 65756 1.45 65756 11.81 @@ -907,10 +907,10 @@ 65758 25.62 65758 56.56 65758 60.88 -65758 94.9 +65758 94.90 65759 NULL 65759 10.63 -65759 14.1 +65759 14.10 65759 47.54 65759 92.81 65760 NULL @@ -920,17 +920,17 @@ 65761 NULL 65762 NULL 65762 5.49 -65762 45.7 +65762 45.70 65762 77.96 -65762 87.5 +65762 87.50 65763 NULL 65763 0.72 -65763 43.8 +65763 43.80 65763 86.43 65763 87.99 65764 NULL 65764 31.41 -65764 57.1 +65764 57.10 65765 NULL 65765 88.52 65765 88.56 @@ -938,7 +938,7 @@ 65766 37.06 65766 66.34 65766 86.53 -65766 98.9 +65766 98.90 65767 NULL 65767 90.88 65767 95.57 @@ -950,14 +950,14 @@ 65769 70.52 65769 91.49 65770 NULL -65770 51.9 +65770 51.90 65771 NULL 65771 6.15 -65771 7.5 +65771 7.50 65772 NULL 65773 NULL 65773 3.81 -65773 18.2 +65773 18.20 65773 30.49 65773 47.09 65773 53.09 @@ -966,7 +966,7 @@ 65774 NULL 65774 45.74 65774 45.97 -65774 48.8 +65774 48.80 65774 56.84 65774 94.77 65775 NULL @@ -975,7 +975,7 @@ 65775 66.68 65775 98.43 65776 NULL -65776 18.7 +65776 18.70 65776 28.47 65776 49.73 65776 98.87 @@ -993,7 +993,7 @@ 65778 95.69 65779 NULL 65779 11.87 -65779 28.2 +65779 28.20 65779 39.48 65779 45.61 65779 64.41 @@ -1008,15 +1008,15 @@ 65782 30.24 65782 34.31 65782 76.14 -65782 81.9 +65782 81.90 65783 NULL 65783 46.34 65783 51.08 65783 52.43 65783 62.58 -65783 77.4 +65783 77.40 65784 NULL -65784 15.7 +65784 15.70 65784 31.35 65784 68.18 65784 93.95 @@ -1032,7 +1032,7 @@ 65787 31.19 65787 64.88 65788 NULL -65788 16.1 +65788 16.10 65788 21.81 65788 25.77 65789 NULL @@ -1041,7 +1041,7 @@ 65789 52.49 65789 83.18 65789 92.74 -65789 96.9 +65789 96.90 65790 NULL 65790 46.91 65790 84.87 diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 2-0-81bb7f49a55385878637c8aac4d08e5 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 2-0-81bb7f49a55385878637c8aac4d08e5 index 9091a9156134c..207dababa0a50 
100644 --- a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 2-0-81bb7f49a55385878637c8aac4d08e5 +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 2-0-81bb7f49a55385878637c8aac4d08e5 @@ -18,12 +18,12 @@ 2013-03-01 09:11:58.703073 10.07 1 2013-03-01 09:11:58.703073 10.07 1 2013-03-01 09:11:58.703073 10.07 1 -2013-03-01 09:11:58.703074 37.8 1 -2013-03-01 09:11:58.703074 37.8 1 -2013-03-01 09:11:58.703074 37.8 1 -2013-03-01 09:11:58.703074 37.8 1 -2013-03-01 09:11:58.703074 37.8 1 -2013-03-01 09:11:58.703074 37.8 1 +2013-03-01 09:11:58.703074 37.80 1 +2013-03-01 09:11:58.703074 37.80 1 +2013-03-01 09:11:58.703074 37.80 1 +2013-03-01 09:11:58.703074 37.80 1 +2013-03-01 09:11:58.703074 37.80 1 +2013-03-01 09:11:58.703074 37.80 1 2013-03-01 09:11:58.703075 5.64 1 2013-03-01 09:11:58.703075 5.64 1 2013-03-01 09:11:58.703075 5.64 1 @@ -59,11 +59,11 @@ 2013-03-01 09:11:58.70308 1.76 1 2013-03-01 09:11:58.70308 1.76 1 2013-03-01 09:11:58.70308 1.76 1 -2013-03-01 09:11:58.703081 67.9 1 -2013-03-01 09:11:58.703081 67.9 1 -2013-03-01 09:11:58.703081 67.9 1 -2013-03-01 09:11:58.703081 67.9 1 -2013-03-01 09:11:58.703081 67.9 1 +2013-03-01 09:11:58.703081 67.90 1 +2013-03-01 09:11:58.703081 67.90 1 +2013-03-01 09:11:58.703081 67.90 1 +2013-03-01 09:11:58.703081 67.90 1 +2013-03-01 09:11:58.703081 67.90 1 2013-03-01 09:11:58.703082 37.25 1 2013-03-01 09:11:58.703082 37.25 1 2013-03-01 09:11:58.703082 37.25 1 @@ -148,9 +148,9 @@ 2013-03-01 09:11:58.703096 11.64 1 2013-03-01 09:11:58.703096 11.64 1 2013-03-01 09:11:58.703096 11.64 1 -2013-03-01 09:11:58.703097 0.9 1 -2013-03-01 09:11:58.703097 0.9 1 -2013-03-01 09:11:58.703097 0.9 1 +2013-03-01 09:11:58.703097 0.90 1 +2013-03-01 09:11:58.703097 0.90 1 +2013-03-01 09:11:58.703097 0.90 1 2013-03-01 09:11:58.703098 1.35 1 2013-03-01 09:11:58.703098 1.35 1 2013-03-01 09:11:58.703098 1.35 1 @@ -210,27 +210,27 @@ 2013-03-01 09:11:58.70311 8.16 1 2013-03-01 09:11:58.70311 8.16 1 2013-03-01 
09:11:58.70311 8.16 1 -2013-03-01 09:11:58.703111 18.8 1 -2013-03-01 09:11:58.703111 18.8 1 -2013-03-01 09:11:58.703111 18.8 1 -2013-03-01 09:11:58.703111 18.8 1 -2013-03-01 09:11:58.703111 18.8 1 -2013-03-01 09:11:58.703111 18.8 1 -2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703111 18.80 1 +2013-03-01 09:11:58.703111 18.80 1 +2013-03-01 09:11:58.703111 18.80 1 +2013-03-01 09:11:58.703111 18.80 1 +2013-03-01 09:11:58.703111 18.80 1 +2013-03-01 09:11:58.703111 18.80 1 +2013-03-01 09:11:58.703111 18.80 1 2013-03-01 09:11:58.703112 13.29 1 2013-03-01 09:11:58.703112 13.29 1 2013-03-01 09:11:58.703112 13.29 1 2013-03-01 09:11:58.703112 13.29 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 -2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 +2013-03-01 09:11:58.703113 21.80 1 2013-03-01 09:11:58.703114 73.94 1 2013-03-01 09:11:58.703114 73.94 1 2013-03-01 09:11:58.703114 73.94 1 @@ -256,14 +256,14 @@ 2013-03-01 09:11:58.703118 8.69 1 2013-03-01 09:11:58.703119 58.02 1 2013-03-01 09:11:58.703119 58.02 1 -2013-03-01 09:11:58.70312 52.6 1 -2013-03-01 09:11:58.70312 52.6 1 -2013-03-01 09:11:58.70312 52.6 1 -2013-03-01 09:11:58.70312 52.6 1 -2013-03-01 09:11:58.703121 96.9 1 -2013-03-01 09:11:58.703121 96.9 1 -2013-03-01 09:11:58.703121 96.9 1 -2013-03-01 09:11:58.703121 96.9 1 +2013-03-01 09:11:58.70312 52.60 1 +2013-03-01 09:11:58.70312 52.60 1 +2013-03-01 09:11:58.70312 52.60 1 +2013-03-01 
09:11:58.70312 52.60 1 +2013-03-01 09:11:58.703121 96.90 1 +2013-03-01 09:11:58.703121 96.90 1 +2013-03-01 09:11:58.703121 96.90 1 +2013-03-01 09:11:58.703121 96.90 1 2013-03-01 09:11:58.703122 53.56 1 2013-03-01 09:11:58.703122 53.56 1 2013-03-01 09:11:58.703122 53.56 1 @@ -310,11 +310,11 @@ 2013-03-01 09:11:58.703133 27.34 1 2013-03-01 09:11:58.703133 27.34 1 2013-03-01 09:11:58.703133 27.34 1 -2013-03-01 09:11:58.703134 98.9 1 -2013-03-01 09:11:58.703134 98.9 1 -2013-03-01 09:11:58.703134 98.9 1 -2013-03-01 09:11:58.703134 98.9 1 -2013-03-01 09:11:58.703134 98.9 1 +2013-03-01 09:11:58.703134 98.90 1 +2013-03-01 09:11:58.703134 98.90 1 +2013-03-01 09:11:58.703134 98.90 1 +2013-03-01 09:11:58.703134 98.90 1 +2013-03-01 09:11:58.703134 98.90 1 2013-03-01 09:11:58.703135 29.14 1 2013-03-01 09:11:58.703135 29.14 1 2013-03-01 09:11:58.703135 29.14 1 @@ -467,12 +467,12 @@ 2013-03-01 09:11:58.703162 3.51 1 2013-03-01 09:11:58.703162 3.51 1 2013-03-01 09:11:58.703162 3.51 1 -2013-03-01 09:11:58.703163 15.7 1 -2013-03-01 09:11:58.703163 15.7 1 -2013-03-01 09:11:58.703163 15.7 1 -2013-03-01 09:11:58.703163 15.7 1 -2013-03-01 09:11:58.703163 15.7 1 -2013-03-01 09:11:58.703163 15.7 1 +2013-03-01 09:11:58.703163 15.70 1 +2013-03-01 09:11:58.703163 15.70 1 +2013-03-01 09:11:58.703163 15.70 1 +2013-03-01 09:11:58.703163 15.70 1 +2013-03-01 09:11:58.703163 15.70 1 +2013-03-01 09:11:58.703163 15.70 1 2013-03-01 09:11:58.703164 30.27 1 2013-03-01 09:11:58.703164 30.27 1 2013-03-01 09:11:58.703164 30.27 1 @@ -482,9 +482,9 @@ 2013-03-01 09:11:58.703165 8.38 1 2013-03-01 09:11:58.703165 8.38 1 2013-03-01 09:11:58.703165 8.38 1 -2013-03-01 09:11:58.703166 16.6 1 -2013-03-01 09:11:58.703166 16.6 1 -2013-03-01 09:11:58.703166 16.6 1 +2013-03-01 09:11:58.703166 16.60 1 +2013-03-01 09:11:58.703166 16.60 1 +2013-03-01 09:11:58.703166 16.60 1 2013-03-01 09:11:58.703167 17.66 1 2013-03-01 09:11:58.703167 17.66 1 2013-03-01 09:11:58.703167 17.66 1 @@ -537,11 +537,11 @@ 2013-03-01 
09:11:58.703175 33.37 1 2013-03-01 09:11:58.703175 33.37 1 2013-03-01 09:11:58.703175 33.37 1 -2013-03-01 09:11:58.703176 28.2 1 -2013-03-01 09:11:58.703176 28.2 1 -2013-03-01 09:11:58.703176 28.2 1 -2013-03-01 09:11:58.703176 28.2 1 -2013-03-01 09:11:58.703176 28.2 1 +2013-03-01 09:11:58.703176 28.20 1 +2013-03-01 09:11:58.703176 28.20 1 +2013-03-01 09:11:58.703176 28.20 1 +2013-03-01 09:11:58.703176 28.20 1 +2013-03-01 09:11:58.703176 28.20 1 2013-03-01 09:11:58.703177 11.43 1 2013-03-01 09:11:58.703177 11.43 1 2013-03-01 09:11:58.703177 11.43 1 @@ -567,13 +567,13 @@ 2013-03-01 09:11:58.70318 10.28 1 2013-03-01 09:11:58.70318 10.28 1 2013-03-01 09:11:58.70318 10.28 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 2013-03-01 09:11:58.703182 1.23 1 2013-03-01 09:11:58.703182 1.23 1 2013-03-01 09:11:58.703182 1.23 1 @@ -647,10 +647,10 @@ 2013-03-01 09:11:58.703197 16.01 1 2013-03-01 09:11:58.703197 16.01 1 2013-03-01 09:11:58.703197 16.01 1 -2013-03-01 09:11:58.703198 30.6 1 -2013-03-01 09:11:58.703198 30.6 1 -2013-03-01 09:11:58.703198 30.6 1 -2013-03-01 09:11:58.703198 30.6 1 +2013-03-01 09:11:58.703198 30.60 1 +2013-03-01 09:11:58.703198 30.60 1 +2013-03-01 09:11:58.703198 30.60 1 +2013-03-01 09:11:58.703198 30.60 1 2013-03-01 09:11:58.703199 45.69 1 2013-03-01 09:11:58.703199 45.69 1 2013-03-01 09:11:58.703199 45.69 1 @@ -669,11 +669,11 @@ 2013-03-01 09:11:58.703203 11.63 1 2013-03-01 09:11:58.703203 11.63 1 2013-03-01 09:11:58.703203 11.63 1 -2013-03-01 09:11:58.703205 35.8 1 -2013-03-01 09:11:58.703205 35.8 1 
-2013-03-01 09:11:58.703205 35.8 1 -2013-03-01 09:11:58.703205 35.8 1 -2013-03-01 09:11:58.703205 35.8 1 +2013-03-01 09:11:58.703205 35.80 1 +2013-03-01 09:11:58.703205 35.80 1 +2013-03-01 09:11:58.703205 35.80 1 +2013-03-01 09:11:58.703205 35.80 1 +2013-03-01 09:11:58.703205 35.80 1 2013-03-01 09:11:58.703206 6.61 1 2013-03-01 09:11:58.703206 6.61 1 2013-03-01 09:11:58.703206 6.61 1 @@ -824,9 +824,9 @@ 2013-03-01 09:11:58.703233 40.81 1 2013-03-01 09:11:58.703233 40.81 1 2013-03-01 09:11:58.703233 40.81 1 -2013-03-01 09:11:58.703234 44.1 1 -2013-03-01 09:11:58.703234 44.1 1 -2013-03-01 09:11:58.703234 44.1 1 +2013-03-01 09:11:58.703234 44.10 1 +2013-03-01 09:11:58.703234 44.10 1 +2013-03-01 09:11:58.703234 44.10 1 2013-03-01 09:11:58.703235 6.35 1 2013-03-01 09:11:58.703235 6.35 1 2013-03-01 09:11:58.703235 6.35 1 @@ -834,11 +834,11 @@ 2013-03-01 09:11:58.703235 6.35 1 2013-03-01 09:11:58.703235 6.35 1 2013-03-01 09:11:58.703235 6.35 1 -2013-03-01 09:11:58.703236 37.8 1 -2013-03-01 09:11:58.703236 37.8 1 -2013-03-01 09:11:58.703236 37.8 1 -2013-03-01 09:11:58.703236 37.8 1 -2013-03-01 09:11:58.703236 37.8 1 +2013-03-01 09:11:58.703236 37.80 1 +2013-03-01 09:11:58.703236 37.80 1 +2013-03-01 09:11:58.703236 37.80 1 +2013-03-01 09:11:58.703236 37.80 1 +2013-03-01 09:11:58.703236 37.80 1 2013-03-01 09:11:58.703237 0.24 1 2013-03-01 09:11:58.703237 0.24 1 2013-03-01 09:11:58.703237 0.24 1 @@ -847,17 +847,17 @@ 2013-03-01 09:11:58.703237 0.24 1 2013-03-01 09:11:58.703237 0.24 1 2013-03-01 09:11:58.703237 0.24 1 -2013-03-01 09:11:58.703238 6 1 -2013-03-01 09:11:58.703238 6 1 -2013-03-01 09:11:58.703238 6 1 -2013-03-01 09:11:58.703238 6 1 -2013-03-01 09:11:58.703239 24.8 1 -2013-03-01 09:11:58.703239 24.8 1 -2013-03-01 09:11:58.703239 24.8 1 -2013-03-01 09:11:58.703239 24.8 1 -2013-03-01 09:11:58.703239 24.8 1 -2013-03-01 09:11:58.70324 5.1 1 -2013-03-01 09:11:58.70324 5.1 1 +2013-03-01 09:11:58.703238 6.00 1 +2013-03-01 09:11:58.703238 6.00 1 +2013-03-01 09:11:58.703238 
6.00 1 +2013-03-01 09:11:58.703238 6.00 1 +2013-03-01 09:11:58.703239 24.80 1 +2013-03-01 09:11:58.703239 24.80 1 +2013-03-01 09:11:58.703239 24.80 1 +2013-03-01 09:11:58.703239 24.80 1 +2013-03-01 09:11:58.703239 24.80 1 +2013-03-01 09:11:58.70324 5.10 1 +2013-03-01 09:11:58.70324 5.10 1 2013-03-01 09:11:58.703241 19.33 1 2013-03-01 09:11:58.703241 19.33 1 2013-03-01 09:11:58.703241 19.33 1 @@ -973,16 +973,16 @@ 2013-03-01 09:11:58.703262 1.81 1 2013-03-01 09:11:58.703262 1.81 1 2013-03-01 09:11:58.703262 1.81 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 2013-03-01 09:11:58.703264 52.49 1 2013-03-01 09:11:58.703264 52.49 1 2013-03-01 09:11:58.703264 52.49 1 @@ -1068,12 +1068,12 @@ 2013-03-01 09:11:58.703281 19.95 1 2013-03-01 09:11:58.703281 19.95 1 2013-03-01 09:11:58.703281 19.95 1 -2013-03-01 09:11:58.703282 7.5 1 -2013-03-01 09:11:58.703282 7.5 1 -2013-03-01 09:11:58.703282 7.5 1 -2013-03-01 09:11:58.703282 7.5 1 -2013-03-01 09:11:58.703282 7.5 1 -2013-03-01 09:11:58.703282 7.5 1 +2013-03-01 09:11:58.703282 7.50 1 +2013-03-01 09:11:58.703282 7.50 1 +2013-03-01 09:11:58.703282 7.50 1 +2013-03-01 09:11:58.703282 7.50 1 +2013-03-01 09:11:58.703282 7.50 1 +2013-03-01 09:11:58.703282 7.50 1 2013-03-01 09:11:58.703283 17.62 1 2013-03-01 09:11:58.703283 17.62 1 2013-03-01 09:11:58.703283 17.62 1 @@ -1153,12 
+1153,12 @@ 2013-03-01 09:11:58.703297 25.67 1 2013-03-01 09:11:58.703297 25.67 1 2013-03-01 09:11:58.703297 25.67 1 -2013-03-01 09:11:58.703298 8.8 1 -2013-03-01 09:11:58.703298 8.8 1 -2013-03-01 09:11:58.703298 8.8 1 -2013-03-01 09:11:58.703298 8.8 1 -2013-03-01 09:11:58.703299 9 1 -2013-03-01 09:11:58.703299 9 1 +2013-03-01 09:11:58.703298 8.80 1 +2013-03-01 09:11:58.703298 8.80 1 +2013-03-01 09:11:58.703298 8.80 1 +2013-03-01 09:11:58.703298 8.80 1 +2013-03-01 09:11:58.703299 9.00 1 +2013-03-01 09:11:58.703299 9.00 1 2013-03-01 09:11:58.7033 7.51 1 2013-03-01 09:11:58.7033 7.51 1 2013-03-01 09:11:58.7033 7.51 1 @@ -1217,12 +1217,12 @@ 2013-03-01 09:11:58.703311 7.38 1 2013-03-01 09:11:58.703311 7.38 1 2013-03-01 09:11:58.703311 7.38 1 -2013-03-01 09:11:58.703312 18.2 1 -2013-03-01 09:11:58.703312 18.2 1 -2013-03-01 09:11:58.703312 18.2 1 -2013-03-01 09:11:58.703312 18.2 1 -2013-03-01 09:11:58.703312 18.2 1 -2013-03-01 09:11:58.703312 18.2 1 +2013-03-01 09:11:58.703312 18.20 1 +2013-03-01 09:11:58.703312 18.20 1 +2013-03-01 09:11:58.703312 18.20 1 +2013-03-01 09:11:58.703312 18.20 1 +2013-03-01 09:11:58.703312 18.20 1 +2013-03-01 09:11:58.703312 18.20 1 2013-03-01 09:11:58.703313 9.35 1 2013-03-01 09:11:58.703313 9.35 1 2013-03-01 09:11:58.703313 9.35 1 diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 4-0-12cc78f3953c3e6b5411ddc729541bf0 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 4-0-12cc78f3953c3e6b5411ddc729541bf0 index d02ca48857b5f..a1628c7e1c0c5 100644 --- a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 4-0-12cc78f3953c3e6b5411ddc729541bf0 +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 4-0-12cc78f3953c3e6b5411ddc729541bf0 @@ -46,9 +46,9 @@ 2013-03-01 09:11:58.703092 54.02 1 2013-03-01 09:11:58.703092 54.02 1 2013-03-01 09:11:58.703096 87.84 1 -2013-03-01 09:11:58.703097 0.9 1 -2013-03-01 09:11:58.703097 0.9 1 -2013-03-01 09:11:58.703097 0.9 1 
+2013-03-01 09:11:58.703097 0.90 1 +2013-03-01 09:11:58.703097 0.90 1 +2013-03-01 09:11:58.703097 0.90 1 2013-03-01 09:11:58.703098 21.29 1 2013-03-01 09:11:58.703098 21.29 1 2013-03-01 09:11:58.703098 21.29 1 @@ -88,10 +88,10 @@ 2013-03-01 09:11:58.703113 58.65 1 2013-03-01 09:11:58.703118 8.69 1 2013-03-01 09:11:58.703118 8.69 1 -2013-03-01 09:11:58.70312 52.6 1 -2013-03-01 09:11:58.70312 52.6 1 -2013-03-01 09:11:58.70312 52.6 1 -2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.70312 52.60 1 +2013-03-01 09:11:58.70312 52.60 1 +2013-03-01 09:11:58.70312 52.60 1 +2013-03-01 09:11:58.70312 52.60 1 2013-03-01 09:11:58.703125 78.52 1 2013-03-01 09:11:58.703125 78.52 1 2013-03-01 09:11:58.703125 78.52 1 @@ -119,11 +119,11 @@ 2013-03-01 09:11:58.703136 27.89 1 2013-03-01 09:11:58.703136 27.89 1 2013-03-01 09:11:58.703136 27.89 1 -2013-03-01 09:11:58.703138 86.7 1 -2013-03-01 09:11:58.703138 86.7 1 -2013-03-01 09:11:58.703138 86.7 1 -2013-03-01 09:11:58.703138 86.7 1 -2013-03-01 09:11:58.703138 86.7 1 +2013-03-01 09:11:58.703138 86.70 1 +2013-03-01 09:11:58.703138 86.70 1 +2013-03-01 09:11:58.703138 86.70 1 +2013-03-01 09:11:58.703138 86.70 1 +2013-03-01 09:11:58.703138 86.70 1 2013-03-01 09:11:58.703139 43.53 1 2013-03-01 09:11:58.703139 43.53 1 2013-03-01 09:11:58.703139 43.53 1 @@ -167,13 +167,13 @@ 2013-03-01 09:11:58.703179 60.94 1 2013-03-01 09:11:58.703179 60.94 1 2013-03-01 09:11:58.703179 60.94 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 -2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 +2013-03-01 09:11:58.703181 26.60 1 2013-03-01 09:11:58.703184 73.93 1 2013-03-01 09:11:58.703184 73.93 1 
2013-03-01 09:11:58.703184 73.93 1 @@ -202,12 +202,12 @@ 2013-03-01 09:11:58.703189 37.74 1 2013-03-01 09:11:58.703189 37.74 1 2013-03-01 09:11:58.703189 37.74 1 -2013-03-01 09:11:58.703195 82.5 1 -2013-03-01 09:11:58.703195 82.5 1 -2013-03-01 09:11:58.703195 82.5 1 -2013-03-01 09:11:58.703195 82.5 1 -2013-03-01 09:11:58.703195 82.5 1 -2013-03-01 09:11:58.703195 82.5 1 +2013-03-01 09:11:58.703195 82.50 1 +2013-03-01 09:11:58.703195 82.50 1 +2013-03-01 09:11:58.703195 82.50 1 +2013-03-01 09:11:58.703195 82.50 1 +2013-03-01 09:11:58.703195 82.50 1 +2013-03-01 09:11:58.703195 82.50 1 2013-03-01 09:11:58.703198 97.18 1 2013-03-01 09:11:58.703198 97.18 1 2013-03-01 09:11:58.703198 97.18 1 @@ -233,10 +233,10 @@ 2013-03-01 09:11:58.70321 37.12 1 2013-03-01 09:11:58.70321 37.12 1 2013-03-01 09:11:58.70321 37.12 1 -2013-03-01 09:11:58.703213 48.8 1 -2013-03-01 09:11:58.703213 48.8 1 -2013-03-01 09:11:58.703213 48.8 1 -2013-03-01 09:11:58.703213 48.8 1 +2013-03-01 09:11:58.703213 48.80 1 +2013-03-01 09:11:58.703213 48.80 1 +2013-03-01 09:11:58.703213 48.80 1 +2013-03-01 09:11:58.703213 48.80 1 2013-03-01 09:11:58.703219 32.73 1 2013-03-01 09:11:58.703219 32.73 1 2013-03-01 09:11:58.703219 32.73 1 @@ -253,30 +253,30 @@ 2013-03-01 09:11:58.703221 26.64 1 2013-03-01 09:11:58.703221 26.64 1 2013-03-01 09:11:58.703221 26.64 1 -2013-03-01 09:11:58.703223 57.1 1 -2013-03-01 09:11:58.703223 57.1 1 -2013-03-01 09:11:58.703223 57.1 1 -2013-03-01 09:11:58.703223 57.1 1 -2013-03-01 09:11:58.703223 57.1 1 -2013-03-01 09:11:58.703223 57.1 1 -2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703223 57.10 1 +2013-03-01 09:11:58.703223 57.10 1 +2013-03-01 09:11:58.703223 57.10 1 +2013-03-01 09:11:58.703223 57.10 1 +2013-03-01 09:11:58.703223 57.10 1 +2013-03-01 09:11:58.703223 57.10 1 +2013-03-01 09:11:58.703223 57.10 1 2013-03-01 09:11:58.703224 42.93 1 2013-03-01 09:11:58.703224 42.93 1 2013-03-01 09:11:58.703224 42.93 1 2013-03-01 09:11:58.703224 42.93 1 -2013-03-01 09:11:58.703226 
68.3 1 -2013-03-01 09:11:58.703226 68.3 1 -2013-03-01 09:11:58.703226 68.3 1 -2013-03-01 09:11:58.703226 68.3 1 -2013-03-01 09:11:58.703226 68.3 1 -2013-03-01 09:11:58.703226 68.3 1 -2013-03-01 09:11:58.703231 18.7 1 -2013-03-01 09:11:58.703231 18.7 1 -2013-03-01 09:11:58.703231 18.7 1 -2013-03-01 09:11:58.703231 18.7 1 -2013-03-01 09:11:58.703231 18.7 1 -2013-03-01 09:11:58.703231 18.7 1 -2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703226 68.30 1 +2013-03-01 09:11:58.703226 68.30 1 +2013-03-01 09:11:58.703226 68.30 1 +2013-03-01 09:11:58.703226 68.30 1 +2013-03-01 09:11:58.703226 68.30 1 +2013-03-01 09:11:58.703226 68.30 1 +2013-03-01 09:11:58.703231 18.70 1 +2013-03-01 09:11:58.703231 18.70 1 +2013-03-01 09:11:58.703231 18.70 1 +2013-03-01 09:11:58.703231 18.70 1 +2013-03-01 09:11:58.703231 18.70 1 +2013-03-01 09:11:58.703231 18.70 1 +2013-03-01 09:11:58.703231 18.70 1 2013-03-01 09:11:58.703233 40.81 1 2013-03-01 09:11:58.703233 40.81 1 2013-03-01 09:11:58.703233 40.81 1 @@ -295,24 +295,24 @@ 2013-03-01 09:11:58.703244 25.67 1 2013-03-01 09:11:58.703244 25.67 1 2013-03-01 09:11:58.703244 25.67 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 -2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 +2013-03-01 09:11:58.703245 32.30 1 2013-03-01 09:11:58.703246 72.87 1 2013-03-01 09:11:58.703246 72.87 1 2013-03-01 09:11:58.703248 81.28 1 2013-03-01 09:11:58.703248 81.28 1 2013-03-01 09:11:58.703248 81.28 1 -2013-03-01 09:11:58.703249 93.3 1 -2013-03-01 
09:11:58.703249 93.3 1 -2013-03-01 09:11:58.703249 93.3 1 -2013-03-01 09:11:58.703249 93.3 1 +2013-03-01 09:11:58.703249 93.30 1 +2013-03-01 09:11:58.703249 93.30 1 +2013-03-01 09:11:58.703249 93.30 1 +2013-03-01 09:11:58.703249 93.30 1 2013-03-01 09:11:58.70325 93.79 1 2013-03-01 09:11:58.70325 93.79 1 2013-03-01 09:11:58.70325 93.79 1 @@ -324,12 +324,12 @@ 2013-03-01 09:11:58.703254 0.32 1 2013-03-01 09:11:58.703254 0.32 1 2013-03-01 09:11:58.703254 0.32 1 -2013-03-01 09:11:58.703256 43.8 1 -2013-03-01 09:11:58.703256 43.8 1 -2013-03-01 09:11:58.703256 43.8 1 -2013-03-01 09:11:58.703256 43.8 1 -2013-03-01 09:11:58.703256 43.8 1 -2013-03-01 09:11:58.703256 43.8 1 +2013-03-01 09:11:58.703256 43.80 1 +2013-03-01 09:11:58.703256 43.80 1 +2013-03-01 09:11:58.703256 43.80 1 +2013-03-01 09:11:58.703256 43.80 1 +2013-03-01 09:11:58.703256 43.80 1 +2013-03-01 09:11:58.703256 43.80 1 2013-03-01 09:11:58.703258 21.21 1 2013-03-01 09:11:58.703258 21.21 1 2013-03-01 09:11:58.703258 21.21 1 @@ -342,16 +342,16 @@ 2013-03-01 09:11:58.703262 78.56 1 2013-03-01 09:11:58.703262 78.56 1 2013-03-01 09:11:58.703262 78.56 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 -2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 +2013-03-01 09:11:58.703263 14.40 1 2013-03-01 09:11:58.703264 52.49 1 2013-03-01 09:11:58.703264 52.49 1 2013-03-01 09:11:58.703264 52.49 1 @@ -438,10 +438,10 @@ 2013-03-01 09:11:58.703299 23.19 1 
2013-03-01 09:11:58.703299 23.19 1 2013-03-01 09:11:58.703299 23.19 1 -2013-03-01 09:11:58.703309 89.4 1 -2013-03-01 09:11:58.703309 89.4 1 -2013-03-01 09:11:58.703309 89.4 1 -2013-03-01 09:11:58.703309 89.4 1 +2013-03-01 09:11:58.703309 89.40 1 +2013-03-01 09:11:58.703309 89.40 1 +2013-03-01 09:11:58.703309 89.40 1 +2013-03-01 09:11:58.703309 89.40 1 2013-03-01 09:11:58.70331 69.26 1 2013-03-01 09:11:58.70331 69.26 1 2013-03-01 09:11:58.70331 69.26 1 diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q index 7d3c0dc7d5859..c0a7a368a0768 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/infer_bucket_sort_list_bucket.q @@ -20,7 +20,7 @@ SELECT key, count(*) FROM src GROUP BY key; DESC FORMATTED list_bucketing_table PARTITION (part = '1'); --- create a table skewed on a key which doesnt exist in the data +-- create a table skewed on a key which doesn't exist in the data CREATE TABLE list_bucketing_table2 (key STRING, value STRING) PARTITIONED BY (part STRING) SKEWED BY (key) ON ("abc") diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q index a75758a0728d5..f92cf24dede8b 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/load_fs2.q @@ -1,4 +1,4 @@ --- HIVE-3300 [jira] LOAD DATA INPATH fails if a hdfs file with same name is added to table +-- HIVE-3300 [jira] LOAD DATA INPATH fails if an hdfs file with same name is added to table -- 'loader' table is used only for uploading kv1.txt to HDFS (!hdfs -put is not working on minMRDriver) create table result (key string, value string); 
diff --git a/sql/hive/src/test/resources/test_script.py b/sql/hive/src/test/resources/test_script.py new file mode 100644 index 0000000000000..82ef7b38f0c1b --- /dev/null +++ b/sql/hive/src/test/resources/test_script.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import sys +for line in sys.stdin: + (a, b, c, d, e) = line.split('\t') + sys.stdout.write('\t'.join([a, b, c, d, e])) + sys.stdout.flush() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala index 3226e3a5f318a..3f806ad24ca10 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala @@ -21,11 +21,10 @@ import scala.concurrent.duration._ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{Column, SparkSession} import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.hive.execution.TestingTypedCount import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.SQLConf @@ -44,9 +43,10 @@ import org.apache.spark.sql.types.LongType * Results will be written to "benchmarks/ObjectHashAggregateExecBenchmark-results.txt". 
* }}} */ -object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { +object ObjectHashAggregateExecBenchmark extends SqlBasedBenchmark { + + override def getSparkSession: SparkSession = TestHive.sparkSession - private val spark: SparkSession = TestHive.sparkSession private val sql = spark.sql _ import spark.implicits._ @@ -70,13 +70,13 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("hive udaf w/o group by") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { - sql("SELECT hive_percentile_approx(id, 0.5) FROM t").collect() + sql("SELECT hive_percentile_approx(id, 0.5) FROM t").noop() } } benchmark.addCase("spark af w/o group by") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "true") { - sql("SELECT percentile_approx(id, 0.5) FROM t").collect() + sql("SELECT percentile_approx(id, 0.5) FROM t").noop() } } @@ -84,14 +84,14 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { sql( s"SELECT hive_percentile_approx(id, 0.5) FROM t GROUP BY CAST(id / ${N / 4} AS BIGINT)" - ).collect() + ).noop() } } benchmark.addCase("spark af w/ group by w/o fallback") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "true") { sql(s"SELECT percentile_approx(id, 0.5) FROM t GROUP BY CAST(id / ${N / 4} AS BIGINT)") - .collect() + .noop() } } @@ -100,7 +100,7 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { SQLConf.USE_OBJECT_HASH_AGG.key -> "true", SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key -> "2") { sql(s"SELECT percentile_approx(id, 0.5) FROM t GROUP BY CAST(id / ${N / 4} AS BIGINT)") - .collect() + .noop() } } @@ -125,13 +125,13 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("sort agg w/ group by") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { - df.groupBy($"id" < (N / 
2)).agg(typed_count($"id")).collect() + df.groupBy($"id" < (N / 2)).agg(typed_count($"id")).noop() } } benchmark.addCase("object agg w/ group by w/o fallback") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "true") { - df.groupBy($"id" < (N / 2)).agg(typed_count($"id")).collect() + df.groupBy($"id" < (N / 2)).agg(typed_count($"id")).noop() } } @@ -139,19 +139,19 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { withSQLConf( SQLConf.USE_OBJECT_HASH_AGG.key -> "true", SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key -> "2") { - df.groupBy($"id" < (N / 2)).agg(typed_count($"id")).collect() + df.groupBy($"id" < (N / 2)).agg(typed_count($"id")).noop() } } benchmark.addCase("sort agg w/o group by") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { - df.select(typed_count($"id")).collect() + df.select(typed_count($"id")).noop() } } benchmark.addCase("object agg w/o group by w/o fallback") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "true") { - df.select(typed_count($"id")).collect() + df.select(typed_count($"id")).noop() } } @@ -173,13 +173,13 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("sort agg w/ group by") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { - df.groupBy($"id" / (N / 4) cast LongType).agg(percentile_approx($"id", 0.5)).collect() + df.groupBy($"id" / (N / 4) cast LongType).agg(percentile_approx($"id", 0.5)).noop() } } benchmark.addCase("object agg w/ group by w/o fallback") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "true") { - df.groupBy($"id" / (N / 4) cast LongType).agg(percentile_approx($"id", 0.5)).collect() + df.groupBy($"id" / (N / 4) cast LongType).agg(percentile_approx($"id", 0.5)).noop() } } @@ -187,19 +187,19 @@ object ObjectHashAggregateExecBenchmark extends BenchmarkBase with SQLHelper { withSQLConf( SQLConf.USE_OBJECT_HASH_AGG.key -> "true", SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key 
-> "2") { - df.groupBy($"id" / (N / 4) cast LongType).agg(percentile_approx($"id", 0.5)).collect() + df.groupBy($"id" / (N / 4) cast LongType).agg(percentile_approx($"id", 0.5)).noop() } } benchmark.addCase("sort agg w/o group by") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { - df.select(percentile_approx($"id", 0.5)).collect() + df.select(percentile_approx($"id", 0.5)).noop() } } benchmark.addCase("object agg w/o group by w/o fallback") { _ => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "true") { - df.select(percentile_approx($"id", 0.5)).collect() + df.select(percentile_approx($"id", 0.5)).noop() } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index fcf0b4591ff84..7b3fb68174234 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -216,7 +216,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Drop the table and create it again. sql("DROP TABLE refreshTable") - sparkSession.catalog.createExternalTable("refreshTable", tempPath.toString, "parquet") + sparkSession.catalog.createTable("refreshTable", tempPath.toString, "parquet") // It is not cached. assert(!isCached("refreshTable"), "refreshTable should not be cached.") // Refresh the table. 
REFRESH TABLE command should not make a uncached @@ -237,7 +237,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto tempPath.delete() table("src").write.mode(SaveMode.Overwrite).parquet(tempPath.toString) sql("DROP TABLE IF EXISTS refreshTable") - sparkSession.catalog.createExternalTable("refreshTable", tempPath.toString, "parquet") + sparkSession.catalog.createTable("refreshTable", tempPath.toString, "parquet") checkAnswer( table("refreshTable"), table("src").collect()) @@ -256,7 +256,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Drop the table and create it again. sql("DROP TABLE refreshTable") - sparkSession.catalog.createExternalTable("refreshTable", tempPath.toString, "parquet") + sparkSession.catalog.createTable("refreshTable", tempPath.toString, "parquet") // It is not cached. assert(!isCached("refreshTable"), "refreshTable should not be cached.") // Refresh the table. REFRESH command should not make a uncached @@ -300,7 +300,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto val e = intercept[ParseException] { sql(s"CACHE TABLE $db.cachedTable AS SELECT 1") }.getMessage - assert(e.contains("It is not allowed to add database prefix ") && + assert(e.contains("It is not allowed to add catalog/namespace prefix ") && e.contains("to the table name in CACHE TABLE AS SELECT")) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveContextCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveContextCompatibilitySuite.scala deleted file mode 100644 index a80db765846e9..0000000000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveContextCompatibilitySuite.scala +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import org.scalatest.BeforeAndAfterEach - -import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} - - -class HiveContextCompatibilitySuite extends SparkFunSuite with BeforeAndAfterEach { - - override protected val enableAutoThreadAudit = false - private var sc: SparkContext = null - private var hc: HiveContext = null - - override def beforeAll(): Unit = { - super.beforeAll() - sc = SparkContext.getOrCreate(new SparkConf().setMaster("local").setAppName("test")) - HiveUtils.newTemporaryConfiguration(useInMemoryDerby = true).foreach { case (k, v) => - sc.hadoopConfiguration.set(k, v) - } - hc = new HiveContext(sc) - } - - override def afterEach(): Unit = { - try { - hc.sharedState.cacheManager.clearCache() - hc.sessionState.catalog.reset() - } finally { - super.afterEach() - } - } - - override def afterAll(): Unit = { - try { - sc = null - hc = null - } finally { - super.afterAll() - } - } - - test("basic operations") { - val _hc = hc - import _hc.implicits._ - val df1 = (1 to 20).map { i => (i, i) }.toDF("a", "x") - val df2 = (1 to 100).map { i => (i, i % 10, i % 2 == 0) }.toDF("a", "b", "c") - .select($"a", $"b") - .filter($"a" > 10 && $"b" > 6 && $"c") - val df3 = df1.join(df2, "a") - val res = df3.collect() - val expected = 
Seq((18, 18, 8)).toDF("a", "x", "b").collect() - assert(res.toSeq == expected.toSeq) - df3.createOrReplaceTempView("mai_table") - val df4 = hc.table("mai_table") - val res2 = df4.collect() - assert(res2.toSeq == expected.toSeq) - } - - test("basic DDLs") { - val _hc = hc - import _hc.implicits._ - val databases = hc.sql("SHOW DATABASES").collect().map(_.getString(0)) - assert(databases.toSeq == Seq("default")) - hc.sql("CREATE DATABASE mee_db") - hc.sql("USE mee_db") - val databases2 = hc.sql("SHOW DATABASES").collect().map(_.getString(0)) - assert(databases2.toSet == Set("default", "mee_db")) - val df = (1 to 10).map { i => ("bob" + i.toString, i) }.toDF("name", "age") - df.createOrReplaceTempView("mee_table") - hc.sql("CREATE TABLE moo_table (name string, age int)") - hc.sql("INSERT INTO moo_table SELECT * FROM mee_table") - assert( - hc.sql("SELECT * FROM moo_table order by name").collect().toSeq == - df.collect().toSeq.sortBy(_.getString(0))) - val tables = hc.sql("SHOW TABLES IN mee_db").select("tableName").collect().map(_.getString(0)) - assert(tables.toSet == Set("moo_table", "mee_table")) - hc.sql("DROP TABLE moo_table") - hc.sql("DROP TABLE mee_table") - val tables2 = hc.sql("SHOW TABLES IN mee_db").select("tableName").collect().map(_.getString(0)) - assert(tables2.isEmpty) - hc.sql("USE default") - hc.sql("DROP DATABASE mee_db CASCADE") - val databases3 = hc.sql("SHOW DATABASES").collect().map(_.getString(0)) - assert(databases3.toSeq == Seq("default")) - } - -} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index 0a522b6a11c80..79e569b51ca1d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -108,9 +108,50 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite { 
assert(bucketString.contains("10")) } + test("SPARK-30050: analyze/rename table should not erase the bucketing metadata at hive side") { + val catalog = newBasicCatalog() + externalCatalog.client.runSqlHive( + """ + |CREATE TABLE db1.t(a string, b string) + |CLUSTERED BY (a, b) SORTED BY (a, b) INTO 10 BUCKETS + |STORED AS PARQUET + """.stripMargin) + + val bucketString1 = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t") + .filter(_.contains("Num Buckets")).head + assert(bucketString1.contains("10")) + + catalog.alterTableStats("db1", "t", None) + + val bucketString2 = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t") + .filter(_.contains("Num Buckets")).head + assert(bucketString2.contains("10")) + + catalog.renameTable("db1", "t", "t2") + + val bucketString3 = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t2") + .filter(_.contains("Num Buckets")).head + assert(bucketString3.contains("10")) + } + test("SPARK-23001: NullPointerException when running desc database") { val catalog = newBasicCatalog() catalog.createDatabase(newDb("dbWithNullDesc").copy(description = null), ignoreIfExists = false) assert(catalog.getDatabase("dbWithNullDesc").description == "") } + + test("SPARK-29498 CatalogTable to HiveTable should not change the table's ownership") { + val catalog = newBasicCatalog() + val owner = "SPARK-29498" + val hiveTable = CatalogTable( + identifier = TableIdentifier("spark_29498", Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = storageFormat, + owner = owner, + schema = new StructType().add("i", "int"), + provider = Some("hive")) + + catalog.createTable(hiveTable, ignoreIfExists = false) + assert(catalog.getTable("db1", "spark_29498").owner === owner) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 7d9030b8f87ed..3b5a1247bc09c 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -237,6 +237,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { Source.fromURL(s"${releaseMirror}/spark").mkString .split("\n") .filter(_.contains("""
  • """.r.findFirstMatchIn(_).get.group(1)) .filter(_ < org.apache.spark.SPARK_VERSION) } catch { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala index c300660458fdd..5912992694e84 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.io.LongWritable import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.Row +import org.apache.spark.sql.{Row, TestUserClassUDT} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData} @@ -214,6 +214,12 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors { }) } + test("wrap / unwrap UDT Type") { + val dt = new TestUserClassUDT + checkValue(1, unwrap(wrap(1, toInspector(dt), dt), toInspector(dt))) + checkValue(null, unwrap(wrap(null, toInspector(dt), dt), toInspector(dt))) + } + test("wrap / unwrap Struct Type") { val dt = StructType(dataTypes.zipWithIndex.map { case (t, idx) => StructField(s"c_$idx", t) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala index e71aba72c31fe..94a55b911f092 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetadataCacheSuite.scala @@ -38,7 +38,7 @@ class HiveMetadataCacheSuite extends QueryTest with SQLTestUtils with TestHiveSi checkRefreshView(isTemp = false) } - private def checkRefreshView(isTemp: Boolean) { + private def checkRefreshView(isTemp: Boolean): Unit = { 
withView("view_refresh") { withTable("view_table") { // Create a Parquet directory diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 0e45e18c4b175..b8ef44b096eed 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive -import org.apache.spark.sql.{QueryTest, Row, SaveMode} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.catalyst.parser.CatalystSqlParser @@ -46,7 +46,7 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils { test("duplicated metastore relations") { val df = spark.sql("SELECT * FROM src") logInfo(df.queryExecution.toString) - df.as('a).join(df.as('b), $"a.key" === $"b.key") + df.as("a").join(df.as("b"), $"a.key" === $"b.key") } test("should not truncate struct type catalog string") { @@ -62,7 +62,7 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils { spark.sql("create view vw1 as select 1 as id") val plan = spark.sql("select id from vw1").queryExecution.analyzed val aliases = plan.collect { - case x @ SubqueryAlias(AliasIdentifier("vw1", Some("default")), _) => x + case x @ SubqueryAlias(AliasIdentifier("vw1", Seq("default")), _) => x } assert(aliases.size == 1) } @@ -142,8 +142,8 @@ class DataSourceWithHiveMetastoreCatalogSuite import testImplicits._ private val testDF = range(1, 3).select( - ('id + 0.1) cast DecimalType(10, 3) as 'd1, - 'id cast StringType as 'd2 + ($"id" + 0.1) cast DecimalType(10, 3) as "d1", + $"id" cast StringType as "d2" ).coalesce(1) override def beforeAll(): Unit = { @@ -358,4 
+358,24 @@ class DataSourceWithHiveMetastoreCatalogSuite Seq(table("src").count().toString)) } } + + test("SPARK-29869: Fix convertToLogicalRelation throws unclear AssertionError") { + withTempPath(dir => { + val baseDir = s"${dir.getCanonicalFile.toURI.toString}/non_partition_table" + val partitionLikeDir = s"$baseDir/dt=20191113" + spark.range(3).selectExpr("id").write.parquet(partitionLikeDir) + withTable("non_partition_table") { + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "true") { + spark.sql( + s""" + |CREATE TABLE non_partition_table (id bigint) + |STORED AS PARQUET LOCATION '$baseDir' + |""".stripMargin) + val e = intercept[AnalysisException]( + spark.table("non_partition_table")).getMessage + assert(e.contains("Converted table has 2 columns, but source Hive table has 1 columns.")) + } + } + }) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetMetastoreSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetMetastoreSuite.scala index 5f3705d07bcad..0bdaa0c23c537 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetMetastoreSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetMetastoreSuite.scala @@ -440,7 +440,7 @@ class HiveParquetMetastoreSuite extends ParquetPartitioningTest { def checkCached(tableIdentifier: TableIdentifier): Unit = { // Converted test_parquet should be cached. 
getCachedDataSourceTable(tableIdentifier) match { - case null => fail("Converted test_parquet should be cached in the cache.") + case null => fail(s"Converted ${tableIdentifier.table} should be cached in the cache.") case LogicalRelation(_: HadoopFsRelation, _, _, _) => // OK case other => fail( @@ -480,7 +480,7 @@ class HiveParquetMetastoreSuite extends ParquetPartitioningTest { |INSERT INTO TABLE test_insert_parquet |select a, b from jt """.stripMargin) - checkCached(tableIdentifier) + assert(getCachedDataSourceTable(tableIdentifier) === null) // Make sure we can read the data. checkAnswer( sql("select * from test_insert_parquet"), @@ -512,14 +512,16 @@ class HiveParquetMetastoreSuite extends ParquetPartitioningTest { |PARTITION (`date`='2015-04-01') |select a, b from jt """.stripMargin) - checkCached(tableIdentifier) + // Right now, insert into a partitioned data source Parquet table. We refreshed the table. + // So, we expect it is not cached. + assert(getCachedDataSourceTable(tableIdentifier) === null) sql( """ |INSERT INTO TABLE test_parquet_partitioned_cache_test |PARTITION (`date`='2015-04-02') |select a, b from jt """.stripMargin) - checkCached(tableIdentifier) + assert(getCachedDataSourceTable(tableIdentifier) === null) // Make sure we can cache the partitioned table. 
table("test_parquet_partitioned_cache_test") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index de588768cfdee..b557fe73f1154 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hive import java.io.File +import java.io.IOException import org.apache.spark.sql.{Row, SaveMode} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation @@ -212,7 +213,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val filePath2 = new File(tempDir, "testParquet2").getCanonicalPath val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") - val df2 = df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("y.str").max("y.int") + val df2 = df.as("x").join(df.as("y"), $"x.str" === $"y.str").groupBy("y.str").max("y.int") intercept[Throwable](df2.write.parquet(filePath)) val df3 = df2.toDF("str", "max_int") @@ -222,4 +223,158 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { assert(df4.columns === Array("str", "max_int")) } } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { + Seq("true", "false").foreach { parquetConversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> parquetConversion) { + withTempPath { path => + withTable("parq_tbl1", "parq_tbl2", "parq_tbl3", + "tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + val parquetTblStatement1 = + s""" + |CREATE EXTERNAL TABLE parq_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin + sql(parquetTblStatement1) + + val parquetTblInsertL1 = + s"INSERT INTO TABLE parq_tbl1 VALUES (1, 1, 'parq1'), (2, 2, 'parq2')".stripMargin + sql(parquetTblInsertL1) + + val parquetTblStatement2 = + s""" + 
|CREATE EXTERNAL TABLE parq_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(parquetTblStatement2) + + val parquetTblInsertL2 = + s"INSERT INTO TABLE parq_tbl2 VALUES (3, 3, 'parq3'), (4, 4, 'parq4')".stripMargin + sql(parquetTblInsertL2) + + val parquetTblStatement3 = + s""" + |CREATE EXTERNAL TABLE parq_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin + sql(parquetTblStatement3) + + val parquetTblInsertL3 = + s"INSERT INTO TABLE parq_tbl3 VALUES (5, 5, 'parq5'), (6, 6, 'parq6')".stripMargin + sql(parquetTblInsertL3) + + val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl1"), Nil) + } else { + val msg = intercept[IOException] { + sql("SELECT * FROM tbl1").show() + }.getMessage + assert(msg.contains("Not a file:")) + } + + val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl2"), (1 to 2).map(i => Row(i, i, s"parq$i"))) + } else { + val msg = intercept[IOException] { + sql("SELECT * FROM tbl2").show() + }.getMessage + assert(msg.contains("Not a file:")) + } + + val l2DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${s"${path.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(l2DirStatement) + if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl3"), (3 to 4).map(i => Row(i, i, s"parq$i"))) + } else { + val msg = intercept[IOException] { + sql("SELECT * FROM 
tbl3").show() + }.getMessage + assert(msg.contains("Not a file:")) + } + + val wildcardTopDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${new File(s"${path}/*").toURI}'""".stripMargin + sql(wildcardTopDirStatement) + if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl4"), (1 to 2).map(i => Row(i, i, s"parq$i"))) + } else { + val msg = intercept[IOException] { + sql("SELECT * FROM tbl4").show() + }.getMessage + assert(msg.contains("Not a file:")) + } + + val wildcardL1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl5( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${new File(s"${path}/l1/*").toURI}'""".stripMargin + sql(wildcardL1DirStatement) + if (parquetConversion == "true") { + checkAnswer(sql("SELECT * FROM tbl5"), (1 to 4).map(i => Row(i, i, s"parq$i"))) + } else { + val msg = intercept[IOException] { + sql("SELECT * FROM tbl5").show() + }.getMessage + assert(msg.contains("Not a file:")) + } + + val wildcardL2DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl6( + | c1 int, + | c2 int, + | c3 string) + |STORED AS parquet + |LOCATION '${new File(s"${path}/l1/l2/*").toURI}'""".stripMargin + sql(wildcardL2DirStatement) + checkAnswer(sql("SELECT * FROM tbl6"), (3 to 6).map(i => Row(i, i, s"parq$i"))) + } + } + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 0386dc79804c6..e5d572c90af38 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -18,10 +18,42 @@ package org.apache.spark.sql.hive import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} +import org.apache.spark.sql.catalyst.TableIdentifier +import 
org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { + private var origCreateHiveTableConfig = false + + protected override def beforeAll(): Unit = { + super.beforeAll() + origCreateHiveTableConfig = + SQLConf.get.getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED) + SQLConf.get.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED, true) + } + + protected override def afterAll(): Unit = { + SQLConf.get.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED, + origCreateHiveTableConfig) + super.afterAll() + } + + test("view") { + withView("v1") { + sql("CREATE VIEW v1 AS SELECT 1 AS a") + checkCreateHiveTableOrView("v1", "VIEW") + } + } + + test("view with output columns") { + withView("v1") { + sql("CREATE VIEW v1 (b) AS SELECT 1 AS a") + checkCreateHiveTableOrView("v1", "VIEW") + } + } + test("simple hive table") { withTable("t1") { sql( @@ -36,7 +68,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -56,7 +88,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } } @@ -76,7 +108,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -94,7 +126,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -109,7 +141,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + 
checkCreateHiveTableOrView("t1") } } @@ -131,7 +163,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -144,7 +176,7 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet |INTO 2 BUCKETS """.stripMargin ) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") } } @@ -172,22 +204,44 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet } assert(cause.getMessage.contains(" - partitioned view")) + + val causeForSpark = intercept[AnalysisException] { + sql("SHOW CREATE TABLE v1 AS SERDE") + } + + assert(causeForSpark.getMessage.contains(" - partitioned view")) } } } test("SPARK-24911: keep quotes for nested fields in hive") { withTable("t1") { - val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)" + val createTable = "CREATE TABLE `t1` (`a` STRUCT<`b`: STRING>) USING hive" sql(createTable) - val shownDDL = sql(s"SHOW CREATE TABLE t1") - .head() - .getString(0) - .split("\n") - .head - assert(shownDDL == createTable) + val shownDDL = getShowDDL("SHOW CREATE TABLE t1") + assert(shownDDL == createTable.dropRight(" USING hive".length)) - checkCreateTable("t1") + checkCreateHiveTableOrView("t1") + } + } + + /** + * This method compares the given table with the table created by the DDL generated by + * `SHOW CREATE TABLE AS SERDE`. 
+ */ + private def checkCreateHiveTableOrView(tableName: String, checkType: String = "TABLE"): Unit = { + val table = TableIdentifier(tableName, Some("default")) + val db = table.database.getOrElse("default") + val expected = spark.sharedState.externalCatalog.getTable(db, table.table) + val shownDDL = sql(s"SHOW CREATE TABLE ${table.quotedString} AS SERDE").head().getString(0) + sql(s"DROP $checkType ${table.quotedString}") + + try { + sql(shownDDL) + val actual = spark.sharedState.externalCatalog.getTable(db, table.table) + checkCatalogTables(expected, actual) + } finally { + sql(s"DROP $checkType IF EXISTS ${table.table}") } } @@ -195,4 +249,269 @@ class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSinglet hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog] .client.runSqlHive(ddl) } + + private def checkCreateSparkTableAsHive(tableName: String): Unit = { + val table = TableIdentifier(tableName, Some("default")) + val db = table.database.get + val hiveTable = spark.sharedState.externalCatalog.getTable(db, table.table) + val sparkDDL = sql(s"SHOW CREATE TABLE ${table.quotedString}").head().getString(0) + // Drops original Hive table. + sql(s"DROP TABLE ${table.quotedString}") + + try { + // Creates Spark datasource table using generated Spark DDL. + sql(sparkDDL) + val sparkTable = spark.sharedState.externalCatalog.getTable(db, table.table) + checkHiveCatalogTables(hiveTable, sparkTable) + } finally { + sql(s"DROP TABLE IF EXISTS ${table.table}") + } + } + + private def checkHiveCatalogTables(hiveTable: CatalogTable, sparkTable: CatalogTable): Unit = { + def normalize(table: CatalogTable): CatalogTable = { + val nondeterministicProps = Set( + "CreateTime", + "transient_lastDdlTime", + "grantTime", + "lastUpdateTime", + "last_modified_by", + "last_modified_time", + "Owner:", + // The following are hive specific schema parameters which we do not need to match exactly. 
+ "totalNumberFiles", + "maxFileSize", + "minFileSize" + ) + + table.copy( + createTime = 0L, + lastAccessTime = 0L, + properties = table.properties.filterKeys(!nondeterministicProps.contains(_)), + stats = None, + ignoredProperties = Map.empty, + storage = table.storage.copy(properties = Map.empty), + provider = None, + tracksPartitionsInCatalog = false + ) + } + + def fillSerdeFromProvider(table: CatalogTable): CatalogTable = { + table.provider.flatMap(HiveSerDe.sourceToSerDe(_)).map { hiveSerde => + val newStorage = table.storage.copy( + inputFormat = hiveSerde.inputFormat, + outputFormat = hiveSerde.outputFormat, + serde = hiveSerde.serde + ) + table.copy(storage = newStorage) + }.getOrElse(table) + } + + assert(normalize(fillSerdeFromProvider(sparkTable)) == normalize(hiveTable)) + } + + test("simple hive table in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |TBLPROPERTIES ( + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + |STORED AS orc + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("show create table as serde can't work on data source table") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |USING orc + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + checkCreateHiveTableOrView("t1") + } + + assert(cause.getMessage.contains("Use `SHOW CREATE TABLE` without `AS SERDE` instead")) + } + } + + test("simple external hive table in Spark DDL") { + withTempDir { dir => + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |LOCATION '${dir.toURI}' + |TBLPROPERTIES ( + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + |STORED AS orc + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + } + + test("hive table with STORED AS clause in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT 
COMMENT 'bla', + | c2 STRING + |) + |STORED AS PARQUET + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("hive table with nested fields with STORED AS clause in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING, + | c3 STRUCT + |) + |STORED AS PARQUET + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("hive table with unsupported fileformat in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |STORED AS RCFILE + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + checkCreateSparkTableAsHive("t1") + } + + assert(cause.getMessage.contains("unsupported serde configuration")) + } + } + + test("hive table with serde info in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 INT COMMENT 'bla', + | c2 STRING + |) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' + |STORED AS + | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' + | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("hive view is not supported by show create table without as serde") { + withTable("t1") { + withView("v1") { + sql("CREATE TABLE t1 (c1 STRING, c2 STRING)") + + createRawHiveTable( + s""" + |CREATE VIEW v1 + |AS SELECT * from t1 + """.stripMargin + ) + + val cause = intercept[AnalysisException] { + sql("SHOW CREATE TABLE v1") + } + + assert(cause.getMessage.contains("view isn't supported")) + } + } + } + + test("partitioned, bucketed hive table in Spark DDL") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | emp_id INT COMMENT 'employee id', emp_name STRING, + | emp_dob STRING COMMENT 'employee date of birth', emp_sex STRING COMMENT 'M/F' + |) + |COMMENT 'employee table' + |PARTITIONED BY ( + | emp_country 
STRING COMMENT '2-char code', emp_state STRING COMMENT '2-char code' + |) + |CLUSTERED BY (emp_sex) SORTED BY (emp_id ASC) INTO 10 BUCKETS + |STORED AS ORC + """.stripMargin + ) + + checkCreateSparkTableAsHive("t1") + } + } + + test("show create table for transactional hive table") { + withTable("t1") { + sql( + s""" + |CREATE TABLE t1 ( + | c1 STRING COMMENT 'bla', + | c2 STRING + |) + |TBLPROPERTIES ( + | 'transactional' = 'true', + | 'prop1' = 'value1', + | 'prop2' = 'value2' + |) + |CLUSTERED BY (c1) INTO 10 BUCKETS + |STORED AS ORC + """.stripMargin + ) + + + val cause = intercept[AnalysisException] { + sql("SHOW CREATE TABLE t1") + } + + assert(cause.getMessage.contains( + "SHOW CREATE TABLE doesn't support transactional Hive table")) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index d23e0f2e0d937..31ff62ed0a530 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -24,6 +24,7 @@ import scala.util.Properties import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.hadoop.fs.Path import org.scalatest.{BeforeAndAfterEach, Matchers} +import org.scalatest.Assertions._ import org.apache.spark._ import org.apache.spark.internal.Logging @@ -33,7 +34,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.expressions.Window -import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHiveContext} +import org.apache.spark.sql.hive.test.{HiveTestJars, TestHiveContext} import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH import org.apache.spark.sql.types.{DecimalType, 
StructType} @@ -52,7 +53,7 @@ class HiveSparkSubmitSuite override protected val enableAutoThreadAudit = false - override def beforeEach() { + override def beforeEach(): Unit = { super.beforeEach() } @@ -111,8 +112,8 @@ class HiveSparkSubmitSuite val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB")) - val jar3 = HiveTestUtils.getHiveContribJar.getCanonicalPath - val jar4 = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath + val jar3 = HiveTestJars.getHiveContribJar().getCanonicalPath + val jar4 = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath val jarsString = Seq(jar1, jar2, jar3, jar4).map(j => j.toString).mkString(",") val args = Seq( "--class", SparkSubmitClassLoaderTest.getClass.getName.stripSuffix("$"), @@ -321,7 +322,7 @@ class HiveSparkSubmitSuite "--master", "local-cluster[2,1,1024]", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", - "--jars", HiveTestUtils.getHiveContribJar.getCanonicalPath, + "--jars", HiveTestJars.getHiveContribJar().getCanonicalPath, unusedJar.toString) runSparkSubmit(argsForCreateTable) @@ -454,7 +455,7 @@ object SetWarehouseLocationTest extends Logging { // and use this UDF. We need to run this test in separate JVM to make sure we // can load the jar defined with the function. object TemporaryHiveUDFTest extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") val conf = new SparkConf() conf.set(UI_ENABLED, false) @@ -463,7 +464,7 @@ object TemporaryHiveUDFTest extends Logging { // Load a Hive UDF from the jar. 
logInfo("Registering a temporary Hive UDF provided in a jar.") - val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath + val jar = HiveTestJars.getHiveContribJar().getCanonicalPath hiveContext.sql( s""" |CREATE TEMPORARY FUNCTION example_max @@ -492,7 +493,7 @@ object TemporaryHiveUDFTest extends Logging { // and use this UDF. We need to run this test in separate JVM to make sure we // can load the jar defined with the function. object PermanentHiveUDFTest1 extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") val conf = new SparkConf() conf.set(UI_ENABLED, false) @@ -501,7 +502,7 @@ object PermanentHiveUDFTest1 extends Logging { // Load a Hive UDF from the jar. logInfo("Registering a permanent Hive UDF provided in a jar.") - val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath + val jar = HiveTestJars.getHiveContribJar().getCanonicalPath hiveContext.sql( s""" |CREATE FUNCTION example_max @@ -530,7 +531,7 @@ object PermanentHiveUDFTest1 extends Logging { // resources can be used. We need to run this test in separate JVM to make sure we // can load the jar defined with the function. object PermanentHiveUDFTest2 extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") val conf = new SparkConf() conf.set(UI_ENABLED, false) @@ -538,7 +539,7 @@ object PermanentHiveUDFTest2 extends Logging { val hiveContext = new TestHiveContext(sc) // Load a Hive UDF from the jar. 
logInfo("Write the metadata of a permanent Hive UDF into metastore.") - val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath + val jar = HiveTestJars.getHiveContribJar().getCanonicalPath val function = CatalogFunction( FunctionIdentifier("example_max"), "org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax", @@ -565,7 +566,7 @@ object PermanentHiveUDFTest2 extends Logging { // This object is used for testing SPARK-8368: https://issues.apache.org/jira/browse/SPARK-8368. // We test if we can load user jars in both driver and executors when HiveContext is used. object SparkSubmitClassLoaderTest extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") val conf = new SparkConf() val hiveWarehouseLocation = Utils.createTempDir() @@ -635,7 +636,7 @@ object SparkSubmitClassLoaderTest extends Logging { // This object is used for testing SPARK-8020: https://issues.apache.org/jira/browse/SPARK-8020. // We test if we can correctly set spark sql configurations when HiveContext is used. object SparkSQLConfTest extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { TestUtils.configTestLog4j("INFO") // We override the SparkConf to add spark.sql.hive.metastore.version and // spark.sql.hive.metastore.jars to the beginning of the conf entry array. 
@@ -699,7 +700,7 @@ object SPARK_9757 extends QueryTest { val df = hiveContext .range(10) - .select(('id + 0.1) cast DecimalType(10, 3) as 'dec) + .select(($"id" + 0.1) cast DecimalType(10, 3) as "dec") df.write.option("path", dir.getCanonicalPath).mode("overwrite").saveAsTable("t") checkAnswer(hiveContext.table("t"), df) } @@ -708,7 +709,7 @@ object SPARK_9757 extends QueryTest { val df = hiveContext .range(10) - .select(callUDF("struct", ('id + 0.2) cast DecimalType(10, 3)) as 'dec_struct) + .select(callUDF("struct", ($"id" + 0.2) cast DecimalType(10, 3)) as "dec_struct") df.write.option("path", dir.getCanonicalPath).mode("overwrite").saveAsTable("t") checkAnswer(hiveContext.table("t"), df) } @@ -770,8 +771,8 @@ object SPARK_14244 extends QueryTest { import hiveContext.implicits._ try { - val window = Window.orderBy('id) - val df = spark.range(2).select(cume_dist().over(window).as('cdist)).orderBy('cdist) + val window = Window.orderBy("id") + val df = spark.range(2).select(cume_dist().over(window).as("cdist")).orderBy("cdist") checkAnswer(df, Seq(Row(0.5D), Row(1.0D))) } finally { sparkContext.stop() @@ -806,14 +807,14 @@ object SPARK_18360 { // Hive will use the value of `hive.metastore.warehouse.dir` to generate default table // location for tables in default database. assert(rawTable.storage.locationUri.map( - CatalogUtils.URIToString(_)).get.contains(newWarehousePath)) + CatalogUtils.URIToString).get.contains(newWarehousePath)) hiveClient.dropTable("default", "test_tbl", ignoreIfNotExists = false, purge = false) spark.sharedState.externalCatalog.createTable(tableMeta, ignoreIfExists = false) val readBack = spark.sharedState.externalCatalog.getTable("default", "test_tbl") // Spark SQL will use the location of default database to generate default table // location for tables in default database. 
- assert(readBack.storage.locationUri.map(CatalogUtils.URIToString(_)) + assert(readBack.storage.locationUri.map(CatalogUtils.URIToString) .get.contains(defaultDbLocation)) } finally { hiveClient.dropTable("default", "test_tbl", ignoreIfNotExists = true, purge = false) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala new file mode 100644 index 0000000000000..ca1af73b038a7 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import scala.collection.JavaConverters._ +import scala.util.Random + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF +import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, StandardListObjectInspector} +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT} +import org.apache.spark.sql.types.StructType + +class HiveUserDefinedTypeSuite extends QueryTest with TestHiveSingleton { + private val functionClass = classOf[org.apache.spark.sql.hive.TestUDF].getCanonicalName + + test("Support UDT in Hive UDF") { + val functionName = "get_point_x" + try { + val schema = new StructType().add("point", new ExamplePointUDT, nullable = false) + val input = Row.fromSeq(Seq(new ExamplePoint(3.141592d, -3.141592d))) + val df = spark.createDataFrame(Array(input).toList.asJava, schema) + df.createOrReplaceTempView("src") + spark.sql(s"CREATE FUNCTION $functionName AS '$functionClass'") + + checkAnswer( + spark.sql(s"SELECT $functionName(point) FROM src"), + Row(input.getAs[ExamplePoint](0).x)) + } finally { + // If the test failed part way, we don't want to mask the failure by failing to remove + // temp tables that never got created. + spark.sql(s"DROP FUNCTION IF EXISTS $functionName") + assert( + !spark.sessionState.catalog.functionExists(FunctionIdentifier(functionName)), + s"Function $functionName should have been dropped. 
But, it still exists.") + } + } +} + +class TestUDF extends GenericUDF { + private var data: StandardListObjectInspector = _ + + override def getDisplayString(children: Array[String]): String = "get_point_x" + + override def initialize(arguments: Array[ObjectInspector]): ObjectInspector = { + data = arguments(0).asInstanceOf[StandardListObjectInspector] + PrimitiveObjectInspectorFactory.javaDoubleObjectInspector + } + + override def evaluate(arguments: Array[GenericUDF.DeferredObject]): AnyRef = { + val point = data.getList(arguments(0).get()) + java.lang.Double.valueOf(point.get(0).asInstanceOf[Double]) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index daf06645abc24..4ad97eaa2b1c8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -54,6 +54,15 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton } } + test("newTemporaryConfiguration respect spark.hive.foo=bar in SparkConf") { + sys.props.put("spark.hive.foo", "bar") + Seq(true, false) foreach { useInMemoryDerby => + val hiveConf = HiveUtils.newTemporaryConfiguration(useInMemoryDerby) + assert(!hiveConf.contains("spark.hive.foo")) + assert(hiveConf("hive.foo") === "bar") + } + } + test("ChildFirstURLClassLoader's parent is null, get spark classloader instead") { val conf = new SparkConf val contextClassLoader = Thread.currentThread().getContextClassLoader diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index 73f5bbd88624e..421dcb499bd6a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -19,13 +19,13 @@ package org.apache.spark.sql.hive import 
java.io.File +import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.scalatest.{BeforeAndAfter, PrivateMethodTester} import org.apache.spark.SparkException import org.apache.spark.sql.{QueryTest, _} import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable import org.apache.spark.sql.hive.execution.InsertIntoHiveTable import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf @@ -462,7 +462,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter // Columns `c + 1` and `d + 1` are resolved by position, and thus mapped to partition // columns `b` and `c` of the target table. val df = Seq((1, 2, 3, 4)).toDF("a", "b", "c", "d") - df.select('a + 1, 'b + 1, 'c + 1, 'd + 1).write.insertInto(tableName) + df.select($"a" + 1, $"b" + 1, $"c" + 1, $"d" + 1).write.insertInto(tableName) checkAnswer( sql(s"SELECT a, b, c, d FROM $tableName"), @@ -556,7 +556,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter val inputPath = new Path("/tmp/b/c") var stagingDir = "tmp/b" val saveHiveFile = InsertIntoHiveTable(null, Map.empty, null, false, false, null) - val getStagingDir = PrivateMethod[Path]('getStagingDir) + val getStagingDir = PrivateMethod[Path](Symbol("getStagingDir")) var path = saveHiveFile invokePrivate getStagingDir(inputPath, conf, stagingDir) assert(path.toString.indexOf("/tmp/b_hive_") != -1) @@ -824,4 +824,27 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter } } } + + test("SPARK-30201 HiveOutputWriter standardOI should use ObjectInspectorCopyOption.DEFAULT") { + withTable("t1", "t2") { + withTempDir { dir => + val file = new File(dir, "test.hex") + val hex = "AABBCC" + val bs = org.apache.commons.codec.binary.Hex.decodeHex(hex.toCharArray) + Files.write(bs, file) + val path = file.getParent + sql(s"create table t1 (c string) STORED AS TEXTFILE 
location '$path'") + checkAnswer( + sql("select hex(c) from t1"), + Row(hex) + ) + + sql("create table t2 as select c from t1") + checkAnswer( + sql("select hex(c) from t2"), + Row(hex) + ) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index ba807fb58fe40..41a26344f7c21 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -516,13 +516,13 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv } withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "json") { - sparkSession.catalog.createExternalTable("createdJsonTable", tempPath.toString) + sparkSession.catalog.createTable("createdJsonTable", tempPath.toString) assert(table("createdJsonTable").schema === df.schema) checkAnswer(sql("SELECT * FROM createdJsonTable"), df) assert( intercept[AnalysisException] { - sparkSession.catalog.createExternalTable("createdJsonTable", jsonFilePath.toString) + sparkSession.catalog.createTable("createdJsonTable", jsonFilePath.toString) }.getMessage.contains("Table createdJsonTable already exists."), "We should complain that createdJsonTable already exists") } @@ -534,7 +534,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv // Try to specify the schema. 
withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "not a source name") { val schema = StructType(StructField("b", StringType, true) :: Nil) - sparkSession.catalog.createExternalTable( + sparkSession.catalog.createTable( "createdJsonTable", "org.apache.spark.sql.json", schema, @@ -553,7 +553,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv test("path required error") { assert( intercept[AnalysisException] { - sparkSession.catalog.createExternalTable( + sparkSession.catalog.createTable( "createdJsonTable", "org.apache.spark.sql.json", Map.empty[String, String]) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala index 9060ce2e0eb4b..2d3e462531245 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MultiDatabaseSuite.scala @@ -66,14 +66,14 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle } } - test(s"createExternalTable() to non-default database - with USE") { + test(s"createTable() to non-default database - with USE") { withTempDatabase { db => activateDatabase(db) { withTempPath { dir => val path = dir.getCanonicalPath df.write.format("parquet").mode(SaveMode.Overwrite).save(path) - spark.catalog.createExternalTable("t", path, "parquet") + spark.catalog.createTable("t", path, "parquet") assert(getTableNames(Option(db)).contains("t")) checkAnswer(spark.table("t"), df) @@ -92,12 +92,12 @@ class MultiDatabaseSuite extends QueryTest with SQLTestUtils with TestHiveSingle } } - test(s"createExternalTable() to non-default database - without USE") { + test(s"createTable() to non-default database - without USE") { withTempDatabase { db => withTempPath { dir => val path = dir.getCanonicalPath df.write.format("parquet").mode(SaveMode.Overwrite).save(path) - spark.catalog.createExternalTable(s"$db.t", 
path, "parquet") + spark.catalog.createTable(s"$db.t", path, "parquet") assert(getTableNames(Option(db)).contains("t")) checkAnswer(spark.table(s"$db.t"), df) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 40581066c62bb..488175a22bad7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -54,7 +54,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(dsTbl, hiveTbl).foreach { tbl => sql(s"ANALYZE TABLE $tbl COMPUTE STATISTICS") - val catalogStats = getCatalogStatistics(tbl) + val catalogStats = getTableStats(tbl) withSQLConf(SQLConf.CBO_ENABLED.key -> "false") { val relationStats = spark.table(tbl).queryExecution.optimizedPlan.stats assert(relationStats.sizeInBytes == catalogStats.sizeInBytes) @@ -158,7 +158,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // Non-partitioned table val nonPartTable = "non_part_table" withTable(nonPartTable) { - sql(s"CREATE TABLE $nonPartTable (key STRING, value STRING)") + sql(s"CREATE TABLE $nonPartTable (key STRING, value STRING) USING hive") sql(s"INSERT INTO TABLE $nonPartTable SELECT * FROM src") sql(s"INSERT INTO TABLE $nonPartTable SELECT * FROM src") @@ -312,7 +312,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") - assert(getCatalogStatistics(tableName).sizeInBytes === BigInt(17436)) + assert(getTableStats(tableName).sizeInBytes === BigInt(17436)) } } } @@ -353,11 +353,11 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // Analyze original table - expect 3 partitions sql(s"ANALYZE TABLE $sourceTableName COMPUTE STATISTICS noscan") - assert(getCatalogStatistics(sourceTableName).sizeInBytes === 
BigInt(3 * 5812)) + assert(getTableStats(sourceTableName).sizeInBytes === BigInt(3 * 5812)) // Analyze partial-copy table - expect only 1 partition sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS noscan") - assert(getCatalogStatistics(tableName).sizeInBytes === BigInt(5812)) + assert(getTableStats(tableName).sizeInBytes === BigInt(5812)) } } } @@ -840,7 +840,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto test("alter table should not have the side effect to store statistics in Spark side") { val table = "alter_table_side_effect" withTable(table) { - sql(s"CREATE TABLE $table (i string, j string)") + sql(s"CREATE TABLE $table (i string, j string) USING hive") sql(s"INSERT INTO TABLE $table SELECT 'a', 'b'") val catalogTable1 = getCatalogTable(table) val hiveSize1 = BigInt(catalogTable1.ignoredProperties(StatsSetupConst.TOTAL_SIZE)) @@ -1204,7 +1204,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto assert(tsHistogramProps.size == 1) // Validate histogram after deserialization. 
- val cs = getCatalogStatistics(tableName).colStats + val cs = getTableStats(tableName).colStats val intHistogram = cs("cint").histogram.get val tsHistogram = cs("ctimestamp").histogram.get assert(intHistogram.bins.length == spark.sessionState.conf.histogramNumBins) @@ -1514,4 +1514,46 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } } + + test("SPARK-30269 failed to update partition stats if it's equal to table's old stats") { + val tbl = "SPARK_30269" + val ext_tbl = "SPARK_30269_external" + withTempDir { dir => + withTable(tbl, ext_tbl) { + sql(s"CREATE TABLE $tbl (key INT, value STRING, ds STRING) PARTITIONED BY (ds)") + sql( + s""" + | CREATE TABLE $ext_tbl (key INT, value STRING, ds STRING) + | PARTITIONED BY (ds) + | LOCATION '${dir.toURI}' + """.stripMargin) + + Seq(tbl, ext_tbl).foreach { tblName => + sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") + + // analyze table + sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") + var tableStats = getTableStats(tblName) + assert(tableStats.sizeInBytes == 601) + assert(tableStats.rowCount.isEmpty) + + sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS") + tableStats = getTableStats(tblName) + assert(tableStats.sizeInBytes == 601) + assert(tableStats.rowCount.get == 1) + + // analyze a single partition + sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN") + var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13")) + assert(partStats.sizeInBytes == 601) + assert(partStats.rowCount.isEmpty) + + sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS") + partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13")) + assert(partStats.sizeInBytes == 601) + assert(partStats.rowCount.get == 1) + } + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala new 
file mode 100644 index 0000000000000..77956f4fe69da --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.client + +import java.security.PrivilegedExceptionAction + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.security.UserGroupInformation +import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} + +import org.apache.spark.util.Utils + +class HiveClientUserNameSuite(version: String) extends HiveVersionSuite(version) { + + test("username of HiveClient - no UGI") { + // Assuming we're not faking System username + assert(getUserNameFromHiveClient === System.getProperty("user.name")) + } + + test("username of HiveClient - UGI") { + val ugi = UserGroupInformation.createUserForTesting( + "fakeprincipal@EXAMPLE.COM", Array.empty) + ugi.doAs(new PrivilegedExceptionAction[Unit]() { + override def run(): Unit = { + assert(getUserNameFromHiveClient === ugi.getShortUserName) + } + }) + } + + test("username of HiveClient - Proxy user") { + val ugi = UserGroupInformation.createUserForTesting( + "fakeprincipal@EXAMPLE.COM", Array.empty) + val 
proxyUgi = UserGroupInformation.createProxyUserForTesting( + "proxyprincipal@EXAMPLE.COM", ugi, Array.empty) + proxyUgi.doAs(new PrivilegedExceptionAction[Unit]() { + override def run(): Unit = { + assert(getUserNameFromHiveClient === proxyUgi.getShortUserName) + } + }) + } + + private def getUserNameFromHiveClient: String = { + val hadoopConf = new Configuration() + hadoopConf.set("hive.metastore.warehouse.dir", Utils.createTempDir().toURI().toString()) + val client = buildClient(hadoopConf) + client.userName + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DescribeTableStatement.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuites.scala similarity index 73% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DescribeTableStatement.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuites.scala index 02604b4ac5ac1..e076c01c08980 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/sql/DescribeTableStatement.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuites.scala @@ -15,11 +15,14 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.catalyst.plans.logical.sql +package org.apache.spark.sql.hive.client -import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import scala.collection.immutable.IndexedSeq -case class DescribeTableStatement( - tableName: Seq[String], - partitionSpec: TablePartitionSpec, - isExtended: Boolean) extends ParsedStatement +import org.scalatest.Suite + +class HiveClientUserNameSuites extends Suite with HiveClientVersions { + override def nestedSuites: IndexedSeq[Suite] = { + versions.map(new HiveClientUserNameSuite(_)) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala similarity index 98% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index bda711200acdb..2d615f6fdc261 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -31,8 +31,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StructType} import org.apache.spark.util.Utils -// TODO: Refactor this to `HivePartitionFilteringSuite` -class HiveClientSuite(version: String) +class HivePartitionFilteringSuite(version: String) extends HiveVersionSuite(version) with BeforeAndAfterAll { private val tryDirectSqlKey = HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname @@ -95,7 +94,7 @@ class HiveClientSuite(version: String) } } - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() client = init(true) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuites.scala similarity index 87% rename from sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuites.scala index de1be2115b2d8..a43e778b13b92 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuites.scala @@ -21,9 +21,9 @@ import scala.collection.immutable.IndexedSeq import org.scalatest.Suite -class HiveClientSuites extends Suite with HiveClientVersions { +class HivePartitionFilteringSuites extends Suite with HiveClientVersions { override def nestedSuites: IndexedSeq[Suite] = { // Hive 0.12 does not provide the partition filtering API we call - versions.filterNot(_ == "0.12").map(new HiveClientSuite(_)) + versions.filterNot(_ == "0.12").map(new HivePartitionFilteringSuite(_)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index da2acdc4aa378..4760af7aa46ff 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -22,10 +22,12 @@ import java.net.URI import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.common.StatsSetupConst import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.mapred.TextInputFormat +import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging @@ -169,6 +171,34 @@ class VersionsSuite extends SparkFunSuite 
with Logging { client.createDatabase(tempDB, ignoreIfExists = true) } + test(s"$version: create/get/alter database should pick right user name as owner") { + if (version != "0.12") { + val currentUser = UserGroupInformation.getCurrentUser.getUserName + val ownerName = "SPARK_29425" + val db1 = "SPARK_29425_1" + val db2 = "SPARK_29425_2" + val ownerProps = Map("owner" -> ownerName) + + // create database with owner + val dbWithOwner = CatalogDatabase(db1, "desc", Utils.createTempDir().toURI, ownerProps) + client.createDatabase(dbWithOwner, ignoreIfExists = true) + val getDbWithOwner = client.getDatabase(db1) + assert(getDbWithOwner.properties("owner") === ownerName) + // alter database without owner + client.alterDatabase(getDbWithOwner.copy(properties = Map())) + assert(client.getDatabase(db1).properties("owner") === "") + + // create database without owner + val dbWithoutOwner = CatalogDatabase(db2, "desc", Utils.createTempDir().toURI, Map()) + client.createDatabase(dbWithoutOwner, ignoreIfExists = true) + val getDbWithoutOwner = client.getDatabase(db2) + assert(getDbWithoutOwner.properties("owner") === currentUser) + // alter database with owner + client.alterDatabase(getDbWithoutOwner.copy(properties = ownerProps)) + assert(client.getDatabase(db2).properties("owner") === ownerName) + } + } + test(s"$version: createDatabase with null description") { withTempDir { tmpDir => val dbWithNullDesc = @@ -201,6 +231,22 @@ class VersionsSuite extends SparkFunSuite with Logging { val database = client.getDatabase("temporary").copy(properties = Map("flag" -> "true")) client.alterDatabase(database) assert(client.getDatabase("temporary").properties.contains("flag")) + + // test alter database location + val tempDatabasePath2 = Utils.createTempDir().toURI + // Hive support altering database location since HIVE-8472. 
+ if (version == "3.0" || version == "3.1") { + client.alterDatabase(database.copy(locationUri = tempDatabasePath2)) + val uriInCatalog = client.getDatabase("temporary").locationUri + assert("file" === uriInCatalog.getScheme) + assert(new Path(tempDatabasePath2.getPath).toUri.getPath === uriInCatalog.getPath, + "Failed to alter database location") + } else { + val e = intercept[AnalysisException] { + client.alterDatabase(database.copy(locationUri = tempDatabasePath2)) + } + assert(e.getMessage.contains("does not support altering database location")) + } } test(s"$version: dropDatabase") { @@ -274,6 +320,19 @@ class VersionsSuite extends SparkFunSuite with Logging { assert(client.getTable("default", "src").properties.contains("changed")) } + test(s"$version: alterTable - should respect the original catalog table's owner name") { + val ownerName = "SPARK-29405" + val originalTable = client.getTable("default", "src") + // mocking the owner is what we declared + val newTable = originalTable.copy(owner = ownerName) + client.alterTable(newTable) + assert(client.getTable("default", "src").owner === ownerName) + // mocking the owner is empty + val newTable2 = originalTable.copy(owner = "") + client.alterTable(newTable2) + assert(client.getTable("default", "src").owner === client.userName) + } + test(s"$version: alterTable(dbName: String, tableName: String, table: CatalogTable)") { val newTable = client.getTable("default", "src").copy(properties = Map("changedAgain" -> "")) client.alterTable("default", "src", newTable) @@ -891,7 +950,7 @@ class VersionsSuite extends SparkFunSuite with Logging { """.stripMargin ) - val errorMsg = "data type mismatch: cannot cast decimal(2,1) to binary" + val errorMsg = "Cannot safely cast 'f0': DecimalType(2,1) to BinaryType" if (isPartitioned) { val insertStmt = s"INSERT OVERWRITE TABLE $tableName partition (ds='a') SELECT 1.3" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index d06cc1c0a88ac..f84b854048e8a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -24,7 +24,7 @@ import test.org.apache.spark.sql.MyDoubleAvg import test.org.apache.spark.sql.MyDoubleSum import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, UnsafeRow} import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -1018,6 +1018,31 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te val agg2 = agg1.groupBy($"text").agg(sum($"avg_res")) checkAnswer(agg2, Row("a", BigDecimal("11.9999999994857142860000"))) } + + test("SPARK-29122: hash-based aggregates for unfixed-length decimals in the interpreter mode") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + withTempView("t") { + spark.range(3).selectExpr("CAST(id AS decimal(38, 0)) a").createOrReplaceTempView("t") + checkAnswer(sql("SELECT SUM(a) FROM t"), Row(java.math.BigDecimal.valueOf(3))) + } + } + } + + test("SPARK-29140: HashAggregateExec aggregating binary type doesn't break codegen compilation") { + val schema = new StructType().add("id", IntegerType, nullable = false) + .add("c1", BinaryType, nullable = true) + + withSQLConf( + SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC.key -> "true", + SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> "1") { + val emptyRows = spark.sparkContext.parallelize(Seq.empty[Row], 1) + val aggDf = spark.createDataFrame(emptyRows, schema) + .groupBy($"id" % 10 as "group") + 
.agg(countDistinct($"c1")) + checkAnswer(aggDf, Seq.empty[Row]) + } + } } @@ -1038,7 +1063,7 @@ class HashAggregationQueryWithControlledFallbackSuite extends AggregationQuerySu // todo: remove it? val newActual = Dataset.ofRows(spark, actual.logicalPlan) - QueryTest.checkAnswer(newActual, expectedAnswer) match { + QueryTest.getErrorMessageInCheckAnswer(newActual, expectedAnswer) match { case Some(errorMessage) => val newErrorMessage = s""" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index 9147a98c94457..dbbf2b29fe8b7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.execution.command.LoadDataCommand import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType @@ -57,7 +58,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto |STORED AS PARQUET |TBLPROPERTIES('prop1Key'="prop1Val", '`prop2Key`'="prop2Val") """.stripMargin) - sql("CREATE TABLE parquet_tab3(col1 int, `col 2` int)") + sql("CREATE TABLE parquet_tab3(col1 int, `col 2` int) USING hive") sql("CREATE TABLE parquet_tab4 (price int, qty int) partitioned by (year int, month int)") sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 1) SELECT 1, 1") sql("INSERT INTO parquet_tab4 PARTITION(year = 2015, month = 2) SELECT 2, 2") @@ -129,10 +130,10 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } test("show tblproperties for 
datasource table - errors") { - val message1 = intercept[NoSuchTableException] { + val message = intercept[AnalysisException] { sql("SHOW TBLPROPERTIES badtable") }.getMessage - assert(message1.contains("Table or view 'badtable' not found in database 'default'")) + assert(message.contains("Table not found: badtable")) // When key is not found, a row containing the error is returned. checkAnswer( @@ -146,7 +147,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto checkAnswer(sql("SHOW TBLPROPERTIES parquet_tab2('`prop2Key`')"), Row("prop2Val")) } - test("show tblproperties for spark temporary table - empty row") { + test("show tblproperties for spark temporary table - AnalysisException is thrown") { withTempView("parquet_temp") { sql( """ @@ -154,8 +155,10 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto |USING org.apache.spark.sql.parquet.DefaultSource """.stripMargin) - // An empty sequence of row is returned for session temporary table. 
- checkAnswer(sql("SHOW TBLPROPERTIES parquet_temp"), Nil) + val message = intercept[AnalysisException] { + sql("SHOW TBLPROPERTIES parquet_temp") + }.getMessage + assert(message.contains("parquet_temp is a temp view not table")) } } @@ -289,7 +292,29 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } checkAnswer( sql("SELECT employeeID, employeeName FROM part_table WHERE c = '2' AND d = '1'"), - sql("SELECT * FROM non_part_table").collect()) + sql("SELECT * FROM non_part_table")) + } + } + + test("SPARK-28084 case insensitive names of static partitioning in INSERT commands") { + withTable("part_table") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql("CREATE TABLE part_table (price int, qty int) partitioned by (year int, month int)") + sql("INSERT INTO part_table PARTITION(YEar = 2015, month = 1) SELECT 1, 1") + checkAnswer(sql("SELECT * FROM part_table"), Row(1, 1, 2015, 1)) + } + } + } + + test("SPARK-28084 case insensitive names of dynamic partitioning in INSERT commands") { + withTable("part_table") { + withSQLConf( + SQLConf.CASE_SENSITIVE.key -> "false", + "hive.exec.dynamic.partition.mode" -> "nonstrict") { + sql("CREATE TABLE part_table (price int) partitioned by (year int)") + sql("INSERT INTO part_table PARTITION(YEar) SELECT 1, 2019") + checkAnswer(sql("SELECT * FROM part_table"), Row(1, 2019)) + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index c0158f1947d99..8b1f4c92755b9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -30,7 +30,6 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.planning.PhysicalOperation import 
org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.logical.sql.{DescribeColumnStatement, DescribeTableStatement} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.execution.SQLExecution @@ -229,7 +228,7 @@ abstract class HiveComparisonTest sql: String, reset: Boolean = true, tryWithoutResettingFirst: Boolean = false, - skip: Boolean = false) { + skip: Boolean = false): Unit = { // testCaseName must not contain ':', which is not allowed to appear in a filename of Windows assert(!testCaseName.contains(":")) @@ -347,8 +346,7 @@ abstract class HiveComparisonTest val catalystResults = queryList.zip(hiveResults).map { case (queryString, hive) => val query = new TestHiveQueryExecution(queryString.replace("../../data", testDataPath)) def getResult(): Seq[String] = { - SQLExecution.withNewExecutionId( - query.sparkSession, query)(hiveResultString(query.executedPlan)) + SQLExecution.withNewExecutionId(query)(hiveResultString(query.executedPlan)) } try { (query, prepareAnswer(query, getResult())) } catch { case e: Throwable => @@ -374,10 +372,10 @@ abstract class HiveComparisonTest // We will ignore the ExplainCommand, ShowFunctions, DescribeFunction if ((!hiveQuery.logical.isInstanceOf[ExplainCommand]) && - (!hiveQuery.logical.isInstanceOf[ShowFunctionsCommand]) && - (!hiveQuery.logical.isInstanceOf[DescribeFunctionCommand]) && + (!hiveQuery.logical.isInstanceOf[ShowFunctionsStatement]) && + (!hiveQuery.logical.isInstanceOf[DescribeFunctionStatement]) && (!hiveQuery.logical.isInstanceOf[DescribeCommandBase]) && - (!hiveQuery.logical.isInstanceOf[DescribeTableStatement]) && + (!hiveQuery.logical.isInstanceOf[DescribeRelation]) && (!hiveQuery.logical.isInstanceOf[DescribeColumnStatement]) && preparedHive != catalyst) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index cd8e2eaa2b4dc..ba48cfd4142f6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -31,6 +31,8 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.HiveExternalCatalog @@ -179,8 +181,8 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA test("SPARK-22431: illegal nested type") { val queries = Seq( - "CREATE TABLE t AS SELECT STRUCT('a' AS `$a`, 1 AS b) q", - "CREATE TABLE t(q STRUCT<`$a`:INT, col2:STRING>, i1 INT)", + "CREATE TABLE t USING hive AS SELECT STRUCT('a' AS `$a`, 1 AS b) q", + "CREATE TABLE t(q STRUCT<`$a`:INT, col2:STRING>, i1 INT) USING hive", "CREATE VIEW t AS SELECT STRUCT('a' AS `$a`, 1 AS b) q") queries.foreach(query => { @@ -251,7 +253,7 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA test("SPARK-22431: negative alter table tests with nested types") { withTable("t1") { - spark.sql("CREATE TABLE t1 (q STRUCT, i1 INT)") + spark.sql("CREATE TABLE t1 (q STRUCT, i1 INT) USING hive") val err = intercept[SparkException] { spark.sql("ALTER TABLE t1 ADD COLUMNS (newcol1 STRUCT<`$col1`:STRING, col2:Int>)") }.getMessage @@ -360,6 +362,46 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA Row(2147483648L, "AAA", 3.14, false) :: Row(2147483649L, "BBB", 3.142, true) :: 
Nil) } } + + test("Create Table LIKE USING Hive built-in ORC in Hive catalog") { + val catalog = spark.sessionState.catalog + withTable("s", "t") { + sql("CREATE TABLE s(a INT, b INT) USING parquet") + val source = catalog.getTableMetadata(TableIdentifier("s")) + assert(source.provider == Some("parquet")) + sql("CREATE TABLE t LIKE s USING org.apache.spark.sql.hive.orc") + val table = catalog.getTableMetadata(TableIdentifier("t")) + assert(table.provider == Some("org.apache.spark.sql.hive.orc")) + } + } + + test("Database Ownership") { + val catalog = spark.sessionState.catalog + try { + val db = "spark_29425_1" + sql(s"CREATE DATABASE $db") + assert(sql(s"DESCRIBE DATABASE EXTENDED $db") + .where("database_description_item='Owner'") + .collect().head.getString(1) === Utils.getCurrentUserName()) + sql(s"ALTER DATABASE $db SET DBPROPERTIES('abc'='xyz')") + assert(sql(s"DESCRIBE DATABASE EXTENDED $db") + .where("database_description_item='Owner'") + .collect().head.getString(1) === Utils.getCurrentUserName()) + } finally { + catalog.reset() + } + } + + test("Table Ownership") { + val catalog = spark.sessionState.catalog + try { + sql(s"CREATE TABLE spark_30019(k int)") + assert(sql(s"DESCRIBE TABLE EXTENDED spark_30019").where("col_name='Owner'") + .collect().head.getString(1) === Utils.getCurrentUserName()) + } finally { + catalog.reset() + } + } } class HiveDDLSuite @@ -417,7 +459,7 @@ class HiveDDLSuite "create the table `default`.`tab1`")) e = intercept[AnalysisException] { - sql(s"CREATE TABLE tab2 location '${tempDir.getCanonicalPath}'") + sql(s"CREATE TABLE tab2 USING hive location '${tempDir.getCanonicalPath}'") }.getMessage assert(e.contains("Unable to infer the schema. 
The schema specification is required to " + "create the table `default`.`tab2`")) @@ -1100,7 +1142,8 @@ class HiveDDLSuite sql(s"CREATE DATABASE $dbName Location '${tmpDir.toURI.getPath.stripSuffix("/")}'") val db1 = catalog.getDatabaseMetadata(dbName) val dbPath = new URI(tmpDir.toURI.toString.stripSuffix("/")) - assert(db1 == CatalogDatabase(dbName, "", dbPath, Map.empty)) + assert(db1.copy(properties = db1.properties -- Seq(PROP_OWNER)) === + CatalogDatabase(dbName, "", dbPath, Map.empty)) sql("USE db1") sql(s"CREATE TABLE $tabName as SELECT 1") @@ -1138,7 +1181,8 @@ class HiveDDLSuite val expectedDBLocation = s"file:${dbPath.toUri.getPath.stripSuffix("/")}/$dbName.db" val expectedDBUri = CatalogUtils.stringToURI(expectedDBLocation) val db1 = catalog.getDatabaseMetadata(dbName) - assert(db1 == CatalogDatabase( + assert(db1.copy(properties = db1.properties -- Seq(PROP_OWNER)) == + CatalogDatabase( dbName, "", expectedDBUri, @@ -1223,57 +1267,64 @@ class HiveDDLSuite } test("CREATE TABLE LIKE a temporary view") { - // CREATE TABLE LIKE a temporary view. - withCreateTableLikeTempView(location = None) + Seq(None, Some("parquet"), Some("orc"), Some("hive")) foreach { provider => + // CREATE TABLE LIKE a temporary view. + withCreateTableLikeTempView(location = None, provider) - // CREATE TABLE LIKE a temporary view location ... - withTempDir { tmpDir => - withCreateTableLikeTempView(Some(tmpDir.toURI.toString)) + // CREATE TABLE LIKE a temporary view location ... 
+ withTempDir { tmpDir => + withCreateTableLikeTempView(Some(tmpDir.toURI.toString), provider) + } } } - private def withCreateTableLikeTempView(location : Option[String]): Unit = { + private def withCreateTableLikeTempView( + location : Option[String], provider: Option[String]): Unit = { val sourceViewName = "tab1" val targetTabName = "tab2" val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTempView(sourceViewName) { withTable(targetTabName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d") .createTempView(sourceViewName) val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause") + val providerClause = if (provider.nonEmpty) s"USING ${provider.get}" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $providerClause $locationClause") val sourceTable = spark.sessionState.catalog.getTempViewOrPermanentTableMetadata( TableIdentifier(sourceViewName)) val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, tableType) + checkCreateTableLike(sourceTable, targetTable, tableType, provider) } } } test("CREATE TABLE LIKE a data source table") { - // CREATE TABLE LIKE a data source table. - withCreateTableLikeDSTable(location = None) + Seq(None, Some("parquet"), Some("orc"), Some("hive")) foreach { provider => + // CREATE TABLE LIKE a data source table. + withCreateTableLikeDSTable(location = None, provider) - // CREATE TABLE LIKE a data source table location ... - withTempDir { tmpDir => - withCreateTableLikeDSTable(Some(tmpDir.toURI.toString)) + // CREATE TABLE LIKE a data source table location ... 
+ withTempDir { tmpDir => + withCreateTableLikeDSTable(Some(tmpDir.toURI.toString), provider) + } } } - private def withCreateTableLikeDSTable(location : Option[String]): Unit = { + private def withCreateTableLikeDSTable( + location : Option[String], provider: Option[String]): Unit = { val sourceTabName = "tab1" val targetTabName = "tab2" val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTable(sourceTabName, targetTabName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d") .write.format("json").saveAsTable(sourceTabName) val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") + val providerClause = if (provider.nonEmpty) s"USING ${provider.get}" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $providerClause $locationClause") val sourceTable = spark.sessionState.catalog.getTableMetadata( @@ -1284,34 +1335,37 @@ class HiveDDLSuite // The table type of the source table should be a Hive-managed data source table assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.MANAGED) - - checkCreateTableLike(sourceTable, targetTable, tableType) + checkCreateTableLike(sourceTable, targetTable, tableType, provider) } } test("CREATE TABLE LIKE an external data source table") { - // CREATE TABLE LIKE an external data source table. - withCreateTableLikeExtDSTable(location = None) + Seq(None, Some("parquet"), Some("orc"), Some("hive")) foreach { provider => + // CREATE TABLE LIKE an external data source table. + withCreateTableLikeExtDSTable(location = None, provider) - // CREATE TABLE LIKE an external data source table location ... 
- withTempDir { tmpDir => - withCreateTableLikeExtDSTable(Some(tmpDir.toURI.toString)) + // CREATE TABLE LIKE an external data source table location ... + withTempDir { tmpDir => + withCreateTableLikeExtDSTable(Some(tmpDir.toURI.toString), provider) + } } } - private def withCreateTableLikeExtDSTable(location : Option[String]): Unit = { + private def withCreateTableLikeExtDSTable( + location : Option[String], provider: Option[String]): Unit = { val sourceTabName = "tab1" val targetTabName = "tab2" val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTable(sourceTabName, targetTabName) { withTempPath { dir => val path = dir.getCanonicalPath - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d") .write.format("parquet").save(path) sql(s"CREATE TABLE $sourceTabName USING parquet OPTIONS (PATH '${dir.toURI}')") val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") + val providerClause = if (provider.nonEmpty) s"USING ${provider.get}" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $providerClause $locationClause") // The source table should be an external data source table val sourceTable = spark.sessionState.catalog.getTableMetadata( @@ -1321,23 +1375,25 @@ class HiveDDLSuite // The table type of the source table should be an external data source table assert(DDLUtils.isDatasourceTable(sourceTable)) assert(sourceTable.tableType == CatalogTableType.EXTERNAL) - - checkCreateTableLike(sourceTable, targetTable, tableType) + checkCreateTableLike(sourceTable, targetTable, tableType, provider) } } } test("CREATE TABLE LIKE a managed Hive serde table") { - // CREATE TABLE LIKE a managed Hive serde table. 
- withCreateTableLikeManagedHiveTable(location = None) + Seq(None, Some("parquet"), Some("orc"), Some("hive")) foreach { provider => + // CREATE TABLE LIKE a managed Hive serde table. + withCreateTableLikeManagedHiveTable(location = None, provider) - // CREATE TABLE LIKE a managed Hive serde table location ... - withTempDir { tmpDir => - withCreateTableLikeManagedHiveTable(Some(tmpDir.toURI.toString)) + // CREATE TABLE LIKE a managed Hive serde table location ... + withTempDir { tmpDir => + withCreateTableLikeManagedHiveTable(Some(tmpDir.toURI.toString), provider) + } } } - private def withCreateTableLikeManagedHiveTable(location : Option[String]): Unit = { + private def withCreateTableLikeManagedHiveTable( + location : Option[String], provider: Option[String]): Unit = { val sourceTabName = "tab1" val targetTabName = "tab2" val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED @@ -1346,7 +1402,8 @@ class HiveDDLSuite sql(s"CREATE TABLE $sourceTabName TBLPROPERTIES('prop1'='value1') AS SELECT 1 key, 'a'") val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") + val providerClause = if (provider.nonEmpty) s"USING ${provider.get}" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $providerClause $locationClause") val sourceTable = catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) @@ -1354,22 +1411,24 @@ class HiveDDLSuite assert(sourceTable.properties.get("prop1").nonEmpty) val targetTable = catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, tableType) + checkCreateTableLike(sourceTable, targetTable, tableType, provider) } } test("CREATE TABLE LIKE an external Hive serde table") { - // CREATE TABLE LIKE an external Hive serde table. 
- withCreateTableLikeExtHiveTable(location = None) + Seq(None, Some("parquet"), Some("orc"), Some("hive")) foreach { provider => + // CREATE TABLE LIKE an external Hive serde table. + withCreateTableLikeExtHiveTable(location = None, provider) - // CREATE TABLE LIKE an external Hive serde table location ... - withTempDir { tmpDir => - withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString)) + // CREATE TABLE LIKE an external Hive serde table location ... + withTempDir { tmpDir => + withCreateTableLikeExtHiveTable(Some(tmpDir.toURI.toString), provider) + } } } - private def withCreateTableLikeExtHiveTable(location : Option[String]): Unit = { + private def withCreateTableLikeExtHiveTable( + location : Option[String], provider: Option[String]): Unit = { val catalog = spark.sessionState.catalog val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTempDir { tmpDir => @@ -1395,7 +1454,8 @@ class HiveDDLSuite } val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" - sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $locationClause") + val providerClause = if (provider.nonEmpty) s"USING ${provider.get}" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceTabName $providerClause $locationClause") val sourceTable = catalog.getTableMetadata( TableIdentifier(sourceTabName, Some("default"))) @@ -1403,63 +1463,67 @@ class HiveDDLSuite assert(sourceTable.comment == Option("Apache Spark")) val targetTable = catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceTable, targetTable, tableType) + checkCreateTableLike(sourceTable, targetTable, tableType, provider) } } } test("CREATE TABLE LIKE a view") { - // CREATE TABLE LIKE a view. - withCreateTableLikeView(location = None) + Seq(None, Some("parquet"), Some("orc"), Some("hive")) foreach { provider => + // CREATE TABLE LIKE a view. 
+ withCreateTableLikeView(location = None, provider) - // CREATE TABLE LIKE a view location ... - withTempDir { tmpDir => - withCreateTableLikeView(Some(tmpDir.toURI.toString)) + // CREATE TABLE LIKE a view location ... + withTempDir { tmpDir => + withCreateTableLikeView(Some(tmpDir.toURI.toString), provider) + } } } - private def withCreateTableLikeView(location : Option[String]): Unit = { + private def withCreateTableLikeView( + location : Option[String], provider: Option[String]): Unit = { val sourceTabName = "tab1" val sourceViewName = "view" val targetTabName = "tab2" val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED withTable(sourceTabName, targetTabName) { withView(sourceViewName) { - spark.range(10).select('id as 'a, 'id as 'b, 'id as 'c, 'id as 'd) + spark.range(10).select($"id" as "a", $"id" as "b", $"id" as "c", $"id" as "d") .write.format("json").saveAsTable(sourceTabName) sql(s"CREATE VIEW $sourceViewName AS SELECT * FROM $sourceTabName") val locationClause = if (location.nonEmpty) s"LOCATION '${location.getOrElse("")}'" else "" - sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $locationClause") + val providerClause = if (provider.nonEmpty) s"USING ${provider.get}" else "" + sql(s"CREATE TABLE $targetTabName LIKE $sourceViewName $providerClause $locationClause") val sourceView = spark.sessionState.catalog.getTableMetadata( TableIdentifier(sourceViewName, Some("default"))) // The original source should be a VIEW with an empty path assert(sourceView.tableType == CatalogTableType.VIEW) assert(sourceView.viewText.nonEmpty) - assert(sourceView.viewDefaultDatabase == Some("default")) + assert(sourceView.viewCatalogAndNamespace == + Seq(CatalogManager.SESSION_CATALOG_NAME, "default")) assert(sourceView.viewQueryColumnNames == Seq("a", "b", "c", "d")) val targetTable = spark.sessionState.catalog.getTableMetadata( TableIdentifier(targetTabName, Some("default"))) - - checkCreateTableLike(sourceView, 
targetTable, tableType) + checkCreateTableLike(sourceView, targetTable, tableType, provider) } } } private def checkCreateTableLike( - sourceTable: CatalogTable, - targetTable: CatalogTable, - tableType: CatalogTableType): Unit = { + sourceTable: CatalogTable, + targetTable: CatalogTable, + tableType: CatalogTableType, + provider: Option[String]): Unit = { // The created table should be a MANAGED table or EXTERNAL table with empty view text // and original text. assert(targetTable.tableType == tableType, s"the created table must be a/an ${tableType.name} table") assert(targetTable.viewText.isEmpty, "the view text in the created table must be empty") - assert(targetTable.viewDefaultDatabase.isEmpty, - "the view default database in the created table must be empty") + assert(targetTable.viewCatalogAndNamespace.isEmpty, + "the view catalog and namespace in the created table must be empty") assert(targetTable.viewQueryColumnNames.isEmpty, "the view query output columns in the created table must be empty") assert(targetTable.comment.isEmpty, @@ -1482,21 +1546,29 @@ class HiveDDLSuite assert(targetTable.properties.filterKeys(!metastoreGeneratedProperties.contains(_)).isEmpty, "the table properties of source tables should not be copied in the created table") - if (DDLUtils.isDatasourceTable(sourceTable) || - sourceTable.tableType == CatalogTableType.VIEW) { - assert(DDLUtils.isDatasourceTable(targetTable), - "the target table should be a data source table") - } else { - assert(!DDLUtils.isDatasourceTable(targetTable), - "the target table should be a Hive serde table") - } - - if (sourceTable.tableType == CatalogTableType.VIEW) { - // Source table is a temporary/permanent view, which does not have a provider. 
The created - // target table uses the default data source format - assert(targetTable.provider == Option(spark.sessionState.conf.defaultDataSourceName)) - } else { - assert(targetTable.provider == sourceTable.provider) + provider match { + case Some(_) => + assert(targetTable.provider == provider) + if (DDLUtils.isHiveTable(provider)) { + assert(DDLUtils.isHiveTable(targetTable), + "the target table should be a hive table if provider is hive") + } + case None => + if (sourceTable.tableType == CatalogTableType.VIEW) { + // Source table is a temporary/permanent view, which does not have a provider. + // The created target table uses the default data source format + assert(targetTable.provider == Option(spark.sessionState.conf.defaultDataSourceName)) + } else { + assert(targetTable.provider == sourceTable.provider) + } + if (DDLUtils.isDatasourceTable(sourceTable) || + sourceTable.tableType == CatalogTableType.VIEW) { + assert(DDLUtils.isDatasourceTable(targetTable), + "the target table should be a data source table") + } else { + assert(!DDLUtils.isDatasourceTable(targetTable), + "the target table should be a Hive serde table") + } } assert(targetTable.storage.locationUri.nonEmpty, "target table path should not be empty") @@ -1551,7 +1623,7 @@ class HiveDDLSuite assert(spark.catalog.getTable("default", indexTabName).name === indexTabName) intercept[TableAlreadyExistsException] { - sql(s"CREATE TABLE $indexTabName(b int)") + sql(s"CREATE TABLE $indexTabName(b int) USING hive") } intercept[TableAlreadyExistsException] { sql(s"ALTER TABLE $tabName RENAME TO $indexTabName") @@ -1749,7 +1821,7 @@ class HiveDDLSuite test("create hive serde table with Catalog") { withTable("t") { withTempDir { dir => - val df = spark.catalog.createExternalTable( + val df = spark.catalog.createTable( "t", "hive", new StructType().add("i", "int"), @@ -1828,10 +1900,10 @@ class HiveDDLSuite .write.format("hive").mode("append").saveAsTable("t") checkAnswer(spark.table("t"), Row(1, "a") :: 
Row(2, "b") :: Row(3, "c") :: Nil) - Seq("c" -> 3).toDF("i", "j") + Seq(3.5 -> 3).toDF("i", "j") .write.format("hive").mode("append").saveAsTable("t") checkAnswer(spark.table("t"), Row(1, "a") :: Row(2, "b") :: Row(3, "c") - :: Row(null, "3") :: Nil) + :: Row(3, "3") :: Nil) Seq(4 -> "d").toDF("i", "j").write.saveAsTable("t1") @@ -2366,10 +2438,11 @@ class HiveDDLSuite checkAnswer(spark.table("t"), Row(1)) val maybeFile = path.listFiles().find(_.getName.startsWith("part")) - val reader = getReader(maybeFile.head.getCanonicalPath) - assert(reader.getCompressionKind.name === "ZLIB") - assert(reader.getCompressionSize == 1001) - assert(reader.getRowIndexStride == 2002) + Utils.tryWithResource(getReader(maybeFile.head.getCanonicalPath)) { reader => + assert(reader.getCompressionKind.name === "ZLIB") + assert(reader.getCompressionSize == 1001) + assert(reader.getRowIndexStride == 2002) + } } } } @@ -2407,7 +2480,7 @@ class HiveDDLSuite test("load command for non local invalid path validation") { withTable("tbl") { - sql("CREATE TABLE tbl(i INT, j STRING)") + sql("CREATE TABLE tbl(i INT, j STRING) USING hive") val e = intercept[AnalysisException]( sql("load data inpath '/doesnotexist.csv' into table tbl")) assert(e.message.contains("LOAD DATA input path does not exist")) @@ -2416,12 +2489,12 @@ class HiveDDLSuite test("SPARK-22252: FileFormatWriter should respect the input query schema in HIVE") { withTable("t1", "t2", "t3", "t4") { - spark.range(1).select('id as 'col1, 'id as 'col2).write.saveAsTable("t1") + spark.range(1).select($"id" as "col1", $"id" as "col2").write.saveAsTable("t1") spark.sql("select COL1, COL2 from t1").write.format("hive").saveAsTable("t2") checkAnswer(spark.table("t2"), Row(0, 0)) // Test picking part of the columns when writing. 
- spark.range(1).select('id, 'id as 'col1, 'id as 'col2).write.saveAsTable("t3") + spark.range(1).select($"id", $"id" as "col1", $"id" as "col2").write.saveAsTable("t3") spark.sql("select COL1, COL2 from t3").write.format("hive").saveAsTable("t4") checkAnswer(spark.table("t4"), Row(0, 0)) } @@ -2433,9 +2506,9 @@ class HiveDDLSuite "CREATE TABLE IF NOT EXISTS t1 (c1_int INT, c2_string STRING, c3_float FLOAT)") val desc = sql("DESC FORMATTED t1").filter($"col_name".startsWith("Last Access")) .select("data_type") - // check if the last access time doesnt have the default date of year + // check if the last access time doesn't have the default date of year // 1970 as its a wrong access time - assert(!(desc.first.toString.contains("1970"))) + assert((desc.first.toString.contains("UNKNOWN"))) } } @@ -2524,4 +2597,131 @@ class HiveDDLSuite } } } + + test("Create Table LIKE STORED AS Hive Format") { + val catalog = spark.sessionState.catalog + withTable("s") { + sql("CREATE TABLE s(a INT, b INT) STORED AS ORC") + hiveFormats.foreach { tableType => + val expectedSerde = HiveSerDe.sourceToSerDe(tableType) + withTable("t") { + sql(s"CREATE TABLE t LIKE s STORED AS $tableType") + val table = catalog.getTableMetadata(TableIdentifier("t")) + assert(table.provider == Some("hive")) + assert(table.storage.serde == expectedSerde.get.serde) + assert(table.storage.inputFormat == expectedSerde.get.inputFormat) + assert(table.storage.outputFormat == expectedSerde.get.outputFormat) + } + } + } + } + + test("Create Table LIKE with specified TBLPROPERTIES") { + val catalog = spark.sessionState.catalog + withTable("s", "t") { + sql("CREATE TABLE s(a INT, b INT) USING hive TBLPROPERTIES('a'='apple')") + val source = catalog.getTableMetadata(TableIdentifier("s")) + assert(source.properties("a") == "apple") + sql("CREATE TABLE t LIKE s STORED AS parquet TBLPROPERTIES('f'='foo', 'b'='bar')") + val table = catalog.getTableMetadata(TableIdentifier("t")) + assert(table.properties.get("a") === 
None) + assert(table.properties("f") == "foo") + assert(table.properties("b") == "bar") + } + } + + test("Create Table LIKE with row format") { + val catalog = spark.sessionState.catalog + withTable("sourceHiveTable", "sourceDsTable", "targetHiveTable1", "targetHiveTable2") { + sql("CREATE TABLE sourceHiveTable(a INT, b INT) STORED AS PARQUET") + sql("CREATE TABLE sourceDsTable(a INT, b INT) USING PARQUET") + + // row format doesn't work in create targetDsTable + var e = intercept[AnalysisException] { + spark.sql( + """ + |CREATE TABLE targetDsTable LIKE sourceHiveTable USING PARQUET + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + """.stripMargin) + }.getMessage + assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + + // row format doesn't work with provider hive + e = intercept[AnalysisException] { + spark.sql( + """ + |CREATE TABLE targetHiveTable LIKE sourceHiveTable USING hive + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin) + }.getMessage + assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + + // row format doesn't work without 'STORED AS' + e = intercept[AnalysisException] { + spark.sql( + """ + |CREATE TABLE targetDsTable LIKE sourceDsTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin) + }.getMessage + assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + + // row format works with STORED AS hive format (from hive table) + spark.sql( + """ + |CREATE TABLE targetHiveTable1 LIKE sourceHiveTable STORED AS PARQUET + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin) + var table = catalog.getTableMetadata(TableIdentifier("targetHiveTable1")) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === + 
Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")) + assert(table.storage.serde === Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(table.storage.properties("test") == "test") + + // row format works with STORED AS hive format (from datasource table) + spark.sql( + """ + |CREATE TABLE targetHiveTable2 LIKE sourceDsTable STORED AS PARQUET + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin) + table = catalog.getTableMetadata(TableIdentifier("targetHiveTable2")) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === + Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")) + assert(table.storage.serde === Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(table.storage.properties("test") == "test") + } + } + + test("SPARK-30098: create table without provider should " + + "use default data source under non-legacy mode") { + val catalog = spark.sessionState.catalog + withSQLConf( + SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED.key -> "false") { + withTable("s") { + val defaultProvider = conf.defaultDataSourceName + sql("CREATE TABLE s(a INT, b INT)") + val table = catalog.getTableMetadata(TableIdentifier("s")) + assert(table.provider === Some(defaultProvider)) + } + } + } + + test("SPARK-30098: create table without provider should " + + "use hive under legacy mode") { + val catalog = spark.sessionState.catalog + withSQLConf( + SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED.key -> "true") { + withTable("s") { + sql("CREATE TABLE s(a INT, b INT)") + val table = catalog.getTableMetadata(TableIdentifier("s")) + assert(table.provider === Some("hive")) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala index 68ccee5e6623a..f9a4e2cd210e3 100644 
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala @@ -97,13 +97,14 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } test("explain create table command") { - checkKeywordsExist(sql("explain create table temp__b as select * from src limit 2"), + checkKeywordsExist(sql("explain create table temp__b using hive as select * from src limit 2"), "== Physical Plan ==", "InsertIntoHiveTable", "Limit", "src") - checkKeywordsExist(sql("explain extended create table temp__b as select * from src limit 2"), + checkKeywordsExist( + sql("explain extended create table temp__b using hive as select * from src limit 2"), "== Parsed Logical Plan ==", "== Analyzed Logical Plan ==", "== Optimized Logical Plan ==", @@ -133,19 +134,21 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } test("explain output of physical plan should contain proper codegen stage ID") { - checkKeywordsExist(sql( - """ - |EXPLAIN SELECT t1.id AS a, t2.id AS b FROM - |(SELECT * FROM range(3)) t1 JOIN - |(SELECT * FROM range(10)) t2 ON t1.id == t2.id % 3 - """.stripMargin), - "== Physical Plan ==", - "*(2) Project ", - "+- *(2) BroadcastHashJoin ", - " :- BroadcastExchange ", - " : +- *(1) Range ", - " +- *(2) Range " - ) + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + checkKeywordsExist(sql( + """ + |EXPLAIN SELECT t1.id AS a, t2.id AS b FROM + |(SELECT * FROM range(3)) t1 JOIN + |(SELECT * FROM range(10)) t2 ON t1.id == t2.id % 3 + """.stripMargin), + "== Physical Plan ==", + "*(2) Project ", + "+- *(2) BroadcastHashJoin ", + " :- BroadcastExchange ", + " : +- *(1) Range ", + " +- *(2) Range " + ) + } } test("EXPLAIN CODEGEN command") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 53798e0ac2727..5a8365017a5ba 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec import org.apache.spark.sql.hive._ -import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHive} +import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -56,7 +56,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd def spark: SparkSession = sparkSession - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() TestHive.setCacheTables(true) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) @@ -67,7 +67,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true) } - override def afterAll() { + override def afterAll(): Unit = { try { TestHive.setCacheTables(false) TimeZone.setDefault(originalTimeZone) @@ -711,7 +711,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd } def isExplanation(result: DataFrame): Boolean = { - val explanation = result.select('plan).collect().map { case Row(plan: String) => plan } + val explanation = result.select("plan").collect().map { case Row(plan: String) => plan } explanation.head.startsWith("== Physical Plan ==") } @@ -817,7 +817,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("ADD JAR command 2") { // this is a test case from 
mapjoin_addjar.q - val testJar = HiveTestUtils.getHiveHcatalogCoreJar.toURI + val testJar = HiveTestJars.getHiveHcatalogCoreJar().toURI val testData = TestHive.getHiveFile("data/files/sample.json").toURI sql(s"ADD JAR $testJar") sql( @@ -827,9 +827,9 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd sql("select * from src join t1 on src.key = t1.a") sql("DROP TABLE t1") assert(sql("list jars"). - filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0) + filter(_.getString(0).contains(HiveTestJars.getHiveHcatalogCoreJar().getName)).count() > 0) assert(sql("list jar"). - filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0) + filter(_.getString(0).contains(HiveTestJars.getHiveHcatalogCoreJar().getName)).count() > 0) val testJar2 = TestHive.getHiveFile("TestUDTF.jar").getCanonicalPath sql(s"ADD JAR $testJar2") assert(sql(s"list jar $testJar").count() == 1) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala index 25ff3544185af..f8ba7bf2c1a62 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala @@ -65,7 +65,7 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS hiveClient.runSqlHive(s"CREATE TABLE hive_serde (c1 TIMESTAMP) STORED AS $fileFormat") hiveClient.runSqlHive("INSERT INTO TABLE hive_serde values('2019-04-11 15:50:00')") checkAnswer(spark.table("hive_serde"), Row(Timestamp.valueOf("2019-04-11 15:50:00"))) - spark.sql("INSERT INTO TABLE hive_serde values('2019-04-12 15:50:00')") + spark.sql("INSERT INTO TABLE hive_serde values(TIMESTAMP('2019-04-12 15:50:00'))") checkAnswer( spark.table("hive_serde"), Seq(Row(Timestamp.valueOf("2019-04-11 
15:50:00")), @@ -77,7 +77,7 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS hiveClient.runSqlHive(s"CREATE TABLE hive_serde (c1 DATE) STORED AS $fileFormat") hiveClient.runSqlHive("INSERT INTO TABLE hive_serde values('2019-04-11')") checkAnswer(spark.table("hive_serde"), Row(Date.valueOf("2019-04-11"))) - spark.sql("INSERT INTO TABLE hive_serde values('2019-04-12')") + spark.sql("INSERT INTO TABLE hive_serde values(TIMESTAMP('2019-04-12'))") checkAnswer( spark.table("hive_serde"), Seq(Row(Date.valueOf("2019-04-11")), Row(Date.valueOf("2019-04-12")))) @@ -119,7 +119,7 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS hiveClient.runSqlHive(s"CREATE TABLE hive_serde (c1 BINARY) STORED AS $fileFormat") hiveClient.runSqlHive("INSERT INTO TABLE hive_serde values('1')") checkAnswer(spark.table("hive_serde"), Row("1".getBytes)) - spark.sql("INSERT INTO TABLE hive_serde values('2')") + spark.sql("INSERT INTO TABLE hive_serde values(BINARY('2'))") checkAnswer(spark.table("hive_serde"), Seq(Row("1".getBytes), Row("2".getBytes))) } } @@ -168,6 +168,8 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS checkNumericTypes(fileFormat, "DECIMAL(38, 2)", 2.1D) // Date/Time Types + // SPARK-28885 String value is not allowed to be stored as date/timestamp type with + // ANSI store assignment policy. 
checkDateTimeTypes(fileFormat) // String Types diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala index ed4304b9aa57b..9a1190af02fac 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala @@ -83,15 +83,18 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte } test("Test the default fileformat for Hive-serde tables") { - withSQLConf("hive.default.fileformat" -> "orc") { - val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)") + withSQLConf("hive.default.fileformat" -> "orc", + SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED.key -> "true") { + val (desc, exists) = extractTableDesc( + "CREATE TABLE IF NOT EXISTS fileformat_test (id int)") assert(exists) assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) assert(desc.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) } - withSQLConf("hive.default.fileformat" -> "parquet") { + withSQLConf("hive.default.fileformat" -> "parquet", + SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT_ENABLED.key -> "true") { val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)") assert(exists) val input = desc.storage.inputFormat diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 3f9bb8de42e09..67d7ed0841abb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -85,8 
+85,8 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql("create table spark_4959 (col1 string)") sql("""insert into table spark_4959 select "hi" from src limit 1""") table("spark_4959").select( - 'col1.as("CaseSensitiveColName"), - 'col1.as("CaseSensitiveColName2")).createOrReplaceTempView("spark_4959_2") + $"col1".as("CaseSensitiveColName"), + $"col1".as("CaseSensitiveColName2")).createOrReplaceTempView("spark_4959_2") assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi")) assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi")) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala index b0d615c1acee9..9e33a8ee4cc5c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala @@ -29,12 +29,14 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo import test.org.apache.spark.sql.MyDoubleAvg import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils -class HiveUDAFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { +class HiveUDAFSuite extends QueryTest + with TestHiveSingleton with SQLTestUtils with AdaptiveSparkPlanHelper { import testImplicits._ protected override def beforeAll(): Unit = { @@ -63,7 +65,7 @@ class HiveUDAFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { test("built-in Hive UDAF") { val df = sql("SELECT key % 2, hive_max(key) FROM t GROUP BY key % 2") - val aggs = 
df.queryExecution.executedPlan.collect { + val aggs = collect(df.queryExecution.executedPlan) { case agg: ObjectHashAggregateExec => agg } @@ -80,7 +82,7 @@ class HiveUDAFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { test("customized Hive UDAF") { val df = sql("SELECT key % 2, mock(value) FROM t GROUP BY key % 2") - val aggs = df.queryExecution.executedPlan.collect { + val aggs = collect(df.queryExecution.executedPlan) { case agg: ObjectHashAggregateExec => agg } @@ -99,7 +101,7 @@ class HiveUDAFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { spark.range(100).createTempView("v") val df = sql("SELECT id % 2, mock2(id) FROM v GROUP BY id % 2") - val aggs = df.queryExecution.executedPlan.collect { + val aggs = collect(df.queryExecution.executedPlan) { case agg: ObjectHashAggregateExec => agg } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 587eab4a24810..7bca2af379934 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -32,6 +32,7 @@ import org.apache.hadoop.io.{LongWritable, Writable} import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.execution.command.FunctionsCommand import org.apache.spark.sql.functions.max import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf @@ -148,13 +149,6 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { sql("SELECT array(max(key), max(key)) FROM src").collect().toSeq) } - test("SPARK-16228 Percentile needs explicit cast to double") { - sql("select percentile(value, cast(0.5 as double)) from values 1,2,3 T(value)") - sql("select percentile_approx(value, cast(0.5 as 
double)) from values 1.0,2.0,3.0 T(value)") - sql("select percentile(value, 0.5) from values 1,2,3 T(value)") - sql("select percentile_approx(value, 0.5) from values 1.0,2.0,3.0 T(value)") - } - test("Generic UDAF aggregates") { checkAnswer(sql( @@ -563,7 +557,8 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { checkAnswer( sql("SELECT testUDFToListInt(s) FROM inputTable"), Seq(Row(Seq(1, 2, 3)))) - assert(sql("show functions").count() == numFunc + 1) + assert(sql("show functions").count() == + numFunc + FunctionsCommand.virtualOperators.size + 1) assert(spark.catalog.listFunctions().count() == numFunc + 1) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala index 2391106cfb253..327e4104d59a8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedFunction import org.apache.spark.sql.catalyst.expressions.{ExpressionEvalHelper, Literal} import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -38,7 +39,8 @@ class ObjectHashAggregateSuite extends QueryTest with SQLTestUtils with TestHiveSingleton - with ExpressionEvalHelper { + with ExpressionEvalHelper + with AdaptiveSparkPlanHelper { import testImplicits._ @@ -156,7 +158,7 @@ class ObjectHashAggregateSuite ) checkAnswer( - df.groupBy($"id" % 4 as 
'mod).agg(aggFunctions.head, aggFunctions.tail: _*), + df.groupBy($"id" % 4 as "mod").agg(aggFunctions.head, aggFunctions.tail: _*), data.groupBy(_.getInt(0) % 4).map { case (key, value) => key -> Row.fromSeq(value.map(_.toSeq).transpose.map(_.count(_ != null): Long)) }.toSeq.map { @@ -394,19 +396,19 @@ class ObjectHashAggregateSuite } private def containsSortAggregateExec(df: DataFrame): Boolean = { - df.queryExecution.executedPlan.collectFirst { + collectFirst(df.queryExecution.executedPlan) { case _: SortAggregateExec => () }.nonEmpty } private def containsObjectHashAggregateExec(df: DataFrame): Boolean = { - df.queryExecution.executedPlan.collectFirst { + collectFirst(df.queryExecution.executedPlan) { case _: ObjectHashAggregateExec => () }.nonEmpty } private def containsHashAggregateExec(df: DataFrame): Boolean = { - df.queryExecution.executedPlan.collectFirst { + collectFirst(df.queryExecution.executedPlan) { case _: HashAggregateExec => () }.nonEmpty } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala index 6b2d0c656b371..c9c36992906a8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala @@ -65,7 +65,8 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, tableMeta) - val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze + val query = Project(Seq(Symbol("i"), Symbol("p")), + Filter(Symbol("p") === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala new file mode 100644 index 0000000000000..e41709841a736 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils + +class PruneHiveTablePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("PruneHiveTablePartitions", Once, + EliminateSubqueryAliases, new PruneHiveTablePartitions(spark)) :: Nil + } + + test("SPARK-15616 statistics pruned after going throuhg PruneHiveTablePartitions") { + withTable("test", "temp") { + sql( + s""" + |CREATE TABLE test(i int) + |PARTITIONED BY (p int) + |STORED AS textfile""".stripMargin) + spark.range(0, 1000, 1).selectExpr("id as col") + .createOrReplaceTempView("temp") + + for (part <- Seq(1, 2, 3, 4)) { + sql( + s""" + |INSERT OVERWRITE TABLE test PARTITION (p='$part') + |select col from temp""".stripMargin) + } + val analyzed1 = sql("select i from test where p > 0").queryExecution.analyzed + val analyzed2 = sql("select i from test where p = 1").queryExecution.analyzed + assert(Optimize.execute(analyzed1).stats.sizeInBytes / 4 === + Optimize.execute(analyzed2).stats.sizeInBytes) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala index cc592cf6ca629..985281bce3036 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala @@ -141,6 +141,13 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter { Seq("2008-04-08", "11"), Seq("2008-04-09", "11"))) + createPruningTest("Partition pruning - with filter 
containing non-deterministic condition", + "SELECT value, hr FROM srcpart1 WHERE ds = '2008-04-08' AND hr < 12 AND rand() < 1", + Seq("value", "hr"), + Seq("value", "hr"), + Seq( + Seq("2008-04-08", "11"))) + def createPruningTest( testCaseName: String, sql: String, @@ -154,7 +161,7 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter { case p @ HiveTableScanExec(columns, relation, _) => val columnNames = columns.map(_.name) val partValues = if (relation.isPartitioned) { - p.prunePartitions(p.rawPartitions).map(_.getValues) + p.prunedPartitions.map(_.getValues) } else { Seq.empty } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala index 022cb7177339d..16668f93bd4e7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLMetricsSuite.scala @@ -19,9 +19,23 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.execution.metric.SQLMetricsTestUtils import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.SQLConf class SQLMetricsSuite extends SQLMetricsTestUtils with TestHiveSingleton { + var originalValue: String = _ + // With AQE on/off, the metric info is different. 
+ override def beforeAll(): Unit = { + super.beforeAll() + originalValue = spark.conf.get(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key) + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, "false") + } + + override def afterAll(): Unit = { + spark.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key, originalValue) + super.afterAll() + } + test("writing data out metrics: hive") { testMetricsNonDynamicPartition("hive", "t1") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 1638f6cd91808..539b464743461 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -33,16 +33,16 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, Functio import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, CatalogUtils, HiveTableRelation} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} -import org.apache.spark.sql.execution.command.LoadDataCommand +import org.apache.spark.sql.execution.command.{FunctionsCommand, LoadDataCommand} import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} -import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHiveSingleton} +import org.apache.spark.sql.hive.test.{HiveTestJars, TestHiveSingleton} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.util.Utils case class Nested1(f1: Nested2) case class Nested2(f2: Nested3) @@ -192,6 
+192,11 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { allBuiltinFunctions.foreach { f => assert(allFunctions.contains(f)) } + + FunctionsCommand.virtualOperators.foreach { f => + assert(allFunctions.contains(f)) + } + withTempDatabase { db => def createFunction(names: Seq[String]): Unit = { names.foreach { name => @@ -771,7 +776,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { sql("CREATE TABLE test2 (key INT, value STRING)") testData.write.mode(SaveMode.Append).insertInto("test2") testData.write.mode(SaveMode.Append).insertInto("test2") - sql("CREATE TABLE test AS SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key") + sql("CREATE TABLE test USING hive AS " + + "SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key") checkAnswer( table("test"), sql("SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key").collect().toSeq) @@ -932,7 +938,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { read.json(ds).createOrReplaceTempView("data") withSQLConf(SQLConf.CONVERT_CTAS.key -> "false") { - sql("CREATE TABLE explodeTest (key bigInt)") + sql("CREATE TABLE explodeTest (key bigInt) USING hive") table("explodeTest").queryExecution.analyzed match { case SubqueryAlias(_, r: HiveTableRelation) => // OK case _ => @@ -1103,10 +1109,10 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("Call add jar in a different thread (SPARK-8306)") { @volatile var error: Option[Throwable] = None val thread = new Thread { - override def run() { + override def run(): Unit = { // To make sure this test works, this jar should not be loaded in another place. 
sql( - s"ADD JAR ${HiveTestUtils.getHiveContribJar.getCanonicalPath}") + s"ADD JAR ${HiveTestJars.getHiveContribJar().getCanonicalPath}") try { sql( """ @@ -1178,51 +1184,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(sql("SELECT a.`c.b`, `b.$q`[0].`a@!.q`, `q.w`.`w.i&`[0] FROM t"), Row(1, 1, 1)) } - test("Convert hive interval term into Literal of CalendarIntervalType") { - checkAnswer(sql("select interval '0 0:0:0.1' day to second"), - Row(CalendarInterval.fromString("interval 100 milliseconds"))) - checkAnswer(sql("select interval '10-9' year to month"), - Row(CalendarInterval.fromString("interval 10 years 9 months"))) - checkAnswer(sql("select interval '20 15:40:32.99899999' day to hour"), - Row(CalendarInterval.fromString("interval 2 weeks 6 days 15 hours"))) - checkAnswer(sql("select interval '20 15:40:32.99899999' day to minute"), - Row(CalendarInterval.fromString("interval 2 weeks 6 days 15 hours 40 minutes"))) - checkAnswer(sql("select interval '20 15:40:32.99899999' day to second"), - Row(CalendarInterval.fromString("interval 2 weeks 6 days 15 hours 40 minutes " + - "32 seconds 998 milliseconds 999 microseconds"))) - checkAnswer(sql("select interval '15:40:32.99899999' hour to minute"), - Row(CalendarInterval.fromString("interval 15 hours 40 minutes"))) - checkAnswer(sql("select interval '15:40.99899999' hour to second"), - Row(CalendarInterval.fromString("interval 15 minutes 40 seconds 998 milliseconds " + - "999 microseconds"))) - checkAnswer(sql("select interval '15:40' hour to second"), - Row(CalendarInterval.fromString("interval 15 hours 40 minutes"))) - checkAnswer(sql("select interval '15:40:32.99899999' hour to second"), - Row(CalendarInterval.fromString("interval 15 hours 40 minutes 32 seconds 998 milliseconds " + - "999 microseconds"))) - checkAnswer(sql("select interval '20 40:32.99899999' minute to second"), - Row(CalendarInterval.fromString("interval 2 weeks 6 days 40 minutes 32 seconds " + - 
"998 milliseconds 999 microseconds"))) - checkAnswer(sql("select interval '40:32.99899999' minute to second"), - Row(CalendarInterval.fromString("interval 40 minutes 32 seconds 998 milliseconds " + - "999 microseconds"))) - checkAnswer(sql("select interval '40:32' minute to second"), - Row(CalendarInterval.fromString("interval 40 minutes 32 seconds"))) - checkAnswer(sql("select interval '30' year"), - Row(CalendarInterval.fromString("interval 30 years"))) - checkAnswer(sql("select interval '25' month"), - Row(CalendarInterval.fromString("interval 25 months"))) - checkAnswer(sql("select interval '-100' day"), - Row(CalendarInterval.fromString("interval -14 weeks -2 days"))) - checkAnswer(sql("select interval '40' hour"), - Row(CalendarInterval.fromString("interval 1 days 16 hours"))) - checkAnswer(sql("select interval '80' minute"), - Row(CalendarInterval.fromString("interval 1 hour 20 minutes"))) - checkAnswer(sql("select interval '299.889987299' second"), - Row(CalendarInterval.fromString( - "interval 4 minutes 59 seconds 889 milliseconds 987 microseconds"))) - } - test("specifying database name for a temporary view is not allowed") { withTempPath { dir => withTempView("db.t") { @@ -1931,7 +1892,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } withTable("load_t") { - sql("CREATE TABLE load_t (a STRING)") + sql("CREATE TABLE load_t (a STRING) USING hive") sql(s"LOAD DATA LOCAL INPATH '$path/*part-r*' INTO TABLE load_t") checkAnswer(sql("SELECT * FROM load_t"), Seq(Row("1"), Row("2"), Row("3"))) @@ -1951,7 +1912,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { Files.write(s"$i", new File(dirPath, s"part-r-0000 $i"), StandardCharsets.UTF_8) } withTable("load_t") { - sql("CREATE TABLE load_t (a STRING)") + sql("CREATE TABLE load_t (a STRING) USING hive") sql(s"LOAD DATA LOCAL INPATH '$path/part-r-0000 1' INTO TABLE load_t") checkAnswer(sql("SELECT * FROM load_t"), Seq(Row("1"))) } @@ -1966,7 
+1927,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { Files.write(s"$i", new File(dirPath, s"part-r-0000$i"), StandardCharsets.UTF_8) } withTable("load_t_folder_wildcard") { - sql("CREATE TABLE load_t (a STRING)") + sql("CREATE TABLE load_t (a STRING) USING hive") sql(s"LOAD DATA LOCAL INPATH '${ path.substring(0, path.length - 1) .concat("*") @@ -1990,7 +1951,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { Files.write(s"$i", new File(dirPath, s"part-r-0000$i"), StandardCharsets.UTF_8) } withTable("load_t1") { - sql("CREATE TABLE load_t1 (a STRING)") + sql("CREATE TABLE load_t1 (a STRING) USING hive") sql(s"LOAD DATA LOCAL INPATH '$path/part-r-0000?' INTO TABLE load_t1") checkAnswer(sql("SELECT * FROM load_t1"), Seq(Row("1"), Row("2"), Row("3"))) } @@ -2005,13 +1966,33 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { Files.write(s"$i", new File(dirPath, s"part-r-0000$i"), StandardCharsets.UTF_8) } withTable("load_t2") { - sql("CREATE TABLE load_t2 (a STRING)") + sql("CREATE TABLE load_t2 (a STRING) USING hive") sql(s"LOAD DATA LOCAL INPATH '$path/?art-r-00001' INTO TABLE load_t2") checkAnswer(sql("SELECT * FROM load_t2"), Seq(Row("1"))) } } } + test("SPARK-28084 check for case insensitive property of partition column name in load command") { + withTempDir { dir => + val path = dir.toURI.toString.stripSuffix("/") + val dirPath = dir.getAbsoluteFile + Files.append("1", new File(dirPath, "part-r-000011"), StandardCharsets.UTF_8) + withTable("part_table") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql( + """ + |CREATE TABLE part_table (c STRING) + |PARTITIONED BY (d STRING) + """.stripMargin) + sql(s"LOAD DATA LOCAL INPATH '$path/part-r-000011' " + + "INTO TABLE part_table PARTITION(D ='1')") + checkAnswer(sql("SELECT * FROM part_table"), Seq(Row("1", "1"))) + } + } + } + } + test("SPARK-25738: defaultFs can have a port") { val defaultURI = new 
URI("hdfs://fizz.buzz.com:8020") val r = LoadDataCommand.makeQualified(defaultURI, new Path("/foo/bar"), new Path("/flim/flam")) @@ -2111,7 +2092,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { withTable("t") { df.createTempView("tempView") val e = intercept[AnalysisException] { - sql("CREATE TABLE t AS SELECT key, get_json_object(jstring, '$.f1') FROM tempView") + sql("CREATE TABLE t USING hive AS " + + "SELECT key, get_json_object(jstring, '$.f1') FROM tempView") }.getMessage assert(e.contains(expectedMsg)) } @@ -2396,7 +2378,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { }) spark .range(5) - .select(badUDF('id).as("a")) + .select(badUDF($"id").as("a")) .createOrReplaceTempView("test") val scriptFilePath = getTestResourcePath("data") val e = intercept[SparkException] { @@ -2412,4 +2394,149 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } } + + test("SPARK-29295: insert overwrite external partition should not have old data") { + Seq("true", "false").foreach { convertParquet => + withTable("test") { + withTempDir { f => + sql("CREATE EXTERNAL TABLE test(id int) PARTITIONED BY (name string) STORED AS " + + s"PARQUET LOCATION '${f.getAbsolutePath}'") + + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> convertParquet) { + sql("INSERT OVERWRITE TABLE test PARTITION(name='n1') SELECT 1") + sql("ALTER TABLE test DROP PARTITION(name='n1')") + sql("INSERT OVERWRITE TABLE test PARTITION(name='n1') SELECT 2") + checkAnswer(sql("SELECT id FROM test WHERE name = 'n1' ORDER BY id"), + Array(Row(2))) + } + } + } + } + } + + test("SPARK-29295: dynamic insert overwrite external partition should not have old data") { + Seq("true", "false").foreach { convertParquet => + withTable("test") { + withTempDir { f => + sql("CREATE EXTERNAL TABLE test(id int) PARTITIONED BY (p1 string, p2 string) " + + s"STORED AS PARQUET LOCATION '${f.getAbsolutePath}'") + + 
withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> convertParquet, + "hive.exec.dynamic.partition.mode" -> "nonstrict") { + sql( + """ + |INSERT OVERWRITE TABLE test PARTITION(p1='n1', p2) + |SELECT * FROM VALUES (1, 'n2'), (2, 'n3') AS t(id, p2) + """.stripMargin) + checkAnswer(sql("SELECT id FROM test WHERE p1 = 'n1' and p2 = 'n2' ORDER BY id"), + Array(Row(1))) + checkAnswer(sql("SELECT id FROM test WHERE p1 = 'n1' and p2 = 'n3' ORDER BY id"), + Array(Row(2))) + + sql("INSERT OVERWRITE TABLE test PARTITION(p1='n1', p2) SELECT 4, 'n4'") + checkAnswer(sql("SELECT id FROM test WHERE p1 = 'n1' and p2 = 'n4' ORDER BY id"), + Array(Row(4))) + + sql("ALTER TABLE test DROP PARTITION(p1='n1',p2='n2')") + sql("ALTER TABLE test DROP PARTITION(p1='n1',p2='n3')") + + sql( + """ + |INSERT OVERWRITE TABLE test PARTITION(p1='n1', p2) + |SELECT * FROM VALUES (5, 'n2'), (6, 'n3') AS t(id, p2) + """.stripMargin) + checkAnswer(sql("SELECT id FROM test WHERE p1 = 'n1' and p2 = 'n2' ORDER BY id"), + Array(Row(5))) + checkAnswer(sql("SELECT id FROM test WHERE p1 = 'n1' and p2 = 'n3' ORDER BY id"), + Array(Row(6))) + // Partition not overwritten should not be deleted. + checkAnswer(sql("SELECT id FROM test WHERE p1 = 'n1' and p2 = 'n4' ORDER BY id"), + Array(Row(4))) + } + } + } + + withTable("test") { + withTempDir { f => + sql("CREATE EXTERNAL TABLE test(id int) PARTITIONED BY (p1 string, p2 string) " + + s"STORED AS PARQUET LOCATION '${f.getAbsolutePath}'") + + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> convertParquet, + "hive.exec.dynamic.partition.mode" -> "nonstrict") { + // We should unescape partition value. 
+ sql("INSERT OVERWRITE TABLE test PARTITION(p1='n1', p2) SELECT 1, '/'") + sql("ALTER TABLE test DROP PARTITION(p1='n1',p2='/')") + sql("INSERT OVERWRITE TABLE test PARTITION(p1='n1', p2) SELECT 2, '/'") + checkAnswer(sql("SELECT id FROM test WHERE p1 = 'n1' and p2 = '/' ORDER BY id"), + Array(Row(2))) + } + } + } + } + } + + test("partition pruning should handle date correctly") { + withSQLConf(SQLConf.OPTIMIZER_INSET_CONVERSION_THRESHOLD.key -> "2") { + withTable("t") { + sql("CREATE TABLE t (i INT) PARTITIONED BY (j DATE)") + sql("INSERT INTO t PARTITION(j='1990-11-11') SELECT 1") + checkAnswer(sql("SELECT i, CAST(j AS STRING) FROM t"), Row(1, "1990-11-11")) + checkAnswer( + sql( + """ + |SELECT i, CAST(j AS STRING) + |FROM t + |WHERE j IN (DATE'1990-11-10', DATE'1990-11-11', DATE'1990-11-12') + |""".stripMargin), + Row(1, "1990-11-11")) + } + } + } + + test("SPARK-26560 Spark should be able to run Hive UDF using jar regardless of " + + "current thread context classloader") { + // force to use Spark classloader as other test (even in other test suites) may change the + // current thread's context classloader to jar classloader + Utils.withContextClassLoader(Utils.getSparkClassLoader) { + withUserDefinedFunction("udtf_count3" -> false) { + val sparkClassLoader = Thread.currentThread().getContextClassLoader + + // This jar file should not be placed to the classpath; GenericUDTFCount3 is slightly + // modified version of GenericUDTFCount2 in hive/contrib, which emits the count for + // three times. 
+ val jarPath = "src/test/noclasspath/TestUDTF-spark-26560.jar" + val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath" + + sql( + s""" + |CREATE FUNCTION udtf_count3 + |AS 'org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFCount3' + |USING JAR '$jarURL' + """.stripMargin) + + assert(Thread.currentThread().getContextClassLoader eq sparkClassLoader) + + // JAR will be loaded at first usage, and it will change the current thread's + // context classloader to jar classloader in sharedState. + // See SessionState.addJar for details. + checkAnswer( + sql("SELECT udtf_count3(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), + Row(3) :: Row(3) :: Row(3) :: Nil) + + assert(Thread.currentThread().getContextClassLoader ne sparkClassLoader) + assert(Thread.currentThread().getContextClassLoader eq + spark.sqlContext.sharedState.jarClassLoader) + + // Roll back to the original classloader and run query again. Without this line, the test + // would pass, as thread's context classloader is changed to jar classloader. But thread + // context classloader can be changed from others as well which would fail the query; one + // example is spark-shell, which thread context classloader rolls back automatically. This + // mimics the behavior of spark-shell. 
+ Thread.currentThread().setContextClassLoader(sparkClassLoader) + checkAnswer( + sql("SELECT udtf_count3(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), + Row(3) :: Row(3) :: Row(3) :: Nil) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ScriptTransformationSuite.scala index ed3b376f6eda1..7d01fc53a4099 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ScriptTransformationSuite.scala @@ -17,21 +17,27 @@ package org.apache.spark.sql.hive.execution +import java.sql.Timestamp + import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe +import org.scalatest.Assertions._ import org.scalatest.BeforeAndAfterEach import org.scalatest.exceptions.TestFailedException import org.apache.spark.{SparkException, TaskContext, TestUtils} import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.{SparkPlan, SparkPlanTest, UnaryExecNode} +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StringType -class ScriptTransformationSuite extends SparkPlanTest with TestHiveSingleton with - BeforeAndAfterEach { +class ScriptTransformationSuite extends SparkPlanTest with SQLTestUtils with TestHiveSingleton + with BeforeAndAfterEach { import spark.implicits._ private val noSerdeIOSchema = HiveScriptIOSchema( @@ -185,6 +191,43 @@ class ScriptTransformationSuite extends SparkPlanTest with TestHiveSingleton wit rowsDf.select("name").collect()) 
assert(uncaughtExceptionHandler.exception.isEmpty) } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { + assume(TestUtils.testCommandAvailable("python")) + val scriptFilePath = getTestResourcePath("test_script.py") + + withTempView("v") { + val df = Seq( + (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), + (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), + (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( + s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |USING 'python $scriptFilePath' AS (a, b, c, d, e) + |FROM v + """.stripMargin) + + // In Hive1.2, it does not do well on Decimal conversion. For example, in this case, + // it converts a decimal value's type from Decimal(38, 18) to Decimal(1, 0). So we need + // do extra cast here for Hive1.2. But in Hive2.3, it still keeps the original Decimal type. + val decimalToString: Column => Column = if (HiveUtils.isHive23) { + c => c.cast("string") + } else { + c => c.cast("decimal(1, 0)").cast("string") + } + checkAnswer(query, identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + decimalToString('d), + 'e.cast("string")).collect()) + } + } } private case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryExecNode { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala new file mode 100644 index 0000000000000..e6856a58b0ea9 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala @@ -0,0 +1,417 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution + +import java.lang.{Double => jlDouble, Integer => jlInt, Long => jlLong} + +import scala.collection.JavaConverters._ +import scala.util.Random + +import test.org.apache.spark.sql.MyDoubleAvg +import test.org.apache.spark.sql.MyDoubleSum + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow +import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.expressions.{Aggregator} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ + +class MyDoubleAvgAggBase extends Aggregator[jlDouble, (Double, Long), jlDouble] { + def zero: (Double, Long) = (0.0, 0L) + def reduce(b: (Double, Long), a: jlDouble): (Double, Long) = { + if (a != null) (b._1 + a, b._2 + 1L) else b + } + def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) = + (b1._1 + b2._1, b1._2 + b2._2) + def finish(r: (Double, Long)): jlDouble = + if (r._2 > 0L) 100.0 + (r._1 / r._2.toDouble) else null + def bufferEncoder: Encoder[(Double, Long)] = + Encoders.tuple(Encoders.scalaDouble, Encoders.scalaLong) + def 
outputEncoder: Encoder[jlDouble] = Encoders.DOUBLE +} + +object MyDoubleAvgAgg extends MyDoubleAvgAggBase +object MyDoubleSumAgg extends MyDoubleAvgAggBase { + override def finish(r: (Double, Long)): jlDouble = if (r._2 > 0L) r._1 else null +} + +object LongProductSumAgg extends Aggregator[(jlLong, jlLong), Long, jlLong] { + def zero: Long = 0L + def reduce(b: Long, a: (jlLong, jlLong)): Long = { + if ((a._1 != null) && (a._2 != null)) b + (a._1 * a._2) else b + } + def merge(b1: Long, b2: Long): Long = b1 + b2 + def finish(r: Long): jlLong = r + def bufferEncoder: Encoder[Long] = Encoders.scalaLong + def outputEncoder: Encoder[jlLong] = Encoders.LONG +} + +@SQLUserDefinedType(udt = classOf[CountSerDeUDT]) +case class CountSerDeSQL(nSer: Int, nDeSer: Int, sum: Int) + +class CountSerDeUDT extends UserDefinedType[CountSerDeSQL] { + def userClass: Class[CountSerDeSQL] = classOf[CountSerDeSQL] + + override def typeName: String = "count-ser-de" + + private[spark] override def asNullable: CountSerDeUDT = this + + def sqlType: DataType = StructType( + StructField("nSer", IntegerType, false) :: + StructField("nDeSer", IntegerType, false) :: + StructField("sum", IntegerType, false) :: + Nil) + + def serialize(sql: CountSerDeSQL): Any = { + val row = new GenericInternalRow(3) + row.setInt(0, 1 + sql.nSer) + row.setInt(1, sql.nDeSer) + row.setInt(2, sql.sum) + row + } + + def deserialize(any: Any): CountSerDeSQL = any match { + case row: InternalRow if (row.numFields == 3) => + CountSerDeSQL(row.getInt(0), 1 + row.getInt(1), row.getInt(2)) + case u => throw new Exception(s"failed to deserialize: $u") + } + + override def equals(obj: Any): Boolean = { + obj match { + case _: CountSerDeUDT => true + case _ => false + } + } + + override def hashCode(): Int = classOf[CountSerDeUDT].getName.hashCode() +} + +case object CountSerDeUDT extends CountSerDeUDT + +object CountSerDeAgg extends Aggregator[Int, CountSerDeSQL, CountSerDeSQL] { + def zero: CountSerDeSQL = CountSerDeSQL(0, 0, 
0) + def reduce(b: CountSerDeSQL, a: Int): CountSerDeSQL = b.copy(sum = b.sum + a) + def merge(b1: CountSerDeSQL, b2: CountSerDeSQL): CountSerDeSQL = + CountSerDeSQL(b1.nSer + b2.nSer, b1.nDeSer + b2.nDeSer, b1.sum + b2.sum) + def finish(r: CountSerDeSQL): CountSerDeSQL = r + def bufferEncoder: Encoder[CountSerDeSQL] = ExpressionEncoder[CountSerDeSQL]() + def outputEncoder: Encoder[CountSerDeSQL] = ExpressionEncoder[CountSerDeSQL]() +} + +abstract class UDAQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { + import testImplicits._ + + override def beforeAll(): Unit = { + super.beforeAll() + val data1 = Seq[(Integer, Integer)]( + (1, 10), + (null, -60), + (1, 20), + (1, 30), + (2, 0), + (null, -10), + (2, -1), + (2, null), + (2, null), + (null, 100), + (3, null), + (null, null), + (3, null)).toDF("key", "value") + data1.write.saveAsTable("agg1") + + val data2 = Seq[(Integer, Integer, Integer)]( + (1, 10, -10), + (null, -60, 60), + (1, 30, -30), + (1, 30, 30), + (2, 1, 1), + (null, -10, 10), + (2, -1, null), + (2, 1, 1), + (2, null, 1), + (null, 100, -10), + (3, null, 3), + (null, null, null), + (3, null, null)).toDF("key", "value1", "value2") + data2.write.saveAsTable("agg2") + + val data3 = Seq[(Seq[Integer], Integer, Integer)]( + (Seq[Integer](1, 1), 10, -10), + (Seq[Integer](null), -60, 60), + (Seq[Integer](1, 1), 30, -30), + (Seq[Integer](1), 30, 30), + (Seq[Integer](2), 1, 1), + (null, -10, 10), + (Seq[Integer](2, 3), -1, null), + (Seq[Integer](2, 3), 1, 1), + (Seq[Integer](2, 3, 4), null, 1), + (Seq[Integer](null), 100, -10), + (Seq[Integer](3), null, 3), + (null, null, null), + (Seq[Integer](3), null, null)).toDF("key", "value1", "value2") + data3.write.saveAsTable("agg3") + + val data4 = Seq[Boolean](true, false, true).toDF("boolvalues") + data4.write.saveAsTable("agg4") + + val emptyDF = spark.createDataFrame( + sparkContext.emptyRDD[Row], + StructType(StructField("key", StringType) :: StructField("value", IntegerType) :: Nil)) + 
emptyDF.createOrReplaceTempView("emptyTable") + + // Register UDAs + spark.udf.register("mydoublesum", udaf(MyDoubleSumAgg)) + spark.udf.register("mydoubleavg", udaf(MyDoubleAvgAgg)) + spark.udf.register("longProductSum", udaf(LongProductSumAgg)) + } + + override def afterAll(): Unit = { + try { + spark.sql("DROP TABLE IF EXISTS agg1") + spark.sql("DROP TABLE IF EXISTS agg2") + spark.sql("DROP TABLE IF EXISTS agg3") + spark.sql("DROP TABLE IF EXISTS agg4") + spark.catalog.dropTempView("emptyTable") + } finally { + super.afterAll() + } + } + + test("aggregators") { + checkAnswer( + spark.sql( + """ + |SELECT + | key, + | mydoublesum(value + 1.5 * key), + | mydoubleavg(value), + | avg(value - key), + | mydoublesum(value - 1.5 * key), + | avg(value) + |FROM agg1 + |GROUP BY key + """.stripMargin), + Row(1, 64.5, 120.0, 19.0, 55.5, 20.0) :: + Row(2, 5.0, 99.5, -2.5, -7.0, -0.5) :: + Row(3, null, null, null, null, null) :: + Row(null, null, 110.0, null, null, 10.0) :: Nil) + } + + test("non-deterministic children expressions of aggregator") { + val e = intercept[AnalysisException] { + spark.sql( + """ + |SELECT mydoublesum(value + 1.5 * key + rand()) + |FROM agg1 + |GROUP BY key + """.stripMargin) + }.getMessage + assert(Seq("nondeterministic expression", + "should not appear in the arguments of an aggregate function").forall(e.contains)) + } + + test("interpreted aggregate function") { + checkAnswer( + spark.sql( + """ + |SELECT mydoublesum(value), key + |FROM agg1 + |GROUP BY key + """.stripMargin), + Row(60.0, 1) :: Row(-1.0, 2) :: Row(null, 3) :: Row(30.0, null) :: Nil) + + checkAnswer( + spark.sql( + """ + |SELECT mydoublesum(value) FROM agg1 + """.stripMargin), + Row(89.0) :: Nil) + + checkAnswer( + spark.sql( + """ + |SELECT mydoublesum(null) + """.stripMargin), + Row(null) :: Nil) + } + + test("interpreted and expression-based aggregation functions") { + checkAnswer( + spark.sql( + """ + |SELECT mydoublesum(value), key, avg(value) + |FROM agg1 + |GROUP BY key + 
""".stripMargin), + Row(60.0, 1, 20.0) :: + Row(-1.0, 2, -0.5) :: + Row(null, 3, null) :: + Row(30.0, null, 10.0) :: Nil) + + checkAnswer( + spark.sql( + """ + |SELECT + | mydoublesum(value + 1.5 * key), + | avg(value - key), + | key, + | mydoublesum(value - 1.5 * key), + | avg(value) + |FROM agg1 + |GROUP BY key + """.stripMargin), + Row(64.5, 19.0, 1, 55.5, 20.0) :: + Row(5.0, -2.5, 2, -7.0, -0.5) :: + Row(null, null, 3, null, null) :: + Row(null, null, null, null, 10.0) :: Nil) + } + + test("single distinct column set") { + checkAnswer( + spark.sql( + """ + |SELECT + | mydoubleavg(distinct value1), + | avg(value1), + | avg(value2), + | key, + | mydoubleavg(value1 - 1), + | mydoubleavg(distinct value1) * 0.1, + | avg(value1 + value2) + |FROM agg2 + |GROUP BY key + """.stripMargin), + Row(120.0, 70.0/3.0, -10.0/3.0, 1, 67.0/3.0 + 100.0, 12.0, 20.0) :: + Row(100.0, 1.0/3.0, 1.0, 2, -2.0/3.0 + 100.0, 10.0, 2.0) :: + Row(null, null, 3.0, 3, null, null, null) :: + Row(110.0, 10.0, 20.0, null, 109.0, 11.0, 30.0) :: Nil) + + checkAnswer( + spark.sql( + """ + |SELECT + | key, + | mydoubleavg(distinct value1), + | mydoublesum(value2), + | mydoublesum(distinct value1), + | mydoubleavg(distinct value1), + | mydoubleavg(value1) + |FROM agg2 + |GROUP BY key + """.stripMargin), + Row(1, 120.0, -10.0, 40.0, 120.0, 70.0/3.0 + 100.0) :: + Row(2, 100.0, 3.0, 0.0, 100.0, 1.0/3.0 + 100.0) :: + Row(3, null, 3.0, null, null, null) :: + Row(null, 110.0, 60.0, 30.0, 110.0, 110.0) :: Nil) + } + + test("multiple distinct multiple columns sets") { + checkAnswer( + spark.sql( + """ + |SELECT + | key, + | count(distinct value1), + | sum(distinct value1), + | count(distinct value2), + | sum(distinct value2), + | count(distinct value1, value2), + | longProductSum(distinct value1, value2), + | count(value1), + | sum(value1), + | count(value2), + | sum(value2), + | longProductSum(value1, value2), + | count(*), + | count(1) + |FROM agg2 + |GROUP BY key + """.stripMargin), + Row(null, 3, 30, 3, 
60, 3, -4700, 3, 30, 3, 60, -4700, 4, 4) :: + Row(1, 2, 40, 3, -10, 3, -100, 3, 70, 3, -10, -100, 3, 3) :: + Row(2, 2, 0, 1, 1, 1, 1, 3, 1, 3, 3, 2, 4, 4) :: + Row(3, 0, null, 1, 3, 0, 0, 0, null, 1, 3, 0, 2, 2) :: Nil) + } + + test("verify aggregator ser/de behavior") { + val data = sparkContext.parallelize((1 to 100).toSeq, 3).toDF("value1") + val agg = udaf(CountSerDeAgg) + checkAnswer( + data.agg(agg($"value1")), + Row(CountSerDeSQL(4, 4, 5050)) :: Nil) + } + + test("verify type casting failure") { + assertThrows[org.apache.spark.sql.AnalysisException] { + spark.sql( + """ + |SELECT mydoublesum(boolvalues) FROM agg4 + """.stripMargin) + } + } +} + +class HashUDAQuerySuite extends UDAQuerySuite + +class HashUDAQueryWithControlledFallbackSuite extends UDAQuerySuite { + + override protected def checkAnswer(actual: => DataFrame, expectedAnswer: Seq[Row]): Unit = { + super.checkAnswer(actual, expectedAnswer) + Seq("true", "false").foreach { enableTwoLevelMaps => + withSQLConf("spark.sql.codegen.aggregate.map.twolevel.enabled" -> + enableTwoLevelMaps) { + (1 to 3).foreach { fallbackStartsAt => + withSQLConf("spark.sql.TungstenAggregate.testFallbackStartsAt" -> + s"${(fallbackStartsAt - 1).toString}, ${fallbackStartsAt.toString}") { + QueryTest.getErrorMessageInCheckAnswer(actual, expectedAnswer) match { + case Some(errorMessage) => + val newErrorMessage = + s""" + |The following aggregation query failed when using HashAggregate with + |controlled fallback (it falls back to bytes to bytes map once it has processed + |${fallbackStartsAt - 1} input rows and to sort-based aggregation once it has + |processed $fallbackStartsAt input rows). The query is ${actual.queryExecution} + | + |$errorMessage + """.stripMargin + + fail(newErrorMessage) + case None => // Success + } + } + } + } + } + } + + // Override it to make sure we call the actually overridden checkAnswer. 
+ override protected def checkAnswer(df: => DataFrame, expectedAnswer: Row): Unit = { + checkAnswer(df, Seq(expectedAnswer)) + } + + // Override it to make sure we call the actually overridden checkAnswer. + override protected def checkAnswer(df: => DataFrame, expectedAnswer: DataFrame): Unit = { + checkAnswer(df, expectedAnswer.collect()) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala index 3f9485dd018b1..15712a18ce751 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala @@ -41,7 +41,7 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto | p_size INT, | p_container STRING, | p_retailprice DOUBLE, - | p_comment STRING) + | p_comment STRING) USING hive """.stripMargin) val testData1 = TestHive.getHiveFile("data/files/part_tiny.txt").toURI sql( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala index b5e50915c7c89..5fc41067f661d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala @@ -124,154 +124,154 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { test("filter pushdown - integer") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - 
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - long") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, 
PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - float") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 
1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - double") { withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - 
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - string") { withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df 
=> - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < "2", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= "4", PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal("4") <= $"_1", 
PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - boolean") { withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === true, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < true, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= false, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(false) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(false) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(false) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(true) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(true) < $"_1", 
PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } test("filter pushdown - decimal") { withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) + checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - checkFilterPredicate('_1 === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate('_1 < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) === '_1, PredicateLeaf.Operator.EQUALS) + Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) + Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(2)) > '_1, 
PredicateLeaf.Operator.LESS_THAN) + Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN) checkFilterPredicate( - Literal(BigDecimal.valueOf(3)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) + Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) checkFilterPredicate( - Literal(BigDecimal.valueOf(4)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } @@ -282,22 +282,23 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { new Timestamp(milliseconds) } withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df => - checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate('_1 === timestamps(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate('_1 <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate('_1 < timestamps(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate('_1 > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate('_1 >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(timestamps(0)) === '_1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(timestamps(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(timestamps(1)) > '_1, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(timestamps(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1".isNull, 
PredicateLeaf.Operator.IS_NULL) + + checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS) + checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) + + checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) + + checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS) + checkFilterPredicate(Literal(timestamps(0)) <=> $"_1", + PredicateLeaf.Operator.NULL_SAFE_EQUALS) + checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) + checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) + checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) } } @@ -309,30 +310,30 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { // to produce string expression and then compare it to given string expression below. // This might have to be changed after Hive version is upgraded. 
checkFilterPredicateWithDiffHiveVersion( - '_1.isNotNull, + $"_1".isNotNull, """leaf-0 = (IS_NULL _1) |expr = (not leaf-0)""".stripMargin.trim ) checkFilterPredicateWithDiffHiveVersion( - '_1 =!= 1, + $"_1" =!= 1, """leaf-0 = (IS_NULL _1) |leaf-1 = (EQUALS _1 1) |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim ) checkFilterPredicateWithDiffHiveVersion( - !('_1 < 4), + !($"_1" < 4), """leaf-0 = (IS_NULL _1) |leaf-1 = (LESS_THAN _1 4) |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim ) checkFilterPredicateWithDiffHiveVersion( - '_1 < 2 || '_1 > 3, + $"_1" < 2 || $"_1" > 3, """leaf-0 = (LESS_THAN _1 2) |leaf-1 = (LESS_THAN_EQUALS _1 3) |expr = (or leaf-0 (not leaf-1))""".stripMargin.trim ) checkFilterPredicateWithDiffHiveVersion( - '_1 < 2 && '_1 > 3, + $"_1" < 2 && $"_1" > 3, """leaf-0 = (IS_NULL _1) |leaf-1 = (LESS_THAN _1 2) |leaf-2 = (LESS_THAN_EQUALS _1 3) @@ -347,22 +348,22 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { } // ArrayType withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => - checkNoFilterPredicate('_1.isNull) + checkNoFilterPredicate($"_1".isNull) } // BinaryType withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => - checkNoFilterPredicate('_1 <=> 1.b) + checkNoFilterPredicate($"_1" <=> 1.b) } // DateType if (!HiveUtils.isHive23) { val stringDate = "2015-01-01" withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df => - checkNoFilterPredicate('_1 === Date.valueOf(stringDate)) + checkNoFilterPredicate($"_1" === Date.valueOf(stringDate)) } } // MapType withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => - checkNoFilterPredicate('_1.isNotNull) + checkNoFilterPredicate($"_1".isNotNull) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 00333397e1fbb..990d9425fb7fc 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -210,7 +210,10 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } - test("SPARK-23340 Empty float/double array columns raise EOFException") { + // SPARK-28885 String value is not allowed to be stored as numeric type with + // ANSI store assignment policy. + // TODO: re-enable the test case when SPARK-29462 is fixed. + ignore("SPARK-23340 Empty float/double array columns raise EOFException") { withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") { withTable("spark_23340") { sql("CREATE TABLE spark_23340(a array, b array) STORED AS ORC") @@ -271,8 +274,8 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { val orcPartitionedTable = TableIdentifier("dummy_orc_partitioned", Some("default")) if (conversion == "true") { - // if converted, it's cached as a datasource table. - checkCached(orcPartitionedTable) + // if converted, we refresh the cached relation. + assert(getCachedDataSourceTable(orcPartitionedTable) === null) } else { // otherwise, not cached. 
assert(getCachedDataSourceTable(orcPartitionedTable) === null) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 0ea941c8e0d8e..f3e712d6c0a4a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -170,4 +170,154 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("SPARK-11412 read and merge orc schemas in parallel") { testMergeSchemasInParallel(OrcFileOperator.readOrcSchemasInParallel) } + + test("SPARK-25993 CREATE EXTERNAL TABLE with subdirectories") { + Seq(true, false).foreach { convertMetastore => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { + withTempDir { dir => + withTable("orc_tbl1", "orc_tbl2", "orc_tbl3") { + val orcTblStatement1 = + s""" + |CREATE EXTERNAL TABLE orc_tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(orcTblStatement1) + + val orcTblInsertL1 = + s"INSERT INTO TABLE orc_tbl1 VALUES (1, 1, 'orc1'), (2, 2, 'orc2')".stripMargin + sql(orcTblInsertL1) + + val orcTblStatement2 = + s""" + |CREATE EXTERNAL TABLE orc_tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(orcTblStatement2) + + val orcTblInsertL2 = + s"INSERT INTO TABLE orc_tbl2 VALUES (3, 3, 'orc3'), (4, 4, 'orc4')".stripMargin + sql(orcTblInsertL2) + + val orcTblStatement3 = + s""" + |CREATE EXTERNAL TABLE orc_tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/l3/"}'""".stripMargin + sql(orcTblStatement3) + + val orcTblInsertL3 = + s"INSERT INTO TABLE orc_tbl3 VALUES (5, 5, 'orc5'), (6, 6, 'orc6')".stripMargin + sql(orcTblInsertL3) + + withTable("tbl1", "tbl2", 
"tbl3", "tbl4", "tbl5", "tbl6") { + val topDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl1( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}"}'""".stripMargin + sql(topDirStatement) + val topDirSqlStatement = s"SELECT * FROM tbl1" + if (convertMetastore) { + checkAnswer(sql(topDirSqlStatement), Nil) + } else { + checkAnswer(sql(topDirSqlStatement), (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl2( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/"}'""".stripMargin + sql(l1DirStatement) + val l1DirSqlStatement = s"SELECT * FROM tbl2" + if (convertMetastore) { + checkAnswer(sql(l1DirSqlStatement), (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { + checkAnswer(sql(l1DirSqlStatement), (1 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val l2DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl3( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${s"${dir.getCanonicalPath}/l1/l2/"}'""".stripMargin + sql(l2DirStatement) + val l2DirSqlStatement = s"SELECT * FROM tbl3" + if (convertMetastore) { + checkAnswer(sql(l2DirSqlStatement), (3 to 4).map(i => Row(i, i, s"orc$i"))) + } else { + checkAnswer(sql(l2DirSqlStatement), (3 to 6).map(i => Row(i, i, s"orc$i"))) + } + + val wildcardTopDirStatement = + s""" + |CREATE EXTERNAL TABLE tbl4( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${new File(s"${dir}/*").toURI}'""".stripMargin + sql(wildcardTopDirStatement) + val wildcardTopDirSqlStatement = s"SELECT * FROM tbl4" + if (convertMetastore) { + checkAnswer(sql(wildcardTopDirSqlStatement), (1 to 2).map(i => Row(i, i, s"orc$i"))) + } else { + checkAnswer(sql(wildcardTopDirSqlStatement), Nil) + } + + val wildcardL1DirStatement = + s""" + |CREATE EXTERNAL TABLE tbl5( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${new File(s"${dir}/l1/*").toURI}'""".stripMargin + 
sql(wildcardL1DirStatement) + val wildcardL1DirSqlStatement = s"SELECT * FROM tbl5" + if (convertMetastore) { + checkAnswer(sql(wildcardL1DirSqlStatement), (1 to 4).map(i => Row(i, i, s"orc$i"))) + } else { + checkAnswer(sql(wildcardL1DirSqlStatement), Nil) + } + + val wildcardL2Statement = + s""" + |CREATE EXTERNAL TABLE tbl6( + | c1 int, + | c2 int, + | c3 string) + |STORED AS orc + |LOCATION '${new File(s"${dir}/l1/l2/*").toURI}'""".stripMargin + sql(wildcardL2Statement) + val wildcardL2SqlStatement = s"SELECT * FROM tbl6" + if (convertMetastore) { + checkAnswer(sql(wildcardL2SqlStatement), (3 to 6).map(i => Row(i, i, s"orc$i"))) + } else { + checkAnswer(sql(wildcardL2SqlStatement), Nil) + } + } + } + } + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index c03ae144a1595..a26412c5163ec 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -22,9 +22,9 @@ import java.io.File import scala.util.Random import org.apache.spark.SparkConf -import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{DataFrame, SparkSession} -import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -43,18 +43,23 @@ import org.apache.spark.sql.types._ * This is in `sql/hive` module in order to compare `sql/core` and `sql/hive` ORC data sources. 
*/ // scalastyle:off line.size.limit -object OrcReadBenchmark extends BenchmarkBase with SQLHelper { - val conf = new SparkConf() - conf.set("orc.compression", "snappy") +object OrcReadBenchmark extends SqlBasedBenchmark { - private val spark = SparkSession.builder() - .master("local[1]") - .appName("OrcReadBenchmark") - .config(conf) - .getOrCreate() + override def getSparkSession: SparkSession = { + val conf = new SparkConf() + conf.set("orc.compression", "snappy") - // Set default configs. Individual cases will change them if necessary. - spark.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") + val sparkSession = SparkSession.builder() + .master("local[1]") + .appName("OrcReadBenchmark") + .config(conf) + .getOrCreate() + + // Set default configs. Individual cases will change them if necessary. + sparkSession.conf.set(SQLConf.ORC_FILTER_PUSHDOWN_ENABLED.key, "true") + + sparkSession + } def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView) @@ -88,16 +93,16 @@ object OrcReadBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } } benchmark.addCase("Native ORC Vectorized") { _ => - spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(id) FROM hiveOrcTable").collect() + spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() } benchmark.run() @@ -119,16 +124,16 @@ object OrcReadBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(c1), 
sum(length(c2)) FROM nativeOrcTable").noop() } } benchmark.addCase("Native ORC Vectorized") { _ => - spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(c1), sum(length(c2)) FROM nativeOrcTable").noop() } benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").collect() + spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").noop() } benchmark.run() @@ -148,44 +153,44 @@ object OrcReadBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("Data column - Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } } benchmark.addCase("Data column - Native ORC Vectorized") { _ => - spark.sql("SELECT sum(id) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(id) FROM nativeOrcTable").noop() } benchmark.addCase("Data column - Hive built-in ORC") { _ => - spark.sql("SELECT sum(id) FROM hiveOrcTable").collect() + spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() } benchmark.addCase("Partition column - Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(p) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(p) FROM nativeOrcTable").noop() } } benchmark.addCase("Partition column - Native ORC Vectorized") { _ => - spark.sql("SELECT sum(p) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(p) FROM nativeOrcTable").noop() } benchmark.addCase("Partition column - Hive built-in ORC") { _ => - spark.sql("SELECT sum(p) FROM hiveOrcTable").collect() + spark.sql("SELECT sum(p) FROM hiveOrcTable").noop() } benchmark.addCase("Both columns - Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(p), sum(id) FROM 
nativeOrcTable").noop() } } benchmark.addCase("Both columns - Native ORC Vectorized") { _ => - spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(p), sum(id) FROM nativeOrcTable").noop() } benchmark.addCase("Both columns - Hive built-in ORC") { _ => - spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").collect() + spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").noop() } benchmark.run() @@ -204,16 +209,16 @@ object OrcReadBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").noop() } } benchmark.addCase("Native ORC Vectorized") { _ => - spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").collect() + spark.sql("SELECT sum(length(c1)) FROM nativeOrcTable").noop() } benchmark.addCase("Hive built-in ORC") { _ => - spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").collect() + spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").noop() } benchmark.run() @@ -239,18 +244,18 @@ object OrcReadBenchmark extends BenchmarkBase with SQLHelper { benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { spark.sql("SELECT SUM(LENGTH(c2)) FROM nativeOrcTable " + - "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() } } benchmark.addCase("Native ORC Vectorized") { _ => spark.sql("SELECT SUM(LENGTH(c2)) FROM nativeOrcTable " + - "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() } benchmark.addCase("Hive built-in ORC") { _ => spark.sql("SELECT SUM(LENGTH(c2)) FROM hiveOrcTable " + - "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() + "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").noop() } benchmark.run() @@ -273,16 +278,16 @@ object OrcReadBenchmark 
extends BenchmarkBase with SQLHelper { benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { - spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").noop() } } benchmark.addCase("Native ORC Vectorized") { _ => - spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM nativeOrcTable").noop() } benchmark.addCase("Hive built-in ORC") { _ => - spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").collect() + spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").noop() } benchmark.run() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/security/HiveHadoopDelegationTokenManagerSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/security/HiveHadoopDelegationTokenManagerSuite.scala index ce40cf51746b2..97eab4f3f4f77 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/security/HiveHadoopDelegationTokenManagerSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/security/HiveHadoopDelegationTokenManagerSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.security import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration +import org.scalatest.Assertions._ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.security.HadoopDelegationTokenManager diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index d68a47053f18c..222244a04f5f5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -328,10 +328,10 @@ private[hive] class TestHiveSparkSession( @transient val hiveQTestUtilTables: Seq[TestTable] = Seq( TestTable("src", - "CREATE TABLE src (key INT, value STRING)".cmd, + "CREATE TABLE src (key 
INT, value STRING) STORED AS TEXTFILE".cmd, s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' INTO TABLE src".cmd), TestTable("src1", - "CREATE TABLE src1 (key INT, value STRING)".cmd, + "CREATE TABLE src1 (key INT, value STRING) STORED AS TEXTFILE".cmd, s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv3.txt")}' INTO TABLE src1".cmd), TestTable("srcpart", () => { "CREATE TABLE srcpart (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)" @@ -489,7 +489,7 @@ private[hive] class TestHiveSparkSession( def getLoadedTables: collection.mutable.HashSet[String] = sharedState.loadedTables - def loadTestTable(name: String) { + def loadTestTable(name: String): Unit = { if (!sharedState.loadedTables.contains(name)) { // Marks the table as loaded first to prevent infinite mutually recursive table loading. sharedState.loadedTables += name @@ -501,7 +501,7 @@ private[hive] class TestHiveSparkSession( // has already set the execution id. if (sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) == null) { // We don't actually have a `QueryExecution` here, use a fake one instead. - SQLExecution.withNewExecutionId(this, new QueryExecution(this, OneRowRelation())) { + SQLExecution.withNewExecutionId(new QueryExecution(this, OneRowRelation())) { createCmds.foreach(_()) } } else { @@ -523,7 +523,7 @@ private[hive] class TestHiveSparkSession( /** * Resets the test instance by deleting any table, view, temp view, and UDF that have been created */ - def reset() { + def reset(): Unit = { try { // HACK: Hive is too noisy by default. 
org.apache.log4j.LogManager.getCurrentLoggers.asScala.foreach { log => @@ -647,3 +647,25 @@ private[sql] class TestHiveSessionStateBuilder( override protected def newBuilder: NewBuilder = new TestHiveSessionStateBuilder(_, _) } + +private[hive] object HiveTestJars { + private val repository = SQLConf.ADDITIONAL_REMOTE_REPOSITORIES.defaultValueString.split(",")(0) + private val hiveTestJarsDir = Utils.createTempDir() + + def getHiveContribJar(version: String = HiveUtils.builtinHiveVersion): File = + getJarFromUrl(s"${repository}org/apache/hive/hive-contrib/" + + s"$version/hive-contrib-$version.jar") + + def getHiveHcatalogCoreJar(version: String = HiveUtils.builtinHiveVersion): File = + getJarFromUrl(s"${repository}org/apache/hive/hcatalog/hive-hcatalog-core/" + + s"$version/hive-hcatalog-core-$version.jar") + + private def getJarFromUrl(urlString: String): File = { + val fileName = urlString.split("/").last + val targetFile = new File(hiveTestJarsDir, fileName) + if (!targetFile.exists()) { + Utils.doFetchFile(urlString, hiveTestJarsDir, fileName, new SparkConf, null, null) + } + targetFile + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala index 5db83c698ff15..4ada5077aec7f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala @@ -73,22 +73,22 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes // Simple filtering and partition pruning checkAnswer( - df.filter('a > 1 && 'p1 === 2), + df.filter($"a" > 1 && $"p1" === 2), for (i <- 2 to 3; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", 2, p2)) // Simple projection and filtering checkAnswer( - df.filter('a > 1).select('b, 'a + 1), + df.filter($"a" > 1).select($"b", $"a" + 1), for (i <- 2 to 3; _ <- 1 to 2; _ <- Seq("foo", "bar")) 
yield Row(s"val_$i", i + 1)) // Simple projection and partition pruning checkAnswer( - df.filter('a > 1 && 'p1 < 2).select('b, 'p1), + df.filter($"a" > 1 && $"p1" < 2).select($"b", $"p1"), for (i <- 2 to 3; _ <- Seq("foo", "bar")) yield Row(s"val_$i", 1)) // Project many copies of columns with different types (reproduction for SPARK-7858) checkAnswer( - df.filter('a > 1 && 'p1 < 2).select('b, 'b, 'b, 'b, 'p1, 'p1, 'p1, 'p1), + df.filter($"a" > 1 && $"p1" < 2).select($"b", $"b", $"b", $"b", $"p1", $"p1", $"p1", $"p1"), for (i <- 2 to 3; _ <- Seq("foo", "bar")) yield Row(s"val_$i", s"val_$i", s"val_$i", s"val_$i", 1, 1, 1, 1)) @@ -384,12 +384,12 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes test("saveAsTable()/load() - partitioned table - boolean type") { spark.range(2) - .select('id, ('id % 2 === 0).as("b")) + .select($"id", ($"id" % 2 === 0).as("b")) .write.partitionBy("b").saveAsTable("t") withTable("t") { checkAnswer( - spark.table("t").sort('id), + spark.table("t").sort($"id"), Row(0, true) :: Row(1, false) :: Nil ) } @@ -731,12 +731,12 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes } yield (i, s"val_$i", 1.0d, p2, 123, 123.123f)).toDF("a", "b", "p1", "p2", "p3", "f") val input = df.select( - 'a, - 'b, - 'p1.cast(StringType).as('ps1), - 'p2, - 'p3.cast(FloatType).as('pf1), - 'f) + $"a", + $"b", + $"p1".cast(StringType).as("ps1"), + $"p2", + $"p3".cast(FloatType).as("pf1"), + $"f") withTempView("t") { input @@ -770,7 +770,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes .saveAsTable("t") withTable("t") { - checkAnswer(spark.table("t").select('b, 'c, 'a), df.select('b, 'c, 'a).collect()) + checkAnswer(spark.table("t").select("b", "c", "a"), df.select("b", "c", "a").collect()) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala index 6ebc1d145848c..2e6b86206a631 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala @@ -152,8 +152,8 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest { withTempPath { dir => val path = dir.getCanonicalPath - spark.range(2).select('id as 'a, 'id as 'b).write.partitionBy("b").parquet(path) - val df = spark.read.parquet(path).filter('a === 0).select('b) + spark.range(2).select($"id" as "a", $"id" as "b").write.partitionBy("b").parquet(path) + val df = spark.read.parquet(path).filter($"a" === 0).select("b") val physicalPlan = df.queryExecution.sparkPlan assert(physicalPlan.collect { case p: execution.ProjectExec => p }.length === 1) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala index 60a4638f610b3..d1b97b2852fbc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.sql.{sources, SparkSession} import org.apache.spark.sql.catalyst.{expressions, InternalRow} -import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, GenericInternalRow, InterpretedPredicate, InterpretedProjection, JoinedRow, Literal} +import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, GenericInternalRow, InterpretedProjection, JoinedRow, Literal, Predicate} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.types.{DataType, StructType} @@ -88,7 +88,7 @@ class SimpleTextSource 
extends TextBasedFileFormat with DataSourceRegister { val attribute = inputAttributes.find(_.name == column).get expressions.GreaterThan(attribute, literal) }.reduceOption(expressions.And).getOrElse(Literal(true)) - InterpretedPredicate.create(filterCondition, inputAttributes) + Predicate.create(filterCondition, inputAttributes) } // Uses a simple projection to simulate column pruning diff --git a/streaming/pom.xml b/streaming/pom.xml index 1d1ea469f7d18..87af6388e1118 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -131,4 +131,16 @@ + + + + scala-2.13 + + + org.scala-lang.modules + scala-parallel-collections_${scala.binary.version} + + + + diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 54f91ff1c69d5..5d81d36dfe357 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -55,6 +55,8 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) "spark.driver.bindAddress", "spark.driver.port", "spark.master", + "spark.ui.port", + "spark.blockManager.port", "spark.kubernetes.driver.pod.name", "spark.kubernetes.executor.podNamePrefix", "spark.yarn.jars", @@ -69,6 +71,8 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) .remove("spark.driver.host") .remove("spark.driver.bindAddress") .remove("spark.driver.port") + .remove("spark.ui.port") + .remove("spark.blockManager.port") .remove("spark.kubernetes.driver.pod.name") .remove("spark.kubernetes.executor.podNamePrefix") val newReloadConf = new SparkConf(loadDefaults = true) @@ -90,7 +94,7 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) newSparkConf } - def validate() { + def validate(): Unit = { assert(master != null, "Checkpoint.master is null") assert(framework != null, "Checkpoint.framework is null") assert(graph != null, "Checkpoint.graph is null") @@ 
-131,8 +135,8 @@ object Checkpoint extends Logging { try { val statuses = fs.listStatus(path) if (statuses != null) { - val paths = statuses.map(_.getPath) - val filtered = paths.filter(p => REGEX.findFirstIn(p.toString).nonEmpty) + val paths = statuses.filterNot(_.isDirectory).map(_.getPath) + val filtered = paths.filter(p => REGEX.findFirstIn(p.getName).nonEmpty) filtered.sortWith(sortFunc) } else { logWarning(s"Listing $path returned null") @@ -213,7 +217,7 @@ class CheckpointWriter( checkpointTime: Time, bytes: Array[Byte], clearCheckpointDataLater: Boolean) extends Runnable { - def run() { + def run(): Unit = { if (latestCheckpointTime == null || latestCheckpointTime < checkpointTime) { latestCheckpointTime = checkpointTime } @@ -288,7 +292,7 @@ class CheckpointWriter( } } - def write(checkpoint: Checkpoint, clearCheckpointDataLater: Boolean) { + def write(checkpoint: Checkpoint, clearCheckpointDataLater: Boolean): Unit = { try { val bytes = Checkpoint.serialize(checkpoint, conf) executor.execute(new CheckpointWriteHandler( diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala index dce2028b48878..683db21d3f0e1 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala @@ -20,6 +20,7 @@ package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer +import scala.collection.parallel.immutable.ParVector import org.apache.spark.internal.Logging import org.apache.spark.streaming.dstream.{DStream, InputDStream, ReceiverInputDStream} @@ -41,7 +42,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { var batchDuration: Duration = null @volatile private var numReceivers: Int = 0 - def start(time: Time) { + def start(time: Time): Unit = { 
this.synchronized { require(zeroTime == null, "DStream graph computation already started") zeroTime = time @@ -50,28 +51,28 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { outputStreams.foreach(_.remember(rememberDuration)) outputStreams.foreach(_.validateAtStart()) numReceivers = inputStreams.count(_.isInstanceOf[ReceiverInputDStream[_]]) - inputStreamNameAndID = inputStreams.map(is => (is.name, is.id)) - inputStreams.par.foreach(_.start()) + inputStreamNameAndID = inputStreams.map(is => (is.name, is.id)).toSeq + new ParVector(inputStreams.toVector).foreach(_.start()) } } - def restart(time: Time) { + def restart(time: Time): Unit = { this.synchronized { startTime = time } } - def stop() { + def stop(): Unit = { this.synchronized { - inputStreams.par.foreach(_.stop()) + new ParVector(inputStreams.toVector).foreach(_.stop()) } } - def setContext(ssc: StreamingContext) { + def setContext(ssc: StreamingContext): Unit = { this.synchronized { outputStreams.foreach(_.setContext(ssc)) } } - def setBatchDuration(duration: Duration) { + def setBatchDuration(duration: Duration): Unit = { this.synchronized { require(batchDuration == null, s"Batch duration already set as $batchDuration. Cannot set it again.") @@ -79,7 +80,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { } } - def remember(duration: Duration) { + def remember(duration: Duration): Unit = { this.synchronized { require(rememberDuration == null, s"Remember duration already set as $rememberDuration. 
Cannot set it again.") @@ -87,14 +88,14 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { } } - def addInputStream(inputStream: InputDStream[_]) { + def addInputStream(inputStream: InputDStream[_]): Unit = { this.synchronized { inputStream.setGraph(this) inputStreams += inputStream } } - def addOutputStream(outputStream: DStream[_]) { + def addOutputStream(outputStream: DStream[_]): Unit = { this.synchronized { outputStream.setGraph(this) outputStreams += outputStream @@ -128,7 +129,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { jobs } - def clearMetadata(time: Time) { + def clearMetadata(time: Time): Unit = { logDebug("Clearing metadata for time " + time) this.synchronized { outputStreams.foreach(_.clearMetadata(time)) @@ -136,7 +137,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { logDebug("Cleared old metadata for time " + time) } - def updateCheckpointData(time: Time) { + def updateCheckpointData(time: Time): Unit = { logInfo("Updating checkpoint data for time " + time) this.synchronized { outputStreams.foreach(_.updateCheckpointData(time)) @@ -144,7 +145,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { logInfo("Updated checkpoint data for time " + time) } - def clearCheckpointData(time: Time) { + def clearCheckpointData(time: Time): Unit = { logInfo("Clearing checkpoint data for time " + time) this.synchronized { outputStreams.foreach(_.clearCheckpointData(time)) @@ -152,7 +153,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { logInfo("Cleared checkpoint data for time " + time) } - def restoreCheckpointData() { + def restoreCheckpointData(): Unit = { logInfo("Restoring checkpoint data") this.synchronized { outputStreams.foreach(_.restoreCheckpointData()) @@ -160,7 +161,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { logInfo("Restored checkpoint 
data") } - def validate() { + def validate(): Unit = { this.synchronized { require(batchDuration != null, "Batch duration has not been set") // assert(batchDuration >= Milliseconds(100), "Batch duration of " + batchDuration + diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 589dd877c8c97..440b653e45de1 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -26,7 +26,6 @@ import scala.collection.mutable.Queue import scala.reflect.ClassTag import scala.util.control.NonFatal -import org.apache.commons.lang3.SerializationUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{BytesWritable, LongWritable, Text} @@ -222,7 +221,7 @@ class StreamingContext private[streaming] ( * if the developer wishes to query old data outside the DStream computation). * @param duration Minimum duration that each DStream should remember its RDDs */ - def remember(duration: Duration) { + def remember(duration: Duration): Unit = { graph.remember(duration) } @@ -232,7 +231,7 @@ class StreamingContext private[streaming] ( * @param directory HDFS-compatible directory where the checkpoint data will be reliably stored. * Note that this must be a fault-tolerant file system like HDFS. */ - def checkpoint(directory: String) { + def checkpoint(directory: String): Unit = { if (directory != null) { val path = new Path(directory) val fs = path.getFileSystem(sparkContext.hadoopConfiguration) @@ -505,7 +504,7 @@ class StreamingContext private[streaming] ( * Add a [[org.apache.spark.streaming.scheduler.StreamingListener]] object for * receiving system events related to streaming. 
*/ - def addStreamingListener(streamingListener: StreamingListener) { + def addStreamingListener(streamingListener: StreamingListener): Unit = { scheduler.listenerBus.addListener(streamingListener) } @@ -513,7 +512,7 @@ class StreamingContext private[streaming] ( scheduler.listenerBus.removeListener(streamingListener) } - private def validate() { + private def validate(): Unit = { assert(graph != null, "Graph is null") graph.validate() @@ -586,7 +585,7 @@ class StreamingContext private[streaming] ( sparkContext.setCallSite(startSite.get) sparkContext.clearJobGroup() sparkContext.setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, "false") - savedProperties.set(SerializationUtils.clone(sparkContext.localProperties.get())) + savedProperties.set(Utils.cloneProperties(sparkContext.localProperties.get())) scheduler.start() } state = StreamingContextState.ACTIVE @@ -621,7 +620,7 @@ class StreamingContext private[streaming] ( * Wait for the execution to stop. Any exceptions that occurs during the execution * will be thrown in this thread. */ - def awaitTermination() { + def awaitTermination(): Unit = { waiter.waitForStopOrError() } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala index 4a0ec31b5f3c8..51141212f9ecb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStreamLike.scala @@ -268,7 +268,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. 
*/ - def foreachRDD(foreachFunc: JVoidFunction[R]) { + def foreachRDD(foreachFunc: JVoidFunction[R]): Unit = { dstream.foreachRDD(rdd => foreachFunc.call(wrapRDD(rdd))) } @@ -276,7 +276,7 @@ trait JavaDStreamLike[T, This <: JavaDStreamLike[T, This, R], R <: JavaRDDLike[T * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. */ - def foreachRDD(foreachFunc: JVoidFunction2[R, Time]) { + def foreachRDD(foreachFunc: JVoidFunction2[R, Time]): Unit = { dstream.foreachRDD((rdd, time) => foreachFunc.call(wrapRDD(rdd), time)) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index 3f88fe0817c57..650d8c7f4d1a7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -759,7 +759,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * Save each RDD in `this` DStream as a Hadoop file. The file name at each batch interval is * generated based on `prefix` and `suffix`: "prefix-TIME_IN_MS.suffix". 
*/ - def saveAsHadoopFiles(prefix: String, suffix: String) { + def saveAsHadoopFiles(prefix: String, suffix: String): Unit = { dstream.saveAsHadoopFiles(prefix, suffix) } @@ -772,7 +772,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( suffix: String, keyClass: Class[_], valueClass: Class[_], - outputFormatClass: Class[F]) { + outputFormatClass: Class[F]): Unit = { dstream.saveAsHadoopFiles(prefix, suffix, keyClass, valueClass, outputFormatClass) } @@ -786,7 +786,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[F], - conf: JobConf) { + conf: JobConf): Unit = { dstream.saveAsHadoopFiles(prefix, suffix, keyClass, valueClass, outputFormatClass, conf) } @@ -794,7 +794,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * Save each RDD in `this` DStream as a Hadoop file. The file name at each batch interval is * generated based on `prefix` and `suffix`: "prefix-TIME_IN_MS.suffix". */ - def saveAsNewAPIHadoopFiles(prefix: String, suffix: String) { + def saveAsNewAPIHadoopFiles(prefix: String, suffix: String): Unit = { dstream.saveAsNewAPIHadoopFiles(prefix, suffix) } @@ -807,7 +807,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( suffix: String, keyClass: Class[_], valueClass: Class[_], - outputFormatClass: Class[F]) { + outputFormatClass: Class[F]): Unit = { dstream.saveAsNewAPIHadoopFiles(prefix, suffix, keyClass, valueClass, outputFormatClass) } @@ -821,7 +821,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[F], - conf: Configuration = dstream.context.sparkContext.hadoopConfiguration) { + conf: Configuration = dstream.context.sparkContext.hadoopConfiguration): Unit = { dstream.saveAsNewAPIHadoopFiles(prefix, suffix, keyClass, valueClass, outputFormatClass, conf) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index d4f03bedc7ed6..2d53a1b4c78b6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -505,7 +505,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * fault-tolerance. The graph will be checkpointed every batch interval. * @param directory HDFS-compatible directory where the checkpoint data will be reliably stored */ - def checkpoint(directory: String) { + def checkpoint(directory: String): Unit = { ssc.checkpoint(directory) } @@ -516,7 +516,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * if the developer wishes to query old data outside the DStream computation). * @param duration Minimum duration that each DStream should remember its RDDs */ - def remember(duration: Duration) { + def remember(duration: Duration): Unit = { ssc.remember(duration) } @@ -524,7 +524,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * Add a [[org.apache.spark.streaming.scheduler.StreamingListener]] object for * receiving system events related to streaming. 
*/ - def addStreamingListener(streamingListener: StreamingListener) { + def addStreamingListener(streamingListener: StreamingListener): Unit = { ssc.addStreamingListener(streamingListener) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingListener.scala index 28cb86c9f31fd..ce1afad7a91d8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingListener.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingListener.scala @@ -22,33 +22,33 @@ import org.apache.spark.streaming.Time private[streaming] trait PythonStreamingListener{ /** Called when the streaming has been started */ - def onStreamingStarted(streamingStarted: JavaStreamingListenerStreamingStarted) { } + def onStreamingStarted(streamingStarted: JavaStreamingListenerStreamingStarted): Unit = { } /** Called when a receiver has been started */ - def onReceiverStarted(receiverStarted: JavaStreamingListenerReceiverStarted) { } + def onReceiverStarted(receiverStarted: JavaStreamingListenerReceiverStarted): Unit = { } /** Called when a receiver has reported an error */ - def onReceiverError(receiverError: JavaStreamingListenerReceiverError) { } + def onReceiverError(receiverError: JavaStreamingListenerReceiverError): Unit = { } /** Called when a receiver has been stopped */ - def onReceiverStopped(receiverStopped: JavaStreamingListenerReceiverStopped) { } + def onReceiverStopped(receiverStopped: JavaStreamingListenerReceiverStopped): Unit = { } /** Called when a batch of jobs has been submitted for processing. */ - def onBatchSubmitted(batchSubmitted: JavaStreamingListenerBatchSubmitted) { } + def onBatchSubmitted(batchSubmitted: JavaStreamingListenerBatchSubmitted): Unit = { } /** Called when processing of a batch of jobs has started. 
*/ - def onBatchStarted(batchStarted: JavaStreamingListenerBatchStarted) { } + def onBatchStarted(batchStarted: JavaStreamingListenerBatchStarted): Unit = { } /** Called when processing of a batch of jobs has completed. */ - def onBatchCompleted(batchCompleted: JavaStreamingListenerBatchCompleted) { } + def onBatchCompleted(batchCompleted: JavaStreamingListenerBatchCompleted): Unit = { } /** Called when processing of a job of a batch has started. */ def onOutputOperationStarted( - outputOperationStarted: JavaStreamingListenerOutputOperationStarted) { } + outputOperationStarted: JavaStreamingListenerOutputOperationStarted): Unit = { } /** Called when processing of a job of a batch has completed. */ def onOutputOperationCompleted( - outputOperationCompleted: JavaStreamingListenerOutputOperationCompleted) { } + outputOperationCompleted: JavaStreamingListenerOutputOperationCompleted): Unit = { } } private[streaming] class PythonStreamingListenerWrapper(listener: PythonStreamingListener) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala index 46bfc60856453..570663c6f6ad3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala @@ -174,7 +174,7 @@ private[streaming] object PythonDStream { * helper function for DStream.foreachRDD(), * cannot be `foreachRDD`, it will confusing py4j */ - def callForeachRDD(jdstream: JavaDStream[Array[Byte]], pfunc: PythonTransformFunction) { + def callForeachRDD(jdstream: JavaDStream[Array[Byte]], pfunc: PythonTransformFunction): Unit = { val func = new TransformFunction((pfunc)) jdstream.dstream.foreachRDD((rdd, time) => func(Some(rdd), time)) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala index 995470ec8deae..ed2ddf9e25572 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala @@ -31,9 +31,9 @@ class ConstantInputDStream[T: ClassTag](_ssc: StreamingContext, rdd: RDD[T]) require(rdd != null, "parameter rdd null is illegal, which will lead to NPE in the following transformation") - override def start() {} + override def start(): Unit = {} - override def stop() {} + override def stop(): Unit = {} override def compute(validTime: Time): Option[RDD[T]] = { Some(rdd) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 41374b5e370f8..6c981b293ac76 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -33,7 +33,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.StreamingContext.rddToFileName import org.apache.spark.streaming.scheduler.Job -import org.apache.spark.streaming.ui.UIUtils +import org.apache.spark.ui.{UIUtils => SparkUIUtils} import org.apache.spark.util.{CallSite, Utils} /** @@ -138,7 +138,7 @@ abstract class DStream[T: ClassTag] ( */ private def makeScope(time: Time): Option[RDDOperationScope] = { baseScope.map { bsJson => - val formattedBatchTime = UIUtils.formatBatchTime( + val formattedBatchTime = SparkUIUtils.formatBatchTime( time.milliseconds, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false) val bs = RDDOperationScope.fromJson(bsJson) val baseName = bs.name // e.g. countByWindow, "kafka stream [0]" @@ -189,7 +189,7 @@ abstract class DStream[T: ClassTag] ( * the validity of future times is calculated. 
This method also recursively initializes * its parent DStreams. */ - private[streaming] def initialize(time: Time) { + private[streaming] def initialize(time: Time): Unit = { if (zeroTime != null && zeroTime != time) { throw new SparkException(s"ZeroTime is already initialized to $zeroTime" + s", cannot initialize it again to $time") @@ -231,7 +231,7 @@ abstract class DStream[T: ClassTag] ( } } - private[streaming] def validateAtStart() { + private[streaming] def validateAtStart(): Unit = { require(rememberDuration != null, "Remember duration is set to null") require( @@ -282,7 +282,7 @@ abstract class DStream[T: ClassTag] ( logInfo(s"Initialized and validated $this") } - private[streaming] def setContext(s: StreamingContext) { + private[streaming] def setContext(s: StreamingContext): Unit = { if (ssc != null && ssc != s) { throw new SparkException(s"Context must not be set again for $this") } @@ -291,7 +291,7 @@ abstract class DStream[T: ClassTag] ( dependencies.foreach(_.setContext(ssc)) } - private[streaming] def setGraph(g: DStreamGraph) { + private[streaming] def setGraph(g: DStreamGraph): Unit = { if (graph != null && graph != g) { throw new SparkException(s"Graph must not be set again for $this") } @@ -299,7 +299,7 @@ abstract class DStream[T: ClassTag] ( dependencies.foreach(_.setGraph(graph)) } - private[streaming] def remember(duration: Duration) { + private[streaming] def remember(duration: Duration): Unit = { if (duration != null && (rememberDuration == null || duration > rememberDuration)) { rememberDuration = duration logInfo(s"Duration for remembering RDDs set to $rememberDuration for $this") @@ -446,7 +446,7 @@ abstract class DStream[T: ClassTag] ( * implementation clears the old generated RDDs. Subclasses of DStream may override * this to clear their own metadata along with the generated RDDs. 
*/ - private[streaming] def clearMetadata(time: Time) { + private[streaming] def clearMetadata(time: Time): Unit = { val unpersistData = ssc.conf.getBoolean("spark.streaming.unpersist", true) val oldRDDs = generatedRDDs.filter(_._1 <= (time - rememberDuration)) logDebug("Clearing references to old RDDs: [" + @@ -477,14 +477,14 @@ abstract class DStream[T: ClassTag] ( * checkpointData. Subclasses of DStream (especially those of InputDStream) may override * this method to save custom checkpoint data. */ - private[streaming] def updateCheckpointData(currentTime: Time) { + private[streaming] def updateCheckpointData(currentTime: Time): Unit = { logDebug(s"Updating checkpoint data for time $currentTime") checkpointData.update(currentTime) dependencies.foreach(_.updateCheckpointData(currentTime)) logDebug(s"Updated checkpoint data for time $currentTime: $checkpointData") } - private[streaming] def clearCheckpointData(time: Time) { + private[streaming] def clearCheckpointData(time: Time): Unit = { logDebug("Clearing checkpoint data") checkpointData.cleanup(time) dependencies.foreach(_.clearCheckpointData(time)) @@ -497,7 +497,7 @@ abstract class DStream[T: ClassTag] ( * from the checkpoint file names stored in checkpointData. Subclasses of DStream that * override the updateCheckpointData() method would also need to override this method. 
*/ - private[streaming] def restoreCheckpointData() { + private[streaming] def restoreCheckpointData(): Unit = { if (!restoredFromCheckpointData) { // Create RDDs from the checkpoint data logInfo("Restoring checkpoint data") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala index b35f7d97233e2..667edf3713d43 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala @@ -46,7 +46,7 @@ class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) * the graph checkpoint is initiated. Default implementation records the * checkpoint files at which the generated RDDs of the DStream have been saved. */ - def update(time: Time) { + def update(time: Time): Unit = { // Get the checkpointed RDDs from the generated RDDs val checkpointFiles = dstream.generatedRDDs.filter(_._2.getCheckpointFile.isDefined) @@ -69,7 +69,7 @@ class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) * Cleanup old checkpoint data. This gets called after a checkpoint of `time` has been * written to the checkpoint directory. */ - def cleanup(time: Time) { + def cleanup(time: Time): Unit = { // Get the time of the oldest checkpointed RDD that was written as part of the // checkpoint of `time` timeToOldestCheckpointFileTime.remove(time) match { @@ -109,7 +109,7 @@ class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) * (along with its output DStreams) is being restored from a graph checkpoint file. * Default implementation restores the RDDs from their checkpoint files. 
*/ - def restore() { + def restore(): Unit = { // Create RDDs from the checkpoint data currentCheckpointFiles.foreach { case(time, file) => diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala index 438847caf0c3a..d46c9a22379d3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala @@ -128,9 +128,9 @@ class FileInputDStream[K, V, F <: NewInputFormat[K, V]]( @transient private var _path: Path = null @transient private var _fs: FileSystem = null - override def start() { } + override def start(): Unit = { } - override def stop() { } + override def stop(): Unit = { } /** * Finds the files that were modified since the last time this method was called and makes @@ -160,7 +160,7 @@ class FileInputDStream[K, V, F <: NewInputFormat[K, V]]( } /** Clear the old time-to-files mappings along with old RDDs */ - protected[streaming] override def clearMetadata(time: Time) { + protected[streaming] override def clearMetadata(time: Time): Unit = { super.clearMetadata(time) batchTimeToSelectedFiles.synchronized { val oldFiles = batchTimeToSelectedFiles.filter(_._1 < (time - rememberDuration)) @@ -306,7 +306,7 @@ class FileInputDStream[K, V, F <: NewInputFormat[K, V]]( _fs } - private def reset() { + private def reset(): Unit = { _fs = null } @@ -328,14 +328,14 @@ class FileInputDStream[K, V, F <: NewInputFormat[K, V]]( private def hadoopFiles = data.asInstanceOf[mutable.HashMap[Time, Array[String]]] - override def update(time: Time) { + override def update(time: Time): Unit = { hadoopFiles.clear() batchTimeToSelectedFiles.synchronized { hadoopFiles ++= batchTimeToSelectedFiles } } - override def cleanup(time: Time) { } + override def cleanup(time: Time): Unit = { } - override def restore() { + override def restore(): Unit = { 
hadoopFiles.toSeq.sortBy(_._1)(Time.ordering).foreach { case (t, f) => // Restore the metadata in both files and generatedRDDs diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala index 6495c91247047..5a75b77659960 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala @@ -34,7 +34,7 @@ import org.apache.spark.util.Utils * Input streams that can generate RDDs from new data by running a service/thread only on * the driver node (that is, without running a receiver on worker nodes), can be * implemented by directly inheriting this InputDStream. For example, - * FileInputDStream, a subclass of InputDStream, monitors a HDFS directory from the driver for + * FileInputDStream, a subclass of InputDStream, monitors an HDFS directory from the driver for * new files and generates RDDs with the new files. For implementing input streams * that requires running a receiver on the worker nodes, use * [[org.apache.spark.streaming.dstream.ReceiverInputDStream]] as the parent class. @@ -48,7 +48,7 @@ abstract class InputDStream[T: ClassTag](_ssc: StreamingContext) ssc.graph.addInputStream(this) - /** This is an unique identifier for the input stream. */ + /** This is a unique identifier for the input stream. 
*/ val id = ssc.getNewInputStreamId() // Keep track of the freshest rate for this stream using the rateEstimator diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala index f9c78699164ab..d3e6e766bea4a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala @@ -33,9 +33,9 @@ class QueueInputDStream[T: ClassTag]( defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { - override def start() { } + override def start(): Unit = { } - override def stop() { } + override def stop(): Unit = { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala index b22bbb79a5cc9..671ac7b97f9d2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala @@ -55,7 +55,7 @@ class RawNetworkReceiver(host: String, port: Int, storageLevel: StorageLevel) var blockPushingThread: Thread = null - def onStart() { + def onStart(): Unit = { // Open a socket to the target address and keep reading from it logInfo("Connecting to " + host + ":" + port) val channel = SocketChannel.open() @@ -67,7 +67,7 @@ class RawNetworkReceiver(host: String, port: Int, storageLevel: StorageLevel) blockPushingThread = new Thread { setDaemon(true) - override def run() { + override def run(): Unit = { var nextBlockNumber = 0 while (true) { val buffer = queue.take() @@ -92,12 +92,12 @@ class RawNetworkReceiver(host: String, port: Int, storageLevel: StorageLevel) } } - def onStop() { + def 
onStop(): Unit = { if (blockPushingThread != null) blockPushingThread.interrupt() } /** Read a buffer fully from a given Channel */ - private def readFully(channel: ReadableByteChannel, dest: ByteBuffer) { + private def readFully(channel: ReadableByteChannel, dest: ByteBuffer): Unit = { while (dest.position() < dest.limit()) { if (channel.read(dest) == -1) { throw new EOFException("End of channel") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala index fd3e72e41be26..983ac09cd435e 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala @@ -60,9 +60,9 @@ abstract class ReceiverInputDStream[T: ClassTag](_ssc: StreamingContext) def getReceiver(): Receiver[T] // Nothing to start or stop as both taken care of by the ReceiverTracker. - def start() {} + def start(): Unit = {} - def stop() {} + def stop(): Unit = {} /** * Generates RDDs with blocks received by the receiver of this stream. 
*/ diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala index 7853af562368e..9d3facc68e0c6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala @@ -54,7 +54,7 @@ class SocketReceiver[T: ClassTag]( private var socket: Socket = _ - def onStart() { + def onStart(): Unit = { logInfo(s"Connecting to $host:$port") try { @@ -69,11 +69,11 @@ class SocketReceiver[T: ClassTag]( // Start the thread that receives data over a connection new Thread("Socket Receiver") { setDaemon(true) - override def run() { receive() } + override def run(): Unit = { receive() } }.start() } - def onStop() { + def onStop(): Unit = { // in case restart thread close it twice synchronized { if (socket != null) { @@ -85,7 +85,7 @@ class SocketReceiver[T: ClassTag]( } /** Create a socket connection and receive data until receiver is stopped */ - def receive() { + def receive(): Unit = { try { val iterator = bytesToObjects(socket.getInputStream()) while(!isStopped && iterator.hasNext) { @@ -125,7 +125,7 @@ object SocketReceiver { nextValue } - protected override def close() { + protected override def close(): Unit = { dataInputStream.close() } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala index 90309c0145ae1..2533c53883cac 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala @@ -107,7 +107,8 @@ private[streaming] class BlockGenerator( new RecurringTimer(clock, blockIntervalMs, updateCurrentBuffer, "BlockGenerator") private val blockQueueSize = 
conf.getInt("spark.streaming.blockQueueSize", 10) private val blocksForPushing = new ArrayBlockingQueue[Block](blockQueueSize) - private val blockPushingThread = new Thread() { override def run() { keepPushingBlocks() } } + private val blockPushingThread = + new Thread() { override def run(): Unit = keepPushingBlocks() } @volatile private var currentBuffer = new ArrayBuffer[Any] @volatile private var state = Initialized @@ -255,7 +256,7 @@ private[streaming] class BlockGenerator( } /** Keep pushing blocks to the BlockManager. */ - private def keepPushingBlocks() { + private def keepPushingBlocks(): Unit = { logInfo("Started block pushing thread") def areBlocksBeingGenerated: Boolean = synchronized { @@ -288,12 +289,12 @@ private[streaming] class BlockGenerator( } } - private def reportError(message: String, t: Throwable) { + private def reportError(message: String, t: Throwable): Unit = { logError(message, t) listener.onError(message, t) } - private def pushBlock(block: Block) { + private def pushBlock(block: Block): Unit = { listener.onPushBlock(block.id, block.buffer) logInfo("Pushed block " + block.id) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala index fbac4880bdf65..c620074b4e44d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala @@ -40,7 +40,7 @@ private[receiver] abstract class RateLimiter(conf: SparkConf) extends Logging { private val maxRateLimit = conf.getLong("spark.streaming.receiver.maxRate", Long.MaxValue) private lazy val rateLimiter = GuavaRateLimiter.create(getInitialRateLimit().toDouble) - def waitToPush() { + def waitToPush(): Unit = { rateLimiter.acquire() } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala index eb70232a7452e..12ed8015117e5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala @@ -98,7 +98,7 @@ private[streaming] class BlockManagerBasedBlockHandler( BlockManagerBasedStoreResult(blockId, numRecords) } - def cleanupOldBlocks(threshTime: Long) { + def cleanupOldBlocks(threshTime: Long): Unit = { // this is not used as blocks inserted into the BlockManager are cleared by DStream's clearing // of BlockRDDs. } @@ -210,11 +210,11 @@ private[streaming] class WriteAheadLogBasedBlockHandler( WriteAheadLogBasedStoreResult(blockId, numRecords, walRecordHandle) } - def cleanupOldBlocks(threshTime: Long) { + def cleanupOldBlocks(threshTime: Long): Unit = { writeAheadLog.clean(threshTime, false) } - def stop() { + def stop(): Unit = { writeAheadLog.close() executionContext.shutdown() } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala index 31a88730d163e..dde074c7e324b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala @@ -115,12 +115,12 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * These single items will be aggregated together into data blocks before * being pushed into Spark's memory. */ - def store(dataItem: T) { + def store(dataItem: T): Unit = { supervisor.pushSingle(dataItem) } /** Store an ArrayBuffer of received data as a data block into Spark's memory. 
*/ - def store(dataBuffer: ArrayBuffer[T]) { + def store(dataBuffer: ArrayBuffer[T]): Unit = { supervisor.pushArrayBuffer(dataBuffer, None, None) } @@ -129,12 +129,12 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * The metadata will be associated with this block of data * for being used in the corresponding InputDStream. */ - def store(dataBuffer: ArrayBuffer[T], metadata: Any) { + def store(dataBuffer: ArrayBuffer[T], metadata: Any): Unit = { supervisor.pushArrayBuffer(dataBuffer, Some(metadata), None) } /** Store an iterator of received data as a data block into Spark's memory. */ - def store(dataIterator: Iterator[T]) { + def store(dataIterator: Iterator[T]): Unit = { supervisor.pushIterator(dataIterator, None, None) } @@ -143,12 +143,12 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * The metadata will be associated with this block of data * for being used in the corresponding InputDStream. */ - def store(dataIterator: java.util.Iterator[T], metadata: Any) { + def store(dataIterator: java.util.Iterator[T], metadata: Any): Unit = { supervisor.pushIterator(dataIterator.asScala, Some(metadata), None) } /** Store an iterator of received data as a data block into Spark's memory. */ - def store(dataIterator: java.util.Iterator[T]) { + def store(dataIterator: java.util.Iterator[T]): Unit = { supervisor.pushIterator(dataIterator.asScala, None, None) } @@ -157,7 +157,7 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * The metadata will be associated with this block of data * for being used in the corresponding InputDStream. 
*/ - def store(dataIterator: Iterator[T], metadata: Any) { + def store(dataIterator: Iterator[T], metadata: Any): Unit = { supervisor.pushIterator(dataIterator, Some(metadata), None) } @@ -166,7 +166,7 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * that the data in the ByteBuffer must be serialized using the same serializer * that Spark is configured to use. */ - def store(bytes: ByteBuffer) { + def store(bytes: ByteBuffer): Unit = { supervisor.pushBytes(bytes, None, None) } @@ -175,12 +175,12 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * The metadata will be associated with this block of data * for being used in the corresponding InputDStream. */ - def store(bytes: ByteBuffer, metadata: Any) { + def store(bytes: ByteBuffer, metadata: Any): Unit = { supervisor.pushBytes(bytes, Some(metadata), None) } /** Report exceptions in receiving data. */ - def reportError(message: String, throwable: Throwable) { + def reportError(message: String, throwable: Throwable): Unit = { supervisor.reportError(message, throwable) } @@ -192,7 +192,7 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * is defined by the Spark configuration `spark.streaming.receiverRestartDelay`. * The `message` will be reported to the driver. */ - def restart(message: String) { + def restart(message: String): Unit = { supervisor.restartReceiver(message) } @@ -204,7 +204,7 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * is defined by the Spark configuration `spark.streaming.receiverRestartDelay`. * The `message` and `exception` will be reported to the driver. 
*/ - def restart(message: String, error: Throwable) { + def restart(message: String, error: Throwable): Unit = { supervisor.restartReceiver(message, Some(error)) } @@ -214,17 +214,17 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable * (by calling `onStop()` and `onStart()`) is performed asynchronously * in a background thread. */ - def restart(message: String, error: Throwable, millisecond: Int) { + def restart(message: String, error: Throwable, millisecond: Int): Unit = { supervisor.restartReceiver(message, Some(error), millisecond) } /** Stop the receiver completely. */ - def stop(message: String) { + def stop(message: String): Unit = { supervisor.stop(message, None) } /** Stop the receiver completely due to an exception */ - def stop(message: String, error: Throwable) { + def stop(message: String, error: Throwable): Unit = { supervisor.stop(message, Some(error)) } @@ -260,12 +260,12 @@ abstract class Receiver[T](val storageLevel: StorageLevel) extends Serializable @transient private var _supervisor: ReceiverSupervisor = null /** Set the ID of the DStream that this receiver is associated with. */ - private[streaming] def setReceiverId(_id: Int) { + private[streaming] def setReceiverId(_id: Int): Unit = { id = _id } /** Attach Network Receiver executor to this receiver. 
*/ - private[streaming] def attachSupervisor(exec: ReceiverSupervisor) { + private[streaming] def attachSupervisor(exec: ReceiverSupervisor): Unit = { assert(_supervisor == null) _supervisor = exec } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala index faf6db82d5b18..b464dccb760f6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala @@ -110,29 +110,29 @@ private[streaming] abstract class ReceiverSupervisor( * Note that this must be called before the receiver.onStart() is called to ensure * things like [[BlockGenerator]]s are started before the receiver starts sending data. */ - protected def onStart() { } + protected def onStart(): Unit = { } /** * Called when supervisor is stopped. * Note that this must be called after the receiver.onStop() is called to ensure * things like [[BlockGenerator]]s are cleaned up after the receiver stops sending data. */ - protected def onStop(message: String, error: Option[Throwable]) { } + protected def onStop(message: String, error: Option[Throwable]): Unit = { } /** Called when receiver is started. 
Return true if the driver accepts us */ protected def onReceiverStart(): Boolean /** Called when receiver is stopped */ - protected def onReceiverStop(message: String, error: Option[Throwable]) { } + protected def onReceiverStop(message: String, error: Option[Throwable]): Unit = { } /** Start the supervisor */ - def start() { + def start(): Unit = { onStart() startReceiver() } /** Mark the supervisor and the receiver for stopping */ - def stop(message: String, error: Option[Throwable]) { + def stop(message: String, error: Option[Throwable]): Unit = { stoppingError = error.orNull stopReceiver(message, error) onStop(message, error) @@ -180,12 +180,12 @@ private[streaming] abstract class ReceiverSupervisor( } /** Restart receiver with delay */ - def restartReceiver(message: String, error: Option[Throwable] = None) { + def restartReceiver(message: String, error: Option[Throwable] = None): Unit = { restartReceiver(message, error, defaultRestartDelay) } /** Restart receiver with delay */ - def restartReceiver(message: String, error: Option[Throwable], delay: Int) { + def restartReceiver(message: String, error: Option[Throwable], delay: Int): Unit = { Future { // This is a blocking action so we should use "futureExecutionContext" which is a cached // thread pool. 
@@ -214,7 +214,7 @@ private[streaming] abstract class ReceiverSupervisor( /** Wait the thread until the supervisor is stopped */ - def awaitTermination() { + def awaitTermination(): Unit = { logInfo("Waiting for receiver to be stopped") stopLatch.await() if (stoppingError != null) { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala index 5d38c56aa5873..13c80841d4d14 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala @@ -102,11 +102,11 @@ private[streaming] class ReceiverSupervisorImpl( def onGenerateBlock(blockId: StreamBlockId): Unit = { } - def onError(message: String, throwable: Throwable) { + def onError(message: String, throwable: Throwable): Unit = { reportError(message, throwable) } - def onPushBlock(blockId: StreamBlockId, arrayBuffer: ArrayBuffer[_]) { + def onPushBlock(blockId: StreamBlockId, arrayBuffer: ArrayBuffer[_]): Unit = { pushArrayBuffer(arrayBuffer, None, Some(blockId)) } } @@ -116,7 +116,7 @@ private[streaming] class ReceiverSupervisorImpl( override private[streaming] def getCurrentRateLimit: Long = defaultBlockGenerator.getCurrentLimit /** Push a single record of received data into block generator. 
*/ - def pushSingle(data: Any) { + def pushSingle(data: Any): Unit = { defaultBlockGenerator.addData(data) } @@ -125,7 +125,7 @@ private[streaming] class ReceiverSupervisorImpl( arrayBuffer: ArrayBuffer[_], metadataOption: Option[Any], blockIdOption: Option[StreamBlockId] - ) { + ): Unit = { pushAndReportBlock(ArrayBufferBlock(arrayBuffer), metadataOption, blockIdOption) } @@ -134,7 +134,7 @@ private[streaming] class ReceiverSupervisorImpl( iterator: Iterator[_], metadataOption: Option[Any], blockIdOption: Option[StreamBlockId] - ) { + ): Unit = { pushAndReportBlock(IteratorBlock(iterator), metadataOption, blockIdOption) } @@ -143,7 +143,7 @@ private[streaming] class ReceiverSupervisorImpl( bytes: ByteBuffer, metadataOption: Option[Any], blockIdOption: Option[StreamBlockId] - ) { + ): Unit = { pushAndReportBlock(ByteBufferBlock(bytes), metadataOption, blockIdOption) } @@ -152,7 +152,7 @@ private[streaming] class ReceiverSupervisorImpl( receivedBlock: ReceivedBlock, metadataOption: Option[Any], blockIdOption: Option[StreamBlockId] - ) { + ): Unit = { val blockId = blockIdOption.getOrElse(nextBlockId) val time = System.currentTimeMillis val blockStoreResult = receivedBlockHandler.storeBlock(blockId, receivedBlock) @@ -166,17 +166,17 @@ private[streaming] class ReceiverSupervisorImpl( } /** Report error to the receiver tracker */ - def reportError(message: String, error: Throwable) { + def reportError(message: String, error: Throwable): Unit = { val errorString = Option(error).map(Throwables.getStackTraceAsString).getOrElse("") trackerEndpoint.send(ReportError(streamId, message, errorString)) logWarning("Reported error " + message + " - " + error) } - override protected def onStart() { + override protected def onStart(): Unit = { registeredBlockGenerators.asScala.foreach { _.start() } } - override protected def onStop(message: String, error: Option[Throwable]) { + override protected def onStop(message: String, error: Option[Throwable]): Unit = { receivedBlockHandler 
match { case handler: WriteAheadLogBasedBlockHandler => // Write ahead log should be closed. @@ -193,7 +193,7 @@ private[streaming] class ReceiverSupervisorImpl( trackerEndpoint.askSync[Boolean](msg) } - override protected def onReceiverStop(message: String, error: Option[Throwable]) { + override protected def onReceiverStop(message: String, error: Option[Throwable]): Unit = { logInfo("Deregistering receiver " + streamId) val errorString = error.map(Throwables.getStackTraceAsString).getOrElse("") trackerEndpoint.askSync[Boolean](DeregisterReceiver(streamId, message, errorString)) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala index e85a3b9009c32..58bd56c591d04 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala @@ -23,6 +23,7 @@ import scala.util.Random import org.apache.spark.{ExecutorAllocationClient, SparkConf} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Streaming._ +import org.apache.spark.resource.ResourceProfile import org.apache.spark.streaming.util.RecurringTimer import org.apache.spark.util.{Clock, Utils} @@ -111,7 +112,11 @@ private[streaming] class ExecutorAllocationManager( logDebug(s"Executors (${allExecIds.size}) = ${allExecIds}") val targetTotalExecutors = math.max(math.min(maxNumExecutors, allExecIds.size + numNewExecutors), minNumExecutors) - client.requestTotalExecutors(targetTotalExecutors, 0, Map.empty) + // Just map the targetTotalExecutors to the default ResourceProfile + client.requestTotalExecutors( + Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> targetTotalExecutors), + Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> 0), + Map.empty) logInfo(s"Requested total $targetTotalExecutors 
executors") } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala index 7050d7ef45240..88e7b56895993 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala @@ -35,7 +35,7 @@ class Job(val time: Time, func: () => _) { private var _startTime: Option[Long] = None private var _endTime: Option[Long] = None - def run() { + def run(): Unit = { _result = Try(func()) } @@ -66,7 +66,7 @@ class Job(val time: Time, func: () => _) { _outputOpId } - def setOutputOpId(outputOpId: Int) { + def setOutputOpId(outputOpId: Int): Unit = { if (isSet) { throw new IllegalStateException("Cannot call setOutputOpId more than once") } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala index ddeb3d4547c55..7e8449ee5aa7e 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala @@ -77,7 +77,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { private var eventLoop: EventLoop[JobGeneratorEvent] = null // last batch whose completion,checkpointing and metadata cleanup has been completed - private var lastProcessedBatch: Time = null + @volatile private[streaming] var lastProcessedBatch: Time = null /** Start generation of jobs */ def start(): Unit = synchronized { @@ -166,21 +166,21 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { /** * Callback called when a batch has been completely processed. */ - def onBatchCompletion(time: Time) { + def onBatchCompletion(time: Time): Unit = { eventLoop.post(ClearMetadata(time)) } /** * Callback called when the checkpoint of a batch has been written. 
*/ - def onCheckpointCompletion(time: Time, clearCheckpointDataLater: Boolean) { + def onCheckpointCompletion(time: Time, clearCheckpointDataLater: Boolean): Unit = { if (clearCheckpointDataLater) { eventLoop.post(ClearCheckpointData(time)) } } /** Processes all events */ - private def processEvent(event: JobGeneratorEvent) { + private def processEvent(event: JobGeneratorEvent): Unit = { logDebug("Got event " + event) event match { case GenerateJobs(time) => generateJobs(time) @@ -192,7 +192,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } /** Starts the generator for the first time */ - private def startFirstTime() { + private def startFirstTime(): Unit = { val startTime = new Time(timer.getStartTime()) graph.start(startTime - graph.batchDuration) timer.start(startTime.milliseconds) @@ -200,7 +200,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } /** Restarts the generator based on the information in checkpoint */ - private def restart() { + private def restart(): Unit = { // If manual clock is being used for testing, then // either set the manual clock to the last checkpointed time, // or if the property is defined set it to that time @@ -243,7 +243,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } /** Generate jobs and perform checkpointing for the given `time`. */ - private def generateJobs(time: Time) { + private def generateJobs(time: Time): Unit = { // Checkpoint all RDDs marked for checkpointing to ensure their lineages are // truncated periodically. Otherwise, we may run into stack overflows (SPARK-6847). ssc.sparkContext.setLocalProperty(RDD.CHECKPOINT_ALL_MARKED_ANCESTORS, "true") @@ -262,7 +262,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } /** Clear DStream metadata for the given `time`. 
*/ - private def clearMetadata(time: Time) { + private def clearMetadata(time: Time): Unit = { ssc.graph.clearMetadata(time) // If checkpointing is enabled, then checkpoint, @@ -281,7 +281,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } /** Clear DStream checkpoint data for the given `time`. */ - private def clearCheckpointData(time: Time) { + private def clearCheckpointData(time: Time): Unit = { ssc.graph.clearCheckpointData(time) // All the checkpoint information about which batches have been processed, etc have @@ -293,7 +293,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } /** Perform checkpoint for the given `time`. */ - private def doCheckpoint(time: Time, clearCheckpointDataLater: Boolean) { + private def doCheckpoint(time: Time, clearCheckpointDataLater: Boolean): Unit = { if (shouldCheckpoint && (time - graph.zeroTime).isMultipleOf(ssc.checkpointDuration)) { logInfo("Checkpointing graph for time " + time) ssc.graph.updateCheckpointData(time) @@ -303,7 +303,7 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { } } - private def markBatchFullyProcessed(time: Time) { + private def markBatchFullyProcessed(time: Time): Unit = { lastProcessedBatch = time } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala index 2fa3bf7d5230b..7eea57cc083ed 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala @@ -22,16 +22,14 @@ import java.util.concurrent.{ConcurrentHashMap, TimeUnit} import scala.collection.JavaConverters._ import scala.util.Failure -import org.apache.commons.lang3.SerializationUtils - import org.apache.spark.ExecutorAllocationClient import org.apache.spark.internal.Logging import org.apache.spark.internal.io.SparkHadoopWriterUtils import 
org.apache.spark.rdd.RDD import org.apache.spark.streaming._ import org.apache.spark.streaming.api.python.PythonDStream -import org.apache.spark.streaming.ui.UIUtils -import org.apache.spark.util.{EventLoop, ThreadUtils} +import org.apache.spark.ui.{UIUtils => SparkUIUtils} +import org.apache.spark.util.{EventLoop, ThreadUtils, Utils} private[scheduler] sealed trait JobSchedulerEvent @@ -52,8 +50,9 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { private val numConcurrentJobs = ssc.conf.getInt("spark.streaming.concurrentJobs", 1) private val jobExecutor = ThreadUtils.newDaemonFixedThreadPool(numConcurrentJobs, "streaming-job-executor") - private val jobGenerator = new JobGenerator(this) + private[streaming] val jobGenerator = new JobGenerator(this) val clock = jobGenerator.clock + val listenerBus = new StreamingListenerBus(ssc.sparkContext.listenerBus) // These two are created only when scheduler starts. @@ -144,7 +143,7 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { logInfo("Stopped JobScheduler") } - def submitJobSet(jobSet: JobSet) { + def submitJobSet(jobSet: JobSet): Unit = { if (jobSet.jobs.isEmpty) { logInfo("No jobs added for time " + jobSet.time) } else { @@ -159,7 +158,7 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { jobSets.asScala.keys.toSeq } - def reportError(msg: String, e: Throwable) { + def reportError(msg: String, e: Throwable): Unit = { eventLoop.post(ErrorReported(msg, e)) } @@ -167,7 +166,7 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { eventLoop != null } - private def processEvent(event: JobSchedulerEvent) { + private def processEvent(event: JobSchedulerEvent): Unit = { try { event match { case JobStarted(job, startTime) => handleJobStart(job, startTime) @@ -180,7 +179,7 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { } } - private def handleJobStart(job: Job, startTime: Long) { + private def handleJobStart(job: Job, startTime: Long): Unit = { val 
jobSet = jobSets.get(job.time) val isFirstJobOfJobSet = !jobSet.hasStarted jobSet.handleJobStart(job) @@ -194,7 +193,7 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { logInfo("Starting job " + job.id + " from job set of time " + jobSet.time) } - private def handleJobCompletion(job: Job, completedTime: Long) { + private def handleJobCompletion(job: Job, completedTime: Long): Unit = { val jobSet = jobSets.get(job.time) jobSet.handleJobCompletion(job) job.setEndTime(completedTime) @@ -218,7 +217,7 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { } } - private def handleError(msg: String, e: Throwable) { + private def handleError(msg: String, e: Throwable): Unit = { logError(msg, e) ssc.waiter.notifyError(e) PythonDStream.stopStreamingContextIfPythonProcessIsDead(e) @@ -227,11 +226,11 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { private class JobHandler(job: Job) extends Runnable with Logging { import JobScheduler._ - def run() { + def run(): Unit = { val oldProps = ssc.sparkContext.getLocalProperties try { - ssc.sparkContext.setLocalProperties(SerializationUtils.clone(ssc.savedProperties.get())) - val formattedTime = UIUtils.formatBatchTime( + ssc.sparkContext.setLocalProperties(Utils.cloneProperties(ssc.savedProperties.get())) + val formattedTime = SparkUIUtils.formatBatchTime( job.time.milliseconds, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false) val batchUrl = s"/streaming/batch/?id=${job.time.milliseconds}" val batchLinkText = s"[output operation ${job.outputOpId}, batch time ${formattedTime}]" diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobSet.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobSet.scala index 0baedaf275d67..5a5469ac6543a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobSet.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobSet.scala @@ -39,11 +39,11 @@ case class JobSet( 
jobs.zipWithIndex.foreach { case (job, i) => job.setOutputOpId(i) } incompleteJobs ++= jobs - def handleJobStart(job: Job) { + def handleJobStart(job: Job): Unit = { if (processingStartTime < 0) processingStartTime = System.currentTimeMillis() } - def handleJobCompletion(job: Job) { + def handleJobCompletion(job: Job): Unit = { incompleteJobs -= job if (hasCompleted) processingEndTime = System.currentTimeMillis() } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/RateController.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/RateController.scala index a46c0c1b25e74..7774e85f778a6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/RateController.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/RateController.scala @@ -47,7 +47,7 @@ private[streaming] abstract class RateController(val streamUID: Int, rateEstimat /** * An initialization method called both from the constructor and Serialization code. 
*/ - private def init() { + private def init(): Unit = { executionContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonSingleThreadExecutor("stream-rate-update")) rateLimit = new AtomicLong(-1L) @@ -72,7 +72,7 @@ private[streaming] abstract class RateController(val streamUID: Int, rateEstimat def getLatestRate(): Long = rateLimit.get() - override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted) { + override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = { val elements = batchCompleted.batchInfo.streamIdToInputInfo for { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index a9763cfe04539..6c71b18b46213 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -182,7 +182,7 @@ private[streaming] class ReceivedBlockTracker( } /** Stop the block tracker. */ - def stop() { + def stop(): Unit = { writeAheadLogOption.foreach { _.close() } } @@ -192,7 +192,7 @@ private[streaming] class ReceivedBlockTracker( */ private def recoverPastEvents(): Unit = synchronized { // Insert the recovered block information - def insertAddedBlock(receivedBlockInfo: ReceivedBlockInfo) { + def insertAddedBlock(receivedBlockInfo: ReceivedBlockInfo): Unit = { logTrace(s"Recovery: Inserting added block $receivedBlockInfo") receivedBlockInfo.setBlockIdInvalid() getReceivedBlockQueue(receivedBlockInfo.streamId) += receivedBlockInfo @@ -200,7 +200,7 @@ private[streaming] class ReceivedBlockTracker( // Insert the recovered block-to-batch allocations and removes them from queue of // received blocks. 
- def insertAllocatedBatch(batchTime: Time, allocatedBlocks: AllocatedBlocks) { + def insertAllocatedBatch(batchTime: Time, allocatedBlocks: AllocatedBlocks): Unit = { logTrace(s"Recovery: Inserting allocated batch for time $batchTime to " + s"${allocatedBlocks.streamIdToAllocatedBlocks}") allocatedBlocks.streamIdToAllocatedBlocks.foreach { @@ -212,7 +212,7 @@ private[streaming] class ReceivedBlockTracker( } // Cleanup the batch allocations - def cleanupBatches(batchTimes: Seq[Time]) { + def cleanupBatches(batchTimes: Seq[Time]): Unit = { logTrace(s"Recovery: Cleaning up batches $batchTimes") timeToAllocatedBlocks --= batchTimes } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala index 551d376fbc1e7..13cf5cc0e71ea 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala @@ -223,7 +223,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false * Clean up the data and metadata of blocks and batches that are strictly * older than the threshold time. 
Note that this does not */ - def cleanupOldBlocksAndBatches(cleanupThreshTime: Time) { + def cleanupOldBlocksAndBatches(cleanupThreshTime: Time): Unit = { // Clean up old block and batch metadata receivedBlockTracker.cleanupOldBatches(cleanupThreshTime, waitForCompletion = false) @@ -309,7 +309,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false } /** Deregister a receiver */ - private def deregisterReceiver(streamId: Int, message: String, error: String) { + private def deregisterReceiver(streamId: Int, message: String, error: String): Unit = { val lastErrorTime = if (error == null || error == "") -1 else ssc.scheduler.clock.getTimeMillis() val errorInfo = ReceiverErrorInfo( @@ -345,7 +345,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false } /** Report error sent by a receiver */ - private def reportError(streamId: Int, message: String, error: String) { + private def reportError(streamId: Int, message: String, error: String): Unit = { val newReceiverTrackingInfo = receiverTrackingInfos.get(streamId) match { case Some(oldInfo) => val errorInfo = ReceiverErrorInfo(lastErrorMessage = message, lastError = error, @@ -613,7 +613,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false ssc.sparkContext.setCallSite(Option(ssc.getStartSite()).getOrElse(Utils.getCallSite())) val future = ssc.sparkContext.submitJob[Receiver[_], Unit, Unit]( - receiverRDD, startReceiverFunc, Seq(0), (_, _) => Unit, ()) + receiverRDD, startReceiverFunc, Seq(0), (_, _) => (), ()) // We will keep restarting the receiver job until ReceiverTracker is stopped future.onComplete { case Success(_) => @@ -653,7 +653,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false } /** Send stop signal to the receivers. 
*/ - private def stopReceivers() { + private def stopReceivers(): Unit = { receiverTrackingInfos.values.flatMap(_.endpoint).foreach { _.send(StopReceiver) } logInfo("Sent stop signal to all " + receiverTrackingInfos.size + " receivers") } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala index b57f9b772f8c6..cc961bb268c9d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala @@ -70,33 +70,33 @@ case class StreamingListenerReceiverStopped(receiverInfo: ReceiverInfo) trait StreamingListener { /** Called when the streaming has been started */ - def onStreamingStarted(streamingStarted: StreamingListenerStreamingStarted) { } + def onStreamingStarted(streamingStarted: StreamingListenerStreamingStarted): Unit = { } /** Called when a receiver has been started */ - def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted) { } + def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted): Unit = { } /** Called when a receiver has reported an error */ - def onReceiverError(receiverError: StreamingListenerReceiverError) { } + def onReceiverError(receiverError: StreamingListenerReceiverError): Unit = { } /** Called when a receiver has been stopped */ - def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped) { } + def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped): Unit = { } /** Called when a batch of jobs has been submitted for processing. */ - def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted) { } + def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted): Unit = { } /** Called when processing of a batch of jobs has started. 
*/ - def onBatchStarted(batchStarted: StreamingListenerBatchStarted) { } + def onBatchStarted(batchStarted: StreamingListenerBatchStarted): Unit = { } /** Called when processing of a batch of jobs has completed. */ - def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted) { } + def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = { } /** Called when processing of a job of a batch has started. */ def onOutputOperationStarted( - outputOperationStarted: StreamingListenerOutputOperationStarted) { } + outputOperationStarted: StreamingListenerOutputOperationStarted): Unit = { } /** Called when processing of a job of a batch has completed. */ def onOutputOperationCompleted( - outputOperationCompleted: StreamingListenerOutputOperationCompleted) { } + outputOperationCompleted: StreamingListenerOutputOperationCompleted): Unit = { } } @@ -110,18 +110,18 @@ class StatsReportListener(numBatchInfos: Int = 10) extends StreamingListener { // Queue containing latest completed batches val batchInfos = new Queue[BatchInfo]() - override def onBatchCompleted(batchStarted: StreamingListenerBatchCompleted) { + override def onBatchCompleted(batchStarted: StreamingListenerBatchCompleted): Unit = { batchInfos.enqueue(batchStarted.batchInfo) if (batchInfos.size > numBatchInfos) batchInfos.dequeue() printStats() } - def printStats() { + def printStats(): Unit = { showMillisDistribution("Total delay: ", _.totalDelay) showMillisDistribution("Processing time: ", _.processingDelay) } - def showMillisDistribution(heading: String, getMetric: BatchInfo => Option[Long]) { + def showMillisDistribution(heading: String, getMetric: BatchInfo => Option[Long]): Unit = { org.apache.spark.scheduler.StatsReportListener.showMillisDistribution( heading, extractDistribution(getMetric)) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListenerBus.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListenerBus.scala index 6a70bf7406b3c..8a10a62f0180b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListenerBus.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListenerBus.scala @@ -33,7 +33,7 @@ private[streaming] class StreamingListenerBus(sparkListenerBus: LiveListenerBus) * Post a StreamingListenerEvent to the Spark listener bus asynchronously. This event will be * dispatched to all StreamingListeners in the thread of the Spark listener bus. */ - def post(event: StreamingListenerEvent) { + def post(event: StreamingListenerEvent): Unit = { sparkListenerBus.post(new WrappedStreamingListenerEvent(event)) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala index f1070e9029cb5..b5a0e92e69c04 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala @@ -51,7 +51,7 @@ private[ui] abstract class BatchTableBase(tableId: String, batchInterval: Long) protected def baseRow(batch: BatchUIData): Seq[Node] = { val batchTime = batch.batchTime.milliseconds - val formattedBatchTime = UIUtils.formatBatchTime(batchTime, batchInterval) + val formattedBatchTime = SparkUIUtils.formatBatchTime(batchTime, batchInterval) val numRecords = batch.numRecords val schedulingDelay = batch.schedulingDelay val formattedSchedulingDelay = schedulingDelay.map(SparkUIUtils.formatDuration).getOrElse("-") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala index f3d2e478e9b2d..04cd063a28713 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala +++ 
b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala @@ -37,10 +37,13 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { private def columns: Seq[Node] = { Output Op Id Description - Output Op Duration + Output Op Duration {SparkUIUtils.tooltip("Time taken for all the jobs of this batch to" + + " finish processing from the time they were submitted.", + "top")} Status Job Id - Job Duration + Job Duration {SparkUIUtils.tooltip("Time taken from submission time to completion " + + "time of the job", "top")} Stages: Succeeded/Total Tasks (for all stages): Succeeded/Total Error @@ -322,7 +325,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { throw new IllegalArgumentException(s"Missing id parameter") } val formattedBatchTime = - UIUtils.formatBatchTime(batchTime.milliseconds, streamingListener.batchDuration) + SparkUIUtils.formatBatchTime(batchTime.milliseconds, streamingListener.batchDuration) val batchUIData = streamingListener.getBatchUIData(batchTime).getOrElse { throw new IllegalArgumentException(s"Batch $formattedBatchTime does not exist") @@ -381,7 +384,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { Input - Metadata + Metadata {SparkUIUtils.tooltip("Batch Input Details", "right")} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala index ed4c1e484efd2..de73762beb860 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala @@ -68,23 +68,23 @@ private[spark] class StreamingJobProgressListener(ssc: StreamingContext) val batchDuration = ssc.graph.batchDuration.milliseconds - override def onStreamingStarted(streamingStarted: StreamingListenerStreamingStarted) { + 
override def onStreamingStarted(streamingStarted: StreamingListenerStreamingStarted): Unit = { _startTime = streamingStarted.time } - override def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted) { + override def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted): Unit = { synchronized { receiverInfos(receiverStarted.receiverInfo.streamId) = receiverStarted.receiverInfo } } - override def onReceiverError(receiverError: StreamingListenerReceiverError) { + override def onReceiverError(receiverError: StreamingListenerReceiverError): Unit = { synchronized { receiverInfos(receiverError.receiverInfo.streamId) = receiverError.receiverInfo } } - override def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped) { + override def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped): Unit = { synchronized { receiverInfos(receiverStopped.receiverInfo.streamId) = receiverStopped.receiverInfo } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index d16611f412034..d47287b6077f8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -20,79 +20,10 @@ package org.apache.spark.streaming.ui import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest -import scala.collection.mutable.ArrayBuffer import scala.xml.{Node, Unparsed} import org.apache.spark.internal.Logging -import org.apache.spark.ui._ -import org.apache.spark.ui.{UIUtils => SparkUIUtils} - -/** - * A helper class to generate JavaScript and HTML for both timeline and histogram graphs. 
- * - * @param timelineDivId the timeline `id` used in the html `div` tag - * @param histogramDivId the timeline `id` used in the html `div` tag - * @param data the data for the graph - * @param minX the min value of X axis - * @param maxX the max value of X axis - * @param minY the min value of Y axis - * @param maxY the max value of Y axis - * @param unitY the unit of Y axis - * @param batchInterval if `batchInterval` is not None, we will draw a line for `batchInterval` in - * the graph - */ -private[ui] class GraphUIData( - timelineDivId: String, - histogramDivId: String, - data: Seq[(Long, Double)], - minX: Long, - maxX: Long, - minY: Double, - maxY: Double, - unitY: String, - batchInterval: Option[Double] = None) { - - private var dataJavaScriptName: String = _ - - def generateDataJs(jsCollector: JsCollector): Unit = { - val jsForData = data.map { case (x, y) => - s"""{"x": $x, "y": $y}""" - }.mkString("[", ",", "]") - dataJavaScriptName = jsCollector.nextVariableName - jsCollector.addPreparedStatement(s"var $dataJavaScriptName = $jsForData;") - } - - def generateTimelineHtml(jsCollector: JsCollector): Seq[Node] = { - jsCollector.addPreparedStatement(s"registerTimeline($minY, $maxY);") - if (batchInterval.isDefined) { - jsCollector.addStatement( - "drawTimeline(" + - s"'#$timelineDivId', $dataJavaScriptName, $minX, $maxX, $minY, $maxY, '$unitY'," + - s" ${batchInterval.get}" + - ");") - } else { - jsCollector.addStatement( - s"drawTimeline('#$timelineDivId', $dataJavaScriptName, $minX, $maxX, $minY, $maxY," + - s" '$unitY');") - } -
    - } - - def generateHistogramHtml(jsCollector: JsCollector): Seq[Node] = { - val histogramData = s"$dataJavaScriptName.map(function(d) { return d.y; })" - jsCollector.addPreparedStatement(s"registerHistogram($histogramData, $minY, $maxY);") - if (batchInterval.isDefined) { - jsCollector.addStatement( - "drawHistogram(" + - s"'#$histogramDivId', $histogramData, $minY, $maxY, '$unitY', ${batchInterval.get}" + - ");") - } else { - jsCollector.addStatement( - s"drawHistogram('#$histogramDivId', $histogramData, $minY, $maxY, '$unitY');") - } -
    - } -} +import org.apache.spark.ui.{GraphUIData, JsCollector, UIUtils => SparkUIUtils, WebUIPage} /** * A helper class for "scheduling delay", "processing time" and "total delay" to generate data that @@ -165,8 +96,8 @@ private[ui] class StreamingPage(parent: StreamingTab) private def generateLoadResources(request: HttpServletRequest): Seq[Node] = { // scalastyle:off - - + + // scalastyle:on } @@ -202,7 +133,7 @@ private[ui] class StreamingPage(parent: StreamingTab) private def generateTimeMap(times: Seq[Long]): Seq[Node] = { val js = "var timeFormat = {};\n" + times.map { time => val formattedTime = - UIUtils.formatBatchTime(time, listener.batchDuration, showYYYYMMSS = false) + SparkUIUtils.formatBatchTime(time, listener.batchDuration, showYYYYMMSS = false) s"timeFormat[$time] = '$formattedTime';" }.mkString("\n") @@ -321,7 +252,7 @@ private[ui] class StreamingPage(parent: StreamingTab) if (hasStream) { -
    + Input Rate @@ -351,7 +282,7 @@ private[ui] class StreamingPage(parent: StreamingTab)
    -
    Scheduling Delay {SparkUIUtils.tooltip("Time taken by Streaming scheduler to submit jobs of a batch", "right")}
    +
    Scheduling Delay {SparkUIUtils.tooltip("Time taken by Streaming scheduler to submit jobs of a batch", "top")}
    Avg: {schedulingDelay.formattedAvg}
    @@ -361,7 +292,7 @@ private[ui] class StreamingPage(parent: StreamingTab)
    -
    Processing Time {SparkUIUtils.tooltip("Time taken to process all jobs of a batch", "right")}
    +
    Processing Time {SparkUIUtils.tooltip("Time taken to process all jobs of a batch", "top")}
    Avg: {processingTime.formattedAvg}
    @@ -371,7 +302,7 @@ private[ui] class StreamingPage(parent: StreamingTab)
    -
    Total Delay {SparkUIUtils.tooltip("Total time taken to handle a batch", "right")}
    +
    Total Delay {SparkUIUtils.tooltip("Total time taken to handle a batch", "top")}
    Avg: {totalDelay.formattedAvg}
    @@ -545,52 +476,3 @@ private[ui] object StreamingPage { } -/** - * A helper class that allows the user to add JavaScript statements which will be executed when the - * DOM has finished loading. - */ -private[ui] class JsCollector { - - private var variableId = 0 - - /** - * Return the next unused JavaScript variable name - */ - def nextVariableName: String = { - variableId += 1 - "v" + variableId - } - - /** - * JavaScript statements that will execute before `statements` - */ - private val preparedStatements = ArrayBuffer[String]() - - /** - * JavaScript statements that will execute after `preparedStatements` - */ - private val statements = ArrayBuffer[String]() - - def addPreparedStatement(js: String): Unit = { - preparedStatements += js - } - - def addStatement(js: String): Unit = { - statements += js - } - - /** - * Generate a html snippet that will execute all scripts when the DOM has finished loading. - */ - def toHtml: Seq[Node] = { - val js = - s""" - |$$(document).ready(function() { - | ${preparedStatements.mkString("\n")} - | ${statements.mkString("\n")} - |});""".stripMargin - - - } -} - diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala index 13357db728701..d616b47117f1c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala @@ -28,7 +28,7 @@ import org.apache.spark.ui.{SparkUI, SparkUITab} private[spark] class StreamingTab(val ssc: StreamingContext, sparkUI: SparkUI) extends SparkUITab(sparkUI, "streaming") with Logging { - private val STATIC_RESOURCE_DIR = "org/apache/spark/streaming/ui/static" + private val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" val parent = sparkUI val listener = ssc.progressListener @@ -36,12 +36,12 @@ private[spark] class StreamingTab(val ssc: StreamingContext, sparkUI: SparkUI) attachPage(new 
StreamingPage(this)) attachPage(new BatchPage(this)) - def attach() { + def attach(): Unit = { parent.attachTab(this) parent.addStaticHandler(STATIC_RESOURCE_DIR, "/static/streaming") } - def detach() { + def detach(): Unit = { parent.detachTab(this) parent.detachHandler("/static/streaming") } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala index c21912ab2816c..dc1af0a940ec7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala @@ -17,14 +17,14 @@ package org.apache.spark.streaming.ui -import java.text.SimpleDateFormat -import java.util.{Locale, TimeZone} import java.util.concurrent.TimeUnit import scala.xml.Node import org.apache.commons.text.StringEscapeUtils +import org.apache.spark.ui.{ UIUtils => SparkUIUtils } + private[streaming] object UIUtils { /** @@ -78,59 +78,6 @@ private[streaming] object UIUtils { case TimeUnit.DAYS => milliseconds / 1000.0 / 60.0 / 60.0 / 24.0 } - // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. - private val batchTimeFormat = new ThreadLocal[SimpleDateFormat]() { - override def initialValue(): SimpleDateFormat = - new SimpleDateFormat("yyyy/MM/dd HH:mm:ss", Locale.US) - } - - private val batchTimeFormatWithMilliseconds = new ThreadLocal[SimpleDateFormat]() { - override def initialValue(): SimpleDateFormat = - new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS", Locale.US) - } - - /** - * If `batchInterval` is less than 1 second, format `batchTime` with milliseconds. Otherwise, - * format `batchTime` without milliseconds. - * - * @param batchTime the batch time to be formatted - * @param batchInterval the batch interval - * @param showYYYYMMSS if showing the `yyyy/MM/dd` part. 
If it's false, the return value wll be - * only `HH:mm:ss` or `HH:mm:ss.SSS` depending on `batchInterval` - * @param timezone only for test - */ - def formatBatchTime( - batchTime: Long, - batchInterval: Long, - showYYYYMMSS: Boolean = true, - timezone: TimeZone = null): String = { - val oldTimezones = - (batchTimeFormat.get.getTimeZone, batchTimeFormatWithMilliseconds.get.getTimeZone) - if (timezone != null) { - batchTimeFormat.get.setTimeZone(timezone) - batchTimeFormatWithMilliseconds.get.setTimeZone(timezone) - } - try { - val formattedBatchTime = - if (batchInterval < 1000) { - batchTimeFormatWithMilliseconds.get.format(batchTime) - } else { - // If batchInterval >= 1 second, don't show milliseconds - batchTimeFormat.get.format(batchTime) - } - if (showYYYYMMSS) { - formattedBatchTime - } else { - formattedBatchTime.substring(formattedBatchTime.indexOf(' ') + 1) - } - } finally { - if (timezone != null) { - batchTimeFormat.get.setTimeZone(oldTimezones._1) - batchTimeFormatWithMilliseconds.get.setTimeZone(oldTimezones._2) - } - } - } - def createOutputOperationFailureForUI(failure: String): String = { if (failure.startsWith("org.apache.spark.Spark")) { // SparkException or SparkDriverExecutionException @@ -164,19 +111,7 @@ private[streaming] object UIUtils { } else { failureReason } - val details = if (isMultiline) { - // scalastyle:off - - +details - ++ - - // scalastyle:on - } else { - "" - } + val details = SparkUIUtils.detailsUINode(isMultiline, failureDetails) if (rowspan == 1) { {failureReasonSummary}{details} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 21f3bbe40bfab..d33f83c819086 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -24,6 +24,7 @@ import 
java.util.concurrent.RejectedExecutionException import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.ExecutionContextTaskSupport +import scala.collection.parallel.immutable.ParVector import scala.concurrent.{Await, ExecutionContext, Future} import org.apache.hadoop.conf.Configuration @@ -313,7 +314,7 @@ private[streaming] object FileBasedWriteAheadLog { val groupSize = taskSupport.parallelismLevel.max(8) source.grouped(groupSize).flatMap { group => - val parallelCollection = group.par + val parallelCollection = new ParVector(group.toVector) parallelCollection.tasksupport = taskSupport parallelCollection.map(handler) }.flatten diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogRandomReader.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogRandomReader.scala index 56d4977da0b51..7af018f6d7561 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogRandomReader.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogRandomReader.scala @@ -48,7 +48,7 @@ private[streaming] class FileBasedWriteAheadLogRandomReader(path: String, conf: instream.close() } - private def assertOpen() { + private def assertOpen(): Unit = { HdfsUtils.checkState(!closed, "Stream is closed. 
Create a new Reader to read from the file.") } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala index 1f5c1d4369b53..40d8865b146db 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala @@ -53,13 +53,13 @@ private[streaming] class FileBasedWriteAheadLogWriter(path: String, hadoopConf: stream.close() } - private def flush() { + private def flush(): Unit = { stream.hflush() // Useful for local file system where hflush/sync does not work (HADOOP-7844) stream.getWrappedStream.flush() } - private def assertOpen() { + private def assertOpen(): Unit = { HdfsUtils.checkState(!closed, "Stream is closed. Create a new Writer to write to file.") } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala index 8cb68b2be4ecf..146577214de17 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala @@ -40,7 +40,7 @@ private[streaming] object HdfsUtils { } } else { // we dont' want to use hdfs erasure coding, as that lacks support for append and hflush - SparkHadoopUtil.createNonECFile(dfs, dfsPath) + SparkHadoopUtil.createFile(dfs, dfsPath, false) } } stream @@ -62,7 +62,7 @@ private[streaming] object HdfsUtils { } } - def checkState(state: Boolean, errorMsg: => String) { + def checkState(state: Boolean, errorMsg: => String): Unit = { if (!state) { throw new IllegalStateException(errorMsg) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala 
index 342f20f47a39e..af1f19e9cd98b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala @@ -36,17 +36,17 @@ class RateLimitedOutputStream(out: OutputStream, desiredBytesPerSec: Int) private var lastSyncTime = System.nanoTime private var bytesWrittenSinceSync = 0L - override def write(b: Int) { + override def write(b: Int): Unit = { waitToWrite(1) out.write(b) } - override def write(bytes: Array[Byte]) { + override def write(bytes: Array[Byte]): Unit = { write(bytes, 0, bytes.length) } @tailrec - override final def write(bytes: Array[Byte], offset: Int, length: Int) { + override final def write(bytes: Array[Byte], offset: Int, length: Int): Unit = { val writeSize = math.min(length - offset, CHUNK_SIZE) if (writeSize > 0) { waitToWrite(writeSize) @@ -55,16 +55,16 @@ class RateLimitedOutputStream(out: OutputStream, desiredBytesPerSec: Int) } } - override def flush() { + override def flush(): Unit = { out.flush() } - override def close() { + override def close(): Unit = { out.close() } @tailrec - private def waitToWrite(numBytes: Int) { + private def waitToWrite(numBytes: Int): Unit = { val now = System.nanoTime val elapsedNanosecs = math.max(now - lastSyncTime, 1) val rate = bytesWrittenSinceSync.toDouble * 1000000000 / elapsedNanosecs diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala index eb9996ece3779..9cdfdb8374322 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala @@ -96,7 +96,7 @@ object RawTextHelper { * Warms up the SparkContext in master and slave by running tasks to force JIT kick in * before real workload starts. 
*/ - def warmUp(sc: SparkContext) { + def warmUp(sc: SparkContext): Unit = { for (i <- 0 to 1) { sc.parallelize(1 to 200000, 1000) .map(_ % 1331).map(_.toString) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala index 9667af97f03bc..5d4fcf8bd1596 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala @@ -34,7 +34,7 @@ import org.apache.spark.util.IntParam */ private[streaming] object RawTextSender extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender ") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala index 62e681e3e9646..3ffb2c12fb2dc 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala @@ -26,7 +26,7 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name: private val thread = new Thread("RecurringTimer - " + name) { setDaemon(true) - override def run() { loop } + override def run(): Unit = { loop } } @volatile private var prevTime = -1L @@ -100,7 +100,7 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name: /** * Repeatedly call the callback every interval. 
*/ - private def loop() { + private def loop(): Unit = { try { while (!stopped) { triggerActionForNextInterval() @@ -115,11 +115,11 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name: private[streaming] object RecurringTimer extends Logging { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { var lastRecurTime = 0L val period = 1000 - def onRecur(time: Long) { + def onRecur(time: Long): Unit = { val currentTime = System.currentTimeMillis() logInfo("" + currentTime + ": " + (currentTime - lastRecurTime)) lastRecurTime = currentTime diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaWriteAheadLogSuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaWriteAheadLogSuite.java index 3f4e6ddb216ec..7037de1526c9c 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaWriteAheadLogSuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaWriteAheadLogSuite.java @@ -120,6 +120,6 @@ public void testCustomWAL() { while (dataIterator.hasNext()) { readData.add(JavaUtils.bytesToString(dataIterator.next())); } - Assert.assertEquals(readData, Arrays.asList("data3", "data4")); + Assert.assertEquals(Arrays.asList("data3", "data4"), readData); } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala index 287a43ac689ed..742eae50e159b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala @@ -732,7 +732,7 @@ class BasicOperationsSuite extends TestSuiteBase { val blockRdds = new mutable.HashMap[Time, BlockRDD[_]] val persistentRddIds = new mutable.HashMap[Time, Int] - def collectRddInfo() { // get all RDD info required for verification + def collectRddInfo(): Unit = { // get all RDD info required for verification 
networkStream.generatedRDDs.foreach { case (time, rdd) => blockRdds(time) = rdd.asInstanceOf[BlockRDD[_]] } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala index 55fdd4c82ac75..238ef1e2367a0 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala @@ -39,8 +39,7 @@ import org.apache.spark.internal.config._ import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.scheduler._ -import org.apache.spark.util.{Clock, ManualClock, MutableURLClassLoader, ResetSystemProperties, - Utils} +import org.apache.spark.util.{Clock, ManualClock, MutableURLClassLoader, ResetSystemProperties, Utils} /** * A input stream that records the times of restore() invoked @@ -55,7 +54,7 @@ class CheckpointInputDStream(_ssc: StreamingContext) extends InputDStream[Int](_ class FileInputDStreamCheckpointData extends DStreamCheckpointData(this) { @transient var restoredTimes = 0 - override def restore() { + override def restore(): Unit = { restoredTimes += 1 super.restore() } @@ -85,7 +84,7 @@ trait DStreamCheckpointTester { self: SparkFunSuite => numBatchesBeforeRestart: Int, batchDuration: Duration = Milliseconds(500), stopSparkContextAfterTest: Boolean = true - ) { + ): Unit = { require(numBatchesBeforeRestart < expectedOutput.size, "Number of batches before context restart less than number of expected output " + "(i.e. number of total batches to run)") @@ -206,24 +205,21 @@ trait DStreamCheckpointTester { self: SparkFunSuite => * the checkpointing of a DStream's RDDs as well as the checkpointing of * the whole DStream graph. 
*/ -class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester +class CheckpointSuite extends TestSuiteBase with LocalStreamingContext with DStreamCheckpointTester with ResetSystemProperties { - var ssc: StreamingContext = null - override def batchDuration: Duration = Milliseconds(500) - override def beforeFunction() { - super.beforeFunction() + override def beforeEach(): Unit = { + super.beforeEach() Utils.deleteRecursively(new File(checkpointDir)) } - override def afterFunction() { + override def afterEach(): Unit = { try { - if (ssc != null) { ssc.stop() } Utils.deleteRecursively(new File(checkpointDir)) } finally { - super.afterFunction() + super.afterEach() } } @@ -241,8 +237,8 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester val stateStreamCheckpointInterval = Seconds(1) val fs = FileSystem.getLocal(new Configuration()) // this ensure checkpointing occurs at least once - val firstNumBatches = (stateStreamCheckpointInterval / batchDuration).toLong * 2 - val secondNumBatches = firstNumBatches + val firstNumBatches = (stateStreamCheckpointInterval / batchDuration).toLong + val secondNumBatches = firstNumBatches * 2 // Setup the streams val input = (1 to 10).map(_ => Seq("a")).toSeq @@ -255,17 +251,28 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester .checkpoint(stateStreamCheckpointInterval) .map(t => (t._1, t._2)) } - var ssc = setupStreams(input, operation) + ssc = setupStreams(input, operation) var stateStream = ssc.graph.getOutputStreams().head.dependencies.head.dependencies.head + def waitForCompletionOfBatch(numBatches: Long): Unit = { + eventually(timeout(10.seconds), interval(50.millis)) { + val lastProcessed = ssc.scheduler.jobGenerator.lastProcessedBatch + assert(lastProcessed != null && + lastProcessed >= Time(batchDuration.milliseconds * numBatches)) + } + } + // Run till a time such that at least one RDD in the stream should have been checkpointed, // then check whether some RDD has 
been checkpointed or not ssc.start() advanceTimeWithRealDelay(ssc, firstNumBatches) + waitForCompletionOfBatch(firstNumBatches) + logInfo("Checkpoint data of state stream = \n" + stateStream.checkpointData) - assert(!stateStream.checkpointData.currentCheckpointFiles.isEmpty, + var currCheckpointFiles = stateStream.checkpointData.currentCheckpointFiles + assert(!currCheckpointFiles.isEmpty, "No checkpointed RDDs in state stream before first failure") - stateStream.checkpointData.currentCheckpointFiles.foreach { + currCheckpointFiles.foreach { case (time, file) => assert(fs.exists(new Path(file)), "Checkpoint file '" + file +"' for time " + time + " for state stream before first failure does not exist") @@ -273,8 +280,10 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester // Run till a further time such that previous checkpoint files in the stream would be deleted // and check whether the earlier checkpoint files are deleted - val checkpointFiles = stateStream.checkpointData.currentCheckpointFiles.map(x => new File(x._2)) - advanceTimeWithRealDelay(ssc, secondNumBatches) + currCheckpointFiles = stateStream.checkpointData.currentCheckpointFiles + val checkpointFiles = currCheckpointFiles.map(x => new File(x._2)) + advanceTimeWithRealDelay(ssc, secondNumBatches - firstNumBatches) + waitForCompletionOfBatch(secondNumBatches) checkpointFiles.foreach(file => assert(!file.exists, "Checkpoint file '" + file + "' was not deleted")) ssc.stop() @@ -287,14 +296,15 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester assert(!stateStream.generatedRDDs.isEmpty, "No restored RDDs in state stream after recovery from first failure") - // Run one batch to generate a new checkpoint file and check whether some RDD // is present in the checkpoint data or not ssc.start() advanceTimeWithRealDelay(ssc, 1) - assert(!stateStream.checkpointData.currentCheckpointFiles.isEmpty, + waitForCompletionOfBatch(secondNumBatches + 1) + currCheckpointFiles = 
stateStream.checkpointData.currentCheckpointFiles + assert(!currCheckpointFiles.isEmpty, "No checkpointed RDDs in state stream before second failure") - stateStream.checkpointData.currentCheckpointFiles.foreach { + currCheckpointFiles.foreach { case (time, file) => assert(fs.exists(new Path(file)), "Checkpoint file '" + file +"' for time " + time + " for state stream before seconds failure does not exist") @@ -410,6 +420,33 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester assert(restoredConf1.get("spark.driver.port") !== "9999") } + test("SPARK-30199 get ui port and blockmanager port") { + val conf = Map("spark.ui.port" -> "30001", "spark.blockManager.port" -> "30002") + conf.foreach { case (k, v) => System.setProperty(k, v) } + ssc = new StreamingContext(master, framework, batchDuration) + conf.foreach { case (k, v) => assert(ssc.conf.get(k) === v) } + + val cp = new Checkpoint(ssc, Time(1000)) + ssc.stop() + + // Serialize/deserialize to simulate write to storage and reading it back + val newCp = Utils.deserialize[Checkpoint](Utils.serialize(cp)) + + val newCpConf = newCp.createSparkConf() + conf.foreach { case (k, v) => assert(newCpConf.contains(k) && newCpConf.get(k) === v) } + + // Check if all the parameters have been restored + ssc = new StreamingContext(null, newCp, null) + conf.foreach { case (k, v) => assert(ssc.conf.get(k) === v) } + ssc.stop() + + // If port numbers are not set in system property, these parameters should not be presented + // in the newly recovered conf. + conf.foreach(kv => System.clearProperty(kv._1)) + val newCpConf1 = newCp.createSparkConf() + conf.foreach { case (k, _) => assert(!newCpConf1.contains(k)) } + } + // This tests whether the system can recover from a master failure with simple // non-stateful operations. This assumes as reliable, replayable input // source - TestInputDStream. 
@@ -847,6 +884,23 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester checkpointWriter.stop() } + test("SPARK-28912: Fix MatchError in getCheckpointFiles") { + withTempDir { tempDir => + val fs = FileSystem.get(tempDir.toURI, new Configuration) + val checkpointDir = tempDir.getAbsolutePath + "/checkpoint-01" + + assert(Checkpoint.getCheckpointFiles(checkpointDir, Some(fs)).length === 0) + + // Ignore files whose parent path match. + fs.create(new Path(checkpointDir, "this-is-matched-before-due-to-parent-path")).close() + assert(Checkpoint.getCheckpointFiles(checkpointDir, Some(fs)).length === 0) + + // Ignore directories whose names match. + fs.mkdirs(new Path(checkpointDir, "checkpoint-1000000000")) + assert(Checkpoint.getCheckpointFiles(checkpointDir, Some(fs)).length === 0) + } + } + test("SPARK-6847: stack overflow when updateStateByKey is followed by a checkpointed dstream") { // In this test, there are two updateStateByKey operators. The RDD DAG is as follows: // diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala index 2ab600ab817e0..0576bf560f30e 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala @@ -29,24 +29,14 @@ import org.apache.spark.util.ReturnStatementInClosureException /** * Test that closures passed to DStream operations are actually cleaned. 
*/ -class DStreamClosureSuite extends SparkFunSuite with BeforeAndAfterAll { - private var ssc: StreamingContext = null +class DStreamClosureSuite extends SparkFunSuite with LocalStreamingContext with BeforeAndAfterAll { + override protected def beforeEach(): Unit = { + super.beforeEach() - override def beforeAll(): Unit = { - super.beforeAll() val sc = new SparkContext("local", "test") ssc = new StreamingContext(sc, Seconds(1)) } - override def afterAll(): Unit = { - try { - ssc.stop(stopSparkContext = true) - ssc = null - } finally { - super.afterAll() - } - } - test("user provided closures are actually cleaned") { val dstream = new DummyInputDStream(ssc) val pairDstream = dstream.map { i => (i, i) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala index 94f1bcebc3a39..36036fcd44b04 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala @@ -19,39 +19,38 @@ package org.apache.spark.streaming import scala.collection.mutable.ArrayBuffer -import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} - import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.streaming.dstream.DStream -import org.apache.spark.streaming.ui.UIUtils +import org.apache.spark.ui.{UIUtils => SparkUIUtils} import org.apache.spark.util.ManualClock /** * Tests whether scope information is passed from DStream operations to RDDs correctly. 
*/ -class DStreamScopeSuite extends SparkFunSuite with BeforeAndAfter with BeforeAndAfterAll { - private var ssc: StreamingContext = null - private val batchDuration: Duration = Seconds(1) +class DStreamScopeSuite + extends SparkFunSuite + with LocalStreamingContext { + + override def beforeEach(): Unit = { + super.beforeEach() - override def beforeAll(): Unit = { - super.beforeAll() val conf = new SparkConf().setMaster("local").setAppName("test") conf.set("spark.streaming.clock", classOf[ManualClock].getName()) + val batchDuration: Duration = Seconds(1) ssc = new StreamingContext(new SparkContext(conf), batchDuration) + + assertPropertiesNotSet() } - override def afterAll(): Unit = { + override def afterEach(): Unit = { try { - ssc.stop(stopSparkContext = true) + assertPropertiesNotSet() } finally { - super.afterAll() + super.afterEach() } } - before { assertPropertiesNotSet() } - after { assertPropertiesNotSet() } - test("dstream without scope") { val dummyStream = new DummyDStream(ssc) dummyStream.initialize(Time(0)) @@ -213,7 +212,7 @@ class DStreamScopeSuite extends SparkFunSuite with BeforeAndAfter with BeforeAnd rddScope: RDDOperationScope, batchTime: Long): Unit = { val (baseScopeId, baseScopeName) = (baseScope.id, baseScope.name) - val formattedBatchTime = UIUtils.formatBatchTime( + val formattedBatchTime = SparkUIUtils.formatBatchTime( batchTime, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false) assert(rddScope.id === s"${baseScopeId}_$batchTime") assert(rddScope.name.replaceAll("\\n", " ") === s"$baseScopeName @ $formattedBatchTime") diff --git a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala index 0792770442055..53ef840864bce 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala @@ -449,9 +449,9 @@ class InputStreamsSuite 
extends TestSuiteBase with BeforeAndAfter { withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => class TestInputDStream extends InputDStream[String](ssc) { - def start() {} + def start(): Unit = {} - def stop() {} + def stop(): Unit = {} def compute(validTime: Time): Option[RDD[String]] = None } @@ -473,7 +473,7 @@ class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { } } - def testFileStream(newFilesOnly: Boolean) { + def testFileStream(newFilesOnly: Boolean): Unit = { withTempDir { testDir => val batchDuration = Seconds(2) // Create a file that exists before the StreamingContext is created: @@ -537,7 +537,7 @@ class TestServer(portToBind: Int = 0) extends Logging with Assertions { private val startLatch = new CountDownLatch(1) val servingThread = new Thread() { - override def run() { + override def run(): Unit = { try { while (true) { logInfo("Accepting connections on port " + port) @@ -608,9 +608,9 @@ class TestServer(portToBind: Int = 0) extends Logging with Assertions { } } - def send(msg: String) { queue.put(msg) } + def send(msg: String): Unit = { queue.put(msg) } - def stop() { servingThread.interrupt() } + def stop(): Unit = { servingThread.interrupt() } def port: Int = serverSocket.getLocalPort } @@ -621,10 +621,10 @@ class MultiThreadTestReceiver(numThreads: Int, numRecordsPerThread: Int) lazy val executorPool = Executors.newFixedThreadPool(numThreads) lazy val finishCount = new AtomicInteger(0) - def onStart() { + def onStart(): Unit = { (1 to numThreads).map(threadId => { val runnable = new Runnable { - def run() { + def run(): Unit = { (1 to numRecordsPerThread).foreach(i => store(threadId * numRecordsPerThread + i) ) if (finishCount.incrementAndGet == numThreads) { @@ -637,7 +637,7 @@ class MultiThreadTestReceiver(numThreads: Int, numRecordsPerThread: Int) }) } - def onStop() { + def onStop(): Unit = { executorPool.shutdown() } } diff --git 
a/streaming/src/test/scala/org/apache/spark/streaming/LocalStreamingContext.scala b/streaming/src/test/scala/org/apache/spark/streaming/LocalStreamingContext.scala new file mode 100644 index 0000000000000..5bf24a9705dc9 --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/LocalStreamingContext.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import org.scalatest.{BeforeAndAfterEach, Suite} + +import org.apache.spark.SparkContext +import org.apache.spark.internal.Logging + +/** + * Manages a local `ssc` `StreamingContext` variable, correctly stopping it after each test. + * Note that it also stops active SparkContext if `stopSparkContext` is set to true (default). + * In most cases you may want to leave it, to isolate environment for SparkContext in each test. 
+ */ +trait LocalStreamingContext extends BeforeAndAfterEach { self: Suite => + + @transient var ssc: StreamingContext = _ + @transient var stopSparkContext: Boolean = true + + override def afterEach(): Unit = { + try { + resetStreamingContext() + } finally { + super.afterEach() + } + } + + def resetStreamingContext(): Unit = { + LocalStreamingContext.stop(ssc, stopSparkContext) + ssc = null + } +} + +object LocalStreamingContext extends Logging { + def stop(ssc: StreamingContext, stopSparkContext: Boolean): Unit = { + try { + if (ssc != null) { + ssc.stop(stopSparkContext = stopSparkContext) + } + } finally { + if (stopSparkContext) { + ensureNoActiveSparkContext() + } + } + } + + /** + * Clean up active SparkContext: try to stop first if there's an active SparkContext. + * If it fails to stop, log warning message and clear active SparkContext to avoid + * interfere between tests. + */ + def ensureNoActiveSparkContext(): Unit = { + // if SparkContext is still active, try to clean up + SparkContext.getActive match { + case Some(sc) => + try { + sc.stop() + } catch { + case e: Throwable => + logError("Exception trying to stop SparkContext, clear active SparkContext...", e) + SparkContext.clearActiveContext() + throw e + } + case _ => + } + } + +} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala index 06c0c2aa97ee1..b2b8d2f41fc80 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala @@ -23,46 +23,36 @@ import java.util.concurrent.ConcurrentLinkedQueue import scala.collection.JavaConverters._ import scala.reflect.ClassTag -import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} import org.scalatest.PrivateMethodTester._ import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.streaming.dstream.{DStream, 
InternalMapWithStateDStream, MapWithStateDStream, MapWithStateDStreamImpl} import org.apache.spark.util.{ManualClock, Utils} -class MapWithStateSuite extends SparkFunSuite - with DStreamCheckpointTester with BeforeAndAfterAll with BeforeAndAfter { +class MapWithStateSuite extends SparkFunSuite with LocalStreamingContext + with DStreamCheckpointTester { private var sc: SparkContext = null protected var checkpointDir: File = null protected val batchDuration = Seconds(1) - before { - StreamingContext.getActive().foreach { _.stop(stopSparkContext = false) } - checkpointDir = Utils.createTempDir(namePrefix = "checkpoint") - } + override def beforeEach(): Unit = { + super.beforeEach() - after { - StreamingContext.getActive().foreach { _.stop(stopSparkContext = false) } - if (checkpointDir != null) { - Utils.deleteRecursively(checkpointDir) - } - } - - override def beforeAll(): Unit = { - super.beforeAll() val conf = new SparkConf().setMaster("local").setAppName("MapWithStateSuite") conf.set("spark.streaming.clock", classOf[ManualClock].getName()) sc = new SparkContext(conf) + + checkpointDir = Utils.createTempDir(namePrefix = "checkpoint") } - override def afterAll(): Unit = { + override def afterEach(): Unit = { try { - if (sc != null) { - sc.stop() + if (checkpointDir != null) { + Utils.deleteRecursively(checkpointDir) } } finally { - super.afterAll() + super.afterEach() } } @@ -446,7 +436,8 @@ class MapWithStateSuite extends SparkFunSuite } test("mapWithState - checkpoint durations") { - val privateMethod = PrivateMethod[InternalMapWithStateDStream[_, _, _, _]]('internalStream) + val privateMethod = + PrivateMethod[InternalMapWithStateDStream[_, _, _, _]](Symbol("internalStream")) def testCheckpointDuration( batchDuration: Duration, @@ -571,7 +562,7 @@ class MapWithStateSuite extends SparkFunSuite (collectedOutputs.asScala.toSeq, collectedStateSnapshots.asScala.toSeq) } - private def assert[U](expected: Seq[Seq[U]], collected: Seq[Seq[U]], typ: String) { + private def 
assert[U](expected: Seq[Seq[U]], collected: Seq[Seq[U]], typ: String): Unit = { val debugString = "\nExpected:\n" + expected.mkString("\n") + "\nCollected:\n" + collected.mkString("\n") assert(expected.size === collected.size, diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala b/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala index cf8dd10571f47..d0a5ababc7cac 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala @@ -30,6 +30,7 @@ import scala.util.Random import com.google.common.io.Files import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.scalatest.Assertions._ import org.apache.spark.internal.Logging import org.apache.spark.streaming.dstream.DStream @@ -42,7 +43,7 @@ object MasterFailureTest extends Logging { @volatile var killCount = 0 @volatile var setupCalled = false - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // scalastyle:off println if (args.size < 2) { println( @@ -64,7 +65,7 @@ object MasterFailureTest extends Logging { // scalastyle:on println } - def testMap(directory: String, numBatches: Int, batchDuration: Duration) { + def testMap(directory: String, numBatches: Int, batchDuration: Duration): Unit = { // Input: time=1 ==> [ 1 ] , time=2 ==> [ 2 ] , time=3 ==> [ 3 ] , ... val input = (1 to numBatches).map(_.toString).toSeq // Expected output: time=1 ==> [ 1 ] , time=2 ==> [ 2 ] , time=3 ==> [ 3 ] , ... @@ -86,7 +87,7 @@ object MasterFailureTest extends Logging { } - def testUpdateStateByKey(directory: String, numBatches: Int, batchDuration: Duration) { + def testUpdateStateByKey(directory: String, numBatches: Int, batchDuration: Duration): Unit = { // Input: time=1 ==> [ a ] , time=2 ==> [ a, a ] , time=3 ==> [ a, a, a ] , ... 
val input = (1 to numBatches).map(i => (1 to i).map(_ => "a").mkString(" ")).toSeq // Expected output: time=1 ==> [ (a, 1) ] , time=2 ==> [ (a, 3) ] , time=3 ==> [ (a,6) ] , ... @@ -293,7 +294,7 @@ object MasterFailureTest extends Logging { * duplicate batch outputs of values from the `output`. As a result, the * expected output should not have consecutive batches with the same values as output. */ - private def verifyOutput[T: ClassTag](output: Seq[T], expectedOutput: Seq[T]) { + private def verifyOutput[T: ClassTag](output: Seq[T], expectedOutput: Seq[T]): Unit = { // Verify whether expected outputs do not consecutive batches with same output for (i <- 0 until expectedOutput.size - 1) { assert(expectedOutput(i) != expectedOutput(i + 1), @@ -315,7 +316,7 @@ object MasterFailureTest extends Logging { } /** Resets counter to prepare for the test */ - private def reset() { + private def reset(): Unit = { killed = false killCount = 0 setupCalled = false @@ -328,7 +329,7 @@ object MasterFailureTest extends Logging { private[streaming] class KillingThread(ssc: StreamingContext, maxKillWaitTime: Long) extends Thread with Logging { - override def run() { + override def run(): Unit = { try { // If it is the first killing, then allow the first checkpoint to be created var minKillWaitTime = if (MasterFailureTest.killCount == 0) 5000 else 2000 @@ -362,7 +363,7 @@ private[streaming] class FileGeneratingThread(input: Seq[String], testDir: Path, interval: Long) extends Thread with Logging { - override def run() { + override def run(): Unit = { val localTestDir = Utils.createTempDir() var fs = testDir.getFileSystem(new Configuration()) val maxTries = 3 diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala index c8f424af9af01..0976494b6d094 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala +++ 
b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.streaming import java.io.File import java.nio.ByteBuffer +import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ import scala.reflect.ClassTag @@ -87,9 +88,12 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) rpcEnv = RpcEnv.create("test", "localhost", 0, conf, securityMgr) conf.set("spark.driver.port", rpcEnv.address.port.toString) + val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]() blockManagerMaster = new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager", new BlockManagerMasterEndpoint(rpcEnv, true, conf, - new LiveListenerBus(conf), None)), conf, true) + new LiveListenerBus(conf), None, blockManagerInfo)), + rpcEnv.setupEndpoint("blockmanagerHeartbeat", + new BlockManagerMasterHeartbeatEndpoint(rpcEnv, true, blockManagerInfo)), conf, true) storageLevel = StorageLevel.MEMORY_ONLY_SER blockManager = createBlockManager(blockManagerSize, conf) @@ -242,7 +246,8 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) } } - private def testCountWithBlockManagerBasedBlockHandler(isBlockManagerBasedBlockHandler: Boolean) { + private def testCountWithBlockManagerBasedBlockHandler( + isBlockManagerBasedBlockHandler: Boolean): Unit = { // ByteBufferBlock-MEMORY_ONLY testRecordcount(isBlockManagerBasedBlockHandler, StorageLevel.MEMORY_ONLY, ByteBufferBlock(ByteBuffer.wrap(Array.tabulate(100)(i => i.toByte))), blockManager, None) @@ -298,7 +303,7 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) receivedBlock: ReceivedBlock, bManager: BlockManager, expectedNumRecords: Option[Long] - ) { + ): Unit = { blockManager = bManager storageLevel = sLevel var bId: StreamBlockId = null @@ -335,10 +340,11 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) * using the given 
verification function */ private def testBlockStoring(receivedBlockHandler: ReceivedBlockHandler) - (verifyFunc: (Seq[String], Seq[StreamBlockId], Seq[ReceivedBlockStoreResult]) => Unit) { + (verifyFunc: (Seq[String], Seq[StreamBlockId], Seq[ReceivedBlockStoreResult]) => Unit) + : Unit = { val data = Seq.tabulate(100) { _.toString } - def storeAndVerify(blocks: Seq[ReceivedBlock]) { + def storeAndVerify(blocks: Seq[ReceivedBlock]): Unit = { blocks should not be empty val (blockIds, storeResults) = storeBlocks(receivedBlockHandler, blocks) withClue(s"Testing with ${blocks.head.getClass.getSimpleName}s:") { @@ -361,7 +367,7 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) } /** Test error handling when blocks that cannot be stored */ - private def testErrorHandling(receivedBlockHandler: ReceivedBlockHandler) { + private def testErrorHandling(receivedBlockHandler: ReceivedBlockHandler): Unit = { // Handle error in iterator (e.g. divide-by-zero error) intercept[Exception] { val iterator = (10 to (-10, -1)).toIterator.map { _ / 0 } @@ -376,12 +382,14 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) } /** Instantiate a BlockManagerBasedBlockHandler and run a code with it */ - private def withBlockManagerBasedBlockHandler(body: BlockManagerBasedBlockHandler => Unit) { + private def withBlockManagerBasedBlockHandler( + body: BlockManagerBasedBlockHandler => Unit): Unit = { body(new BlockManagerBasedBlockHandler(blockManager, storageLevel)) } /** Instantiate a WriteAheadLogBasedBlockHandler and run a code with it */ - private def withWriteAheadLogBasedBlockHandler(body: WriteAheadLogBasedBlockHandler => Unit) { + private def withWriteAheadLogBasedBlockHandler( + body: WriteAheadLogBasedBlockHandler => Unit): Unit = { require(WriteAheadLogUtils.getRollingIntervalSecs(conf, isDriver = false) === 1) val receivedBlockHandler = new WriteAheadLogBasedBlockHandler(blockManager, serializerManager, 1, storageLevel, conf, 
hadoopConf, tempDirectory.toString, manualClock) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala index 0b15f00eba499..368411cc2214b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala @@ -184,7 +184,7 @@ class ReceivedBlockTrackerSuite // Set the time increment level to twice the rotation interval so that every increment creates // a new log file - def incrementTime() { + def incrementTime(): Unit = { val timeIncrementMillis = 2000L manualClock.advance(timeIncrementMillis) } @@ -197,7 +197,7 @@ class ReceivedBlockTrackerSuite } // Print the data present in the log ahead files in the log directory - def printLogFiles(message: String) { + def printLogFiles(message: String): Unit = { val fileContents = getWriteAheadLogFiles().map { file => (s"\n>>>>> $file: <<<<<\n${getWrittenLogData(file).mkString("\n")}") }.mkString("\n") diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala index 0349e11224cfc..5e2ce25c7c441 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.streaming import scala.util.Random -import org.scalatest.BeforeAndAfterAll - import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.rdd.BlockRDD import org.apache.spark.storage.{StorageLevel, StreamBlockId} @@ -30,15 +28,9 @@ import org.apache.spark.streaming.receiver.{BlockManagerBasedStoreResult, Receiv import org.apache.spark.streaming.scheduler.ReceivedBlockInfo import 
org.apache.spark.streaming.util.{WriteAheadLogRecordHandle, WriteAheadLogUtils} -class ReceiverInputDStreamSuite extends TestSuiteBase with BeforeAndAfterAll { - - override def afterAll(): Unit = { - try { - StreamingContext.getActive().foreach(_.stop()) - } finally { - super.afterAll() - } - } +class ReceiverInputDStreamSuite + extends TestSuiteBase + with LocalStreamingContext { testWithoutWAL("createBlockRDD creates empty BlockRDD when no block info") { receiverStream => val rdd = receiverStream.createBlockRDD(Time(0), Seq.empty) @@ -127,7 +119,7 @@ class ReceiverInputDStreamSuite extends TestSuiteBase with BeforeAndAfterAll { conf.setMaster("local[4]").setAppName("ReceiverInputDStreamSuite") conf.set(WriteAheadLogUtils.RECEIVER_WAL_ENABLE_CONF_KEY, enableWAL.toString) require(WriteAheadLogUtils.enableReceiverLog(conf) === enableWAL) - val ssc = new StreamingContext(conf, Seconds(1)) + ssc = new StreamingContext(conf, Seconds(1)) val receiverStream = new ReceiverInputDStream[Int](ssc) { override def getReceiver(): Receiver[Int] = null } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala index 6b664b7a7dfd4..b07fd733953db 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala @@ -52,7 +52,7 @@ class ReceiverSuite extends TestSuiteBase with TimeLimits with Serializable { // Thread that runs the executor val executingThread = new Thread() { - override def run() { + override def run(): Unit = { executor.start() executorStarted.release(1) executor.awaitTermination() @@ -73,7 +73,7 @@ class ReceiverSuite extends TestSuiteBase with TimeLimits with Serializable { executorStarted.acquire() // Verify that receiver was started - assert(receiver.onStartCalled) + assert(receiver.callsRecorder.calls === Seq("onStart")) assert(executor.isReceiverStarted) 
assert(receiver.isStarted) assert(!receiver.isStopped()) @@ -106,19 +106,22 @@ class ReceiverSuite extends TestSuiteBase with TimeLimits with Serializable { assert(executor.errors.head.eq(exception)) // Verify restarting actually stops and starts the receiver - receiver.restart("restarting", null, 600) - eventually(timeout(300.milliseconds), interval(10.milliseconds)) { - // receiver will be stopped async - assert(receiver.isStopped) - assert(receiver.onStopCalled) - } - eventually(timeout(1.second), interval(10.milliseconds)) { - // receiver will be started async - assert(receiver.onStartCalled) - assert(executor.isReceiverStarted) + executor.callsRecorder.reset() + receiver.callsRecorder.reset() + receiver.restart("restarting", null, 100) + eventually(timeout(10.seconds), interval(10.milliseconds)) { + // below verification ensures for now receiver is already restarted assert(receiver.isStarted) assert(!receiver.isStopped) assert(receiver.receiving) + + // both receiver supervisor and receiver should be stopped first, and started + assert(executor.callsRecorder.calls === Seq("onReceiverStop", "onReceiverStart")) + assert(receiver.callsRecorder.calls === Seq("onStop", "onStart")) + + // check whether the delay between stop and start is respected + assert(executor.callsRecorder.timestamps.reverse.reduceLeft { _ - _ } >= 100) + assert(receiver.callsRecorder.timestamps.reverse.reduceLeft { _ - _ } >= 100) } // Verify that stopping actually stops the thread @@ -229,7 +232,7 @@ class ReceiverSuite extends TestSuiteBase with TimeLimits with Serializable { } } - def printLogFiles(message: String, files: Seq[String]) { + def printLogFiles(message: String, files: Seq[String]): Unit = { logInfo(s"$message (${files.size} files):\n" + files.mkString("\n")) } @@ -290,42 +293,53 @@ class ReceiverSuite extends TestSuiteBase with TimeLimits with Serializable { val arrayBuffers = new ArrayBuffer[ArrayBuffer[_]] val errors = new ArrayBuffer[Throwable] + // tracks calls of 
"onReceiverStart", "onReceiverStop" + val callsRecorder = new MethodsCallRecorder() + /** Check if all data structures are clean */ def isAllEmpty: Boolean = { singles.isEmpty && byteBuffers.isEmpty && iterators.isEmpty && arrayBuffers.isEmpty && errors.isEmpty } - def pushSingle(data: Any) { + def pushSingle(data: Any): Unit = { singles += data } def pushBytes( bytes: ByteBuffer, optionalMetadata: Option[Any], - optionalBlockId: Option[StreamBlockId]) { + optionalBlockId: Option[StreamBlockId]): Unit = { byteBuffers += bytes } def pushIterator( iterator: Iterator[_], optionalMetadata: Option[Any], - optionalBlockId: Option[StreamBlockId]) { + optionalBlockId: Option[StreamBlockId]): Unit = { iterators += iterator } def pushArrayBuffer( arrayBuffer: ArrayBuffer[_], optionalMetadata: Option[Any], - optionalBlockId: Option[StreamBlockId]) { + optionalBlockId: Option[StreamBlockId]): Unit = { arrayBuffers += arrayBuffer } - def reportError(message: String, throwable: Throwable) { + def reportError(message: String, throwable: Throwable): Unit = { errors += throwable } - override protected def onReceiverStart(): Boolean = true + override protected def onReceiverStart(): Boolean = { + callsRecorder.record() + true + } + + override protected def onReceiverStop(message: String, error: Option[Throwable]): Unit = { + callsRecorder.record() + super.onReceiverStop(message, error) + } override def createBlockGenerator( blockGeneratorListener: BlockGeneratorListener): BlockGenerator = { @@ -341,17 +355,17 @@ class ReceiverSuite extends TestSuiteBase with TimeLimits with Serializable { val arrayBuffers = new ArrayBuffer[ArrayBuffer[Int]] val errors = new ArrayBuffer[Throwable] - def onAddData(data: Any, metadata: Any) { } + def onAddData(data: Any, metadata: Any): Unit = { } - def onGenerateBlock(blockId: StreamBlockId) { } + def onGenerateBlock(blockId: StreamBlockId): Unit = { } - def onPushBlock(blockId: StreamBlockId, arrayBuffer: ArrayBuffer[_]) { + def onPushBlock(blockId: 
StreamBlockId, arrayBuffer: ArrayBuffer[_]): Unit = { val bufferOfInts = arrayBuffer.map(_.asInstanceOf[Int]) arrayBuffers += bufferOfInts Thread.sleep(0) } - def onError(message: String, throwable: Throwable) { + def onError(message: String, throwable: Throwable): Unit = { errors += throwable } } @@ -363,36 +377,55 @@ class ReceiverSuite extends TestSuiteBase with TimeLimits with Serializable { class FakeReceiver(sendData: Boolean = false) extends Receiver[Int](StorageLevel.MEMORY_ONLY) { @volatile var otherThread: Thread = null @volatile var receiving = false - @volatile var onStartCalled = false - @volatile var onStopCalled = false - def onStart() { + // tracks calls of "onStart", "onStop" + @transient lazy val callsRecorder = new MethodsCallRecorder() + + def onStart(): Unit = { otherThread = new Thread() { - override def run() { + override def run(): Unit = { receiving = true - var count = 0 - while(!isStopped()) { - if (sendData) { - store(count) - count += 1 + try { + var count = 0 + while(!isStopped()) { + if (sendData) { + store(count) + count += 1 + } + Thread.sleep(10) } - Thread.sleep(10) + } finally { + receiving = false } } } - onStartCalled = true + callsRecorder.record() otherThread.start() } - def onStop() { - onStopCalled = true + def onStop(): Unit = { + callsRecorder.record() otherThread.join() } +} + +class MethodsCallRecorder { + // tracks calling methods as (timestamp, methodName) + private val records = new ArrayBuffer[(Long, String)] + + def record(): Unit = records.append((System.currentTimeMillis(), callerMethodName)) + + def reset(): Unit = records.clear() - def reset() { - receiving = false - onStartCalled = false - onStopCalled = false + def callsWithTimestamp: scala.collection.immutable.Seq[(Long, String)] = records.toList + + def calls: scala.collection.immutable.Seq[String] = records.map(_._2).toList + + def timestamps: scala.collection.immutable.Seq[Long] = records.map(_._1).toList + + private def callerMethodName: String = { + val 
stackTrace = new Throwable().getStackTrace + // it should return method name of two levels deeper + stackTrace(2).getMethodName } } - diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index c4424b3cff877..1d6637861511f 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -26,7 +26,7 @@ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Queue import org.apache.commons.io.FileUtils -import org.scalatest.{Assertions, BeforeAndAfter, PrivateMethodTester} +import org.scalatest.{Assertions, PrivateMethodTester} import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.concurrent.Eventually._ import org.scalatest.exceptions.TestFailedDueToTimeoutException @@ -44,7 +44,11 @@ import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.util.{ManualClock, Utils} -class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeLimits with Logging { +class StreamingContextSuite + extends SparkFunSuite + with LocalStreamingContext + with TimeLimits + with Logging { // Necessary to make ScalaTest 3.x interrupt a thread on the JVM like ScalaTest 2.2.x implicit val signaler: Signaler = ThreadSignaler @@ -56,20 +60,6 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL val envPair = "key" -> "value" val conf = new SparkConf().setMaster(master).setAppName(appName) - var sc: SparkContext = null - var ssc: StreamingContext = null - - after { - if (ssc != null) { - ssc.stop() - ssc = null - } - if (sc != null) { - sc.stop() - sc = null - } - } - test("from no conf constructor") { ssc = new StreamingContext(master, appName, batchDuration) assert(ssc.sparkContext.conf.get("spark.master") === master) @@ -95,7 +85,7 @@ 
class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL } test("from existing SparkContext") { - sc = new SparkContext(master, appName) + val sc = new SparkContext(master, appName) ssc = new StreamingContext(sc, batchDuration) } @@ -272,7 +262,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL // Explicitly do not stop SparkContext ssc = new StreamingContext(conf, batchDuration) - sc = ssc.sparkContext + var sc = ssc.sparkContext addInputStream(ssc).register() ssc.start() ssc.stop(stopSparkContext = false) @@ -306,7 +296,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL test("stop gracefully") { val conf = new SparkConf().setMaster(master).setAppName(appName) conf.set("spark.dummyTimeConfig", "3600s") - sc = new SparkContext(conf) + val sc = new SparkContext(conf) for (i <- 1 to 4) { logInfo("==================================\n\n\n") ssc = new StreamingContext(sc, Milliseconds(100)) @@ -338,7 +328,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL // This is not a deterministic unit. But if this unit test is flaky, then there is definitely // something wrong. 
See SPARK-5681 val conf = new SparkConf().setMaster(master).setAppName(appName) - sc = new SparkContext(conf) + val sc = new SparkContext(conf) ssc = new StreamingContext(sc, Milliseconds(100)) val input = ssc.receiverStream(new TestReceiver) input.foreachRDD(_ => {}) @@ -352,11 +342,10 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL test("stop slow receiver gracefully") { val conf = new SparkConf().setMaster(master).setAppName(appName) conf.set("spark.streaming.gracefulStopTimeout", "20000s") - sc = new SparkContext(conf) + val sc = new SparkContext(conf) logInfo("==================================\n\n\n") ssc = new StreamingContext(sc, Milliseconds(100)) var runningCount = 0 - SlowTestReceiver.receivedAllRecords = false // Create test receiver that sleeps in onStop() val totalNumRecords = 15 val recordsPerSecond = 1 @@ -368,6 +357,9 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL } ssc.start() ssc.awaitTerminationOrTimeout(500) + eventually(timeout(10.seconds), interval(10.millis)) { + assert(SlowTestReceiver.initialized) + } ssc.stop(stopSparkContext = false, stopGracefully = true) logInfo("Running count = " + runningCount) assert(runningCount > 0) @@ -445,7 +437,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL // test whether wait exits if context is stopped failAfter(10.seconds) { // 10 seconds because spark takes a long time to shutdown t = new Thread() { - override def run() { + override def run(): Unit = { Thread.sleep(500) ssc.stop() } @@ -512,7 +504,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL // test whether awaitTerminationOrTimeout() return true if context is stopped failAfter(10.seconds) { // 10 seconds because spark takes a long time to shutdown t = new Thread() { - override def run() { + override def run(): Unit = { Thread.sleep(500) ssc.stop() } @@ -591,7 +583,7 @@ class StreamingContextSuite extends 
SparkFunSuite with BeforeAndAfter with TimeL // getOrCreate should recover StreamingContext with existing SparkContext testGetOrCreate { - sc = new SparkContext(conf) + val sc = new SparkContext(conf) ssc = StreamingContext.getOrCreate(checkpointPath, () => creatingFunction()) assert(ssc != null, "no context created") assert(!newContextCreated, "old context not recovered") @@ -603,7 +595,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL require(StreamingContext.getActive().isEmpty, "context exists from before") var newContextCreated = false - def creatingFunc(): StreamingContext = { + def creatingFunc(sc: SparkContext)(): StreamingContext = { newContextCreated = true val newSsc = new StreamingContext(sc, batchDuration) val input = addInputStream(newSsc) @@ -627,8 +619,8 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL // getActiveOrCreate should create new context and getActive should return it only // after starting the context testGetActiveOrCreate { - sc = new SparkContext(conf) - ssc = StreamingContext.getActiveOrCreate(creatingFunc _) + val sc = new SparkContext(conf) + ssc = StreamingContext.getActiveOrCreate(creatingFunc(sc)) assert(ssc != null, "no context created") assert(newContextCreated, "new context not created") assert(StreamingContext.getActive().isEmpty, @@ -636,25 +628,25 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL ssc.start() assert(StreamingContext.getActive() === Some(ssc), "active context not returned") - assert(StreamingContext.getActiveOrCreate(creatingFunc _) === ssc, + assert(StreamingContext.getActiveOrCreate(creatingFunc(sc)) === ssc, "active context not returned") ssc.stop() assert(StreamingContext.getActive().isEmpty, "inactive context returned") - assert(StreamingContext.getActiveOrCreate(creatingFunc _) !== ssc, + assert(StreamingContext.getActiveOrCreate(creatingFunc(sc)) !== ssc, "inactive context returned") } // 
getActiveOrCreate and getActive should return independently created context after activating testGetActiveOrCreate { - sc = new SparkContext(conf) - ssc = creatingFunc() // Create + val sc = new SparkContext(conf) + ssc = creatingFunc(sc) // Create assert(StreamingContext.getActive().isEmpty, "new initialized context returned before starting") ssc.start() assert(StreamingContext.getActive() === Some(ssc), "active context not returned") - assert(StreamingContext.getActiveOrCreate(creatingFunc _) === ssc, + assert(StreamingContext.getActiveOrCreate(creatingFunc(sc)) === ssc, "active context not returned") ssc.stop() assert(StreamingContext.getActive().isEmpty, @@ -736,7 +728,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with TimeL } test("multiple streaming contexts") { - sc = new SparkContext( + val sc = new SparkContext( conf.clone.set("spark.streaming.clock", "org.apache.spark.util.ManualClock")) ssc = new StreamingContext(sc, Seconds(1)) val input = addInputStream(ssc) @@ -930,9 +922,9 @@ class TestReceiver extends Receiver[Int](StorageLevel.MEMORY_ONLY) with Logging var receivingThreadOption: Option[Thread] = None - def onStart() { + def onStart(): Unit = { val thread = new Thread() { - override def run() { + override def run(): Unit = { logInfo("Receiving started") while (!isStopped) { store(TestReceiver.counter.getAndIncrement) @@ -944,7 +936,7 @@ class TestReceiver extends Receiver[Int](StorageLevel.MEMORY_ONLY) with Logging thread.start() } - def onStop() { + def onStop(): Unit = { // no clean to be done, the receiving thread should stop on it own, so just wait for it. 
receivingThreadOption.foreach(_.join()) } @@ -959,26 +951,28 @@ class SlowTestReceiver(totalRecords: Int, recordsPerSecond: Int) extends Receiver[Int](StorageLevel.MEMORY_ONLY) with Logging { var receivingThreadOption: Option[Thread] = None + @volatile var receivedAllRecords = false - def onStart() { + def onStart(): Unit = { val thread = new Thread() { - override def run() { + override def run(): Unit = { logInfo("Receiving started") for(i <- 1 to totalRecords) { Thread.sleep(1000 / recordsPerSecond) store(i) } - SlowTestReceiver.receivedAllRecords = true + receivedAllRecords = true logInfo(s"Received all $totalRecords records") } } receivingThreadOption = Some(thread) thread.start() + SlowTestReceiver.initialized = true } - def onStop() { + def onStop(): Unit = { // Simulate slow receiver by waiting for all records to be produced - while (!SlowTestReceiver.receivedAllRecords) { + while (!receivedAllRecords) { Thread.sleep(100) } // no clean to be done, the receiving thread should stop on it own @@ -986,12 +980,12 @@ class SlowTestReceiver(totalRecords: Int, recordsPerSecond: Int) } object SlowTestReceiver { - var receivedAllRecords = false + var initialized = false } /** Streaming application for testing DStream and RDD creation sites */ -package object testPackage extends Assertions { - def test() { +object testPackage extends Assertions { + def test(): Unit = { val conf = new SparkConf().setMaster("local").setAppName("CreationSite test") val ssc = new StreamingContext(conf, Milliseconds(100)) try { @@ -1032,11 +1026,11 @@ package object testPackage extends Assertions { * This includes methods to access private methods and fields in StreamingContext and MetricsSystem */ private object StreamingContextSuite extends PrivateMethodTester { - private val _sources = PrivateMethod[ArrayBuffer[Source]]('sources) + private val _sources = PrivateMethod[ArrayBuffer[Source]](Symbol("sources")) private def getSources(metricsSystem: MetricsSystem): ArrayBuffer[Source] = { 
metricsSystem.invokePrivate(_sources()) } - private val _streamingSource = PrivateMethod[StreamingSource]('streamingSource) + private val _streamingSource = PrivateMethod[StreamingSource](Symbol("streamingSource")) private def getStreamingSource(streamingContext: StreamingContext): StreamingSource = { streamingContext.invokePrivate(_streamingSource()) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala index 62fd43302b9d7..679c58dbae92b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala @@ -36,20 +36,11 @@ import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.streaming.scheduler._ -class StreamingListenerSuite extends TestSuiteBase with Matchers { +class StreamingListenerSuite extends TestSuiteBase with LocalStreamingContext with Matchers { val input = (1 to 4).map(Seq(_)).toSeq val operation = (d: DStream[Int]) => d.map(x => x) - var ssc: StreamingContext = _ - - override def afterFunction() { - super.afterFunction() - if (ssc != null) { - ssc.stop() - } - } - // To make sure that the processing start and end times in collected // information are different for successive batches override def batchDuration: Duration = Milliseconds(100) @@ -236,7 +227,7 @@ class StreamingListenerSuite extends TestSuiteBase with Matchers { // Post a Streaming event after stopping StreamingContext val receiverInfoStopped = ReceiverInfo(0, "test", false, "localhost", "0") ssc.scheduler.listenerBus.post(StreamingListenerReceiverStopped(receiverInfoStopped)) - ssc.sparkContext.listenerBus.waitUntilEmpty(1000) + ssc.sparkContext.listenerBus.waitUntilEmpty() // The StreamingListener should not receive any event verifyNoMoreInteractions(streamingListener) } @@ -288,15 
+279,15 @@ class BatchInfoCollector extends StreamingListener { val batchInfosStarted = new ConcurrentLinkedQueue[BatchInfo] val batchInfosSubmitted = new ConcurrentLinkedQueue[BatchInfo] - override def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted) { + override def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted): Unit = { batchInfosSubmitted.add(batchSubmitted.batchInfo) } - override def onBatchStarted(batchStarted: StreamingListenerBatchStarted) { + override def onBatchStarted(batchStarted: StreamingListenerBatchStarted): Unit = { batchInfosStarted.add(batchStarted.batchInfo) } - override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted) { + override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = { batchInfosCompleted.add(batchCompleted.batchInfo) } } @@ -307,15 +298,15 @@ class ReceiverInfoCollector extends StreamingListener { val stoppedReceiverStreamIds = new ConcurrentLinkedQueue[Int] val receiverErrors = new ConcurrentLinkedQueue[(Int, String, String)] - override def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted) { + override def onReceiverStarted(receiverStarted: StreamingListenerReceiverStarted): Unit = { startedReceiverStreamIds.add(receiverStarted.receiverInfo.streamId) } - override def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped) { + override def onReceiverStopped(receiverStopped: StreamingListenerReceiverStopped): Unit = { stoppedReceiverStreamIds.add(receiverStopped.receiverInfo.streamId) } - override def onReceiverError(receiverError: StreamingListenerReceiverError) { + override def onReceiverError(receiverError: StreamingListenerReceiverError): Unit = { receiverErrors.add(((receiverError.receiverInfo.streamId, receiverError.receiverInfo.lastErrorMessage, receiverError.receiverInfo.lastError))) } @@ -338,7 +329,7 @@ class OutputOperationInfoCollector extends StreamingListener { } class StreamingListenerSuiteReceiver 
extends Receiver[Any](StorageLevel.MEMORY_ONLY) with Logging { - def onStart() { + def onStart(): Unit = { Future { logInfo("Started receiver and sleeping") Thread.sleep(10) @@ -349,7 +340,7 @@ class StreamingListenerSuiteReceiver extends Receiver[Any](StorageLevel.MEMORY_O stop("test stop error") } } - def onStop() { } + def onStop(): Unit = { } } /** @@ -377,7 +368,7 @@ class StreamingContextStoppingCollector(val ssc: StreamingContext) extends Strea private var isFirstBatch = true - override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted) { + override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = { if (isFirstBatch) { // We should only call `ssc.stop()` in the first batch. Otherwise, it's possible that the main // thread is calling `ssc.stop()`, while StreamingContextStoppingCollector is also calling diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index f2ae77896a5d3..55c2950261a07 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import scala.language.implicitConversions import scala.reflect.ClassTag -import org.scalatest.BeforeAndAfter +import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually.timeout import org.scalatest.concurrent.PatienceConfiguration import org.scalatest.time.{Seconds => ScalaTestSeconds, Span} @@ -62,9 +62,9 @@ private[streaming] class DummyInputDStream(ssc: StreamingContext) extends InputD class TestInputStream[T: ClassTag](_ssc: StreamingContext, input: Seq[Seq[T]], numPartitions: Int) extends InputDStream[T](_ssc) { - def start() {} + def start(): Unit = {} - def stop() {} + def stop(): Unit = {} def compute(validTime: Time): Option[RDD[T]] = { logInfo("Computing RDD for time " + 
validTime) @@ -211,7 +211,7 @@ class BatchCounter(ssc: StreamingContext) { * This is the base trait for Spark Streaming testsuites. This provides basic functionality * to run user-defined set of input on user-defined stream operations, and verify the output. */ -trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { +trait TestSuiteBase extends SparkFunSuite with BeforeAndAfterEach with Logging { // Name of the framework for Spark context def framework: String = this.getClass.getSimpleName @@ -250,8 +250,8 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { val eventuallyTimeout: PatienceConfiguration.Timeout = timeout(Span(10, ScalaTestSeconds)) // Default before function for any streaming test suite. Override this - // if you want to add your stuff to "before" (i.e., don't call before { } ) - def beforeFunction() { + // if you want to add your stuff to "beforeEach" + def beforeFunction(): Unit = { if (useManualClock) { logInfo("Using manual clock") conf.set("spark.streaming.clock", "org.apache.spark.util.ManualClock") @@ -262,13 +262,24 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { } // Default after function for any streaming test suite. 
Override this - // if you want to add your stuff to "after" (i.e., don't call after { } ) - def afterFunction() { + // if you want to add your stuff to "afterEach" + def afterFunction(): Unit = { System.clearProperty("spark.streaming.clock") } - before(beforeFunction) - after(afterFunction) + override def beforeEach(): Unit = { + super.beforeEach() + beforeFunction() + } + + override def afterEach(): Unit = { + try { + afterFunction() + } finally { + super.afterEach() + } + + } /** * Run a block of code with the given StreamingContext and automatically @@ -278,12 +289,7 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { try { block(ssc) } finally { - try { - ssc.stop(stopSparkContext = true) - } catch { - case e: Exception => - logError("Error stopping StreamingContext", e) - } + LocalStreamingContext.stop(ssc, stopSparkContext = true) } } @@ -452,7 +458,7 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { output: Seq[Seq[V]], expectedOutput: Seq[Seq[V]], useSet: Boolean - ) { + ): Unit = { logInfo("--------------------------------") logInfo("output.size = " + output.size) logInfo("output") @@ -492,7 +498,7 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { operation: DStream[U] => DStream[V], expectedOutput: Seq[Seq[V]], useSet: Boolean = false - ) { + ): Unit = { testOperation[U, V](input, operation, expectedOutput, -1, useSet) } @@ -511,7 +517,7 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { expectedOutput: Seq[Seq[V]], numBatches: Int, useSet: Boolean - ) { + ): Unit = { val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size withStreamingContext(setupStreams[U, V](input, operation)) { ssc => val output = runStreams[V](ssc, numBatches_, expectedOutput.size) @@ -529,7 +535,7 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { operation: (DStream[U], DStream[V]) => DStream[W], expectedOutput: 
Seq[Seq[W]], useSet: Boolean - ) { + ): Unit = { testOperation[U, V, W](input1, input2, operation, expectedOutput, -1, useSet) } @@ -550,7 +556,7 @@ trait TestSuiteBase extends SparkFunSuite with BeforeAndAfter with Logging { expectedOutput: Seq[Seq[W]], numBatches: Int, useSet: Boolean - ) { + ): Unit = { val numBatches_ = if (numBatches > 0) numBatches else expectedOutput.size withStreamingContext(setupStreams[U, V, W](input1, input2, operation)) { ssc => val output = runStreams[W](ssc, numBatches_, expectedOutput.size) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala index 1d34221fde4f4..bdc9e9ee2aed1 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala @@ -23,8 +23,8 @@ import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest._ import org.scalatest.concurrent.Eventually._ -import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ +import org.scalatestplus.selenium.WebBrowser import org.apache.spark._ import org.apache.spark.internal.config.UI.UI_ENABLED @@ -97,7 +97,7 @@ class UISeleniumSuite val sparkUI = ssc.sparkContext.ui.get - sparkUI.getHandlers.count(_.getContextPath.contains("/streaming")) should be (5) + sparkUI.getDelegatingHandlers.count(_.getContextPath.contains("/streaming")) should be (5) eventually(timeout(10.seconds), interval(50.milliseconds)) { go to (sparkUI.webUrl.stripSuffix("/")) @@ -151,8 +151,9 @@ class UISeleniumSuite summaryText should contain ("Total delay:") findAll(cssSelector("""#batch-job-table th""")).map(_.text).toSeq should be { - List("Output Op Id", "Description", "Output Op Duration", "Status", "Job Id", - "Job Duration", "Stages: Succeeded/Total", "Tasks (for all stages): Succeeded/Total", + List("Output Op Id", "Description", 
"Output Op Duration (?)", "Status", "Job Id", + "Job Duration (?)", "Stages: Succeeded/Total", + "Tasks (for all stages): Succeeded/Total", "Error") } @@ -163,7 +164,7 @@ class UISeleniumSuite // Check job ids val jobIdCells = findAll(cssSelector( """#batch-job-table a""")).toSeq - jobIdCells.map(_.text) should be (List("0", "1", "2", "3")) + jobIdCells.map(_.text).filter(_.forall(_.isDigit)) should be (List("0", "1", "2", "3")) val jobLinks = jobIdCells.flatMap(_.attribute("href")) jobLinks.size should be (4) @@ -198,7 +199,7 @@ class UISeleniumSuite ssc.stop(false) - sparkUI.getHandlers.count(_.getContextPath.contains("/streaming")) should be (0) + sparkUI.getDelegatingHandlers.count(_.getContextPath.contains("/streaming")) should be (0) eventually(timeout(10.seconds), interval(50.milliseconds)) { go to (sparkUI.webUrl.stripSuffix("/")) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala index c7d085ec0799b..468a52226682e 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/WindowOperationsSuite.scala @@ -146,15 +146,16 @@ class WindowOperationsSuite extends TestSuiteBase { test("window - persistence level") { val input = Seq( Seq(0), Seq(1), Seq(2), Seq(3), Seq(4), Seq(5)) - val ssc = new StreamingContext(conf, batchDuration) - val inputStream = new TestInputStream[Int](ssc, input, 1) - val windowStream1 = inputStream.window(batchDuration * 2) - assert(windowStream1.storageLevel === StorageLevel.NONE) - assert(inputStream.storageLevel === StorageLevel.MEMORY_ONLY_SER) - windowStream1.persist(StorageLevel.MEMORY_ONLY) - assert(windowStream1.storageLevel === StorageLevel.NONE) - assert(inputStream.storageLevel === StorageLevel.MEMORY_ONLY) - ssc.stop() + + withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => + val inputStream = 
new TestInputStream[Int](ssc, input, 1) + val windowStream1 = inputStream.window(batchDuration * 2) + assert(windowStream1.storageLevel === StorageLevel.NONE) + assert(inputStream.storageLevel === StorageLevel.MEMORY_ONLY_SER) + windowStream1.persist(StorageLevel.MEMORY_ONLY) + assert(windowStream1.storageLevel === StorageLevel.NONE) + assert(inputStream.storageLevel === StorageLevel.MEMORY_ONLY) + } } // Testing naive reduceByKeyAndWindow (without invertible function) @@ -276,7 +277,7 @@ class WindowOperationsSuite extends TestSuiteBase { expectedOutput: Seq[Seq[Int]], windowDuration: Duration = Seconds(2), slideDuration: Duration = Seconds(1) - ) { + ): Unit = { test("window - " + name) { val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt val operation = (s: DStream[Int]) => s.window(windowDuration, slideDuration) @@ -290,7 +291,7 @@ class WindowOperationsSuite extends TestSuiteBase { expectedOutput: Seq[Seq[(String, Int)]], windowDuration: Duration = Seconds(2), slideDuration: Duration = Seconds(1) - ) { + ): Unit = { test("reduceByKeyAndWindow - " + name) { logInfo("reduceByKeyAndWindow - " + name) val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt @@ -307,7 +308,7 @@ class WindowOperationsSuite extends TestSuiteBase { expectedOutput: Seq[Seq[(String, Int)]], windowDuration: Duration = Seconds(2), slideDuration: Duration = Seconds(1) - ) { + ): Unit = { test("reduceByKeyAndWindow with inverse function - " + name) { logInfo("reduceByKeyAndWindow with inverse function - " + name) val numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt @@ -325,7 +326,7 @@ class WindowOperationsSuite extends TestSuiteBase { expectedOutput: Seq[Seq[(String, Int)]], windowDuration: Duration = Seconds(2), slideDuration: Duration = Seconds(1) - ) { + ): Unit = { test("reduceByKeyAndWindow with inverse and filter functions - " + name) { logInfo("reduceByKeyAndWindow with inverse and filter functions - " + name) val 
numBatches = expectedOutput.size * (slideDuration / batchDuration).toInt diff --git a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala index aa69be7ca9939..86a8dc47098af 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala @@ -164,7 +164,7 @@ class WriteAheadLogBackedBlockRDDSuite testIsBlockValid: Boolean = false, testBlockRemove: Boolean = false, testStoreInBM: Boolean = false - ) { + ): Unit = { require(numPartitionsInBM <= numPartitions, "Can't put more partitions in BlockManager than that in RDD") require(numPartitionsInWAL <= numPartitions, diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index a8b00558b40a7..65efa10bfcf92 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -19,26 +19,26 @@ package org.apache.spark.streaming.scheduler import org.mockito.ArgumentMatchers.{eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} -import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, PrivateMethodTester} +import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.scalatest.concurrent.Eventually.{eventually, timeout} -import org.scalatest.mockito.MockitoSugar import org.scalatest.time.SpanSugar._ +import org.scalatestplus.mockito.MockitoSugar -import org.apache.spark.{ExecutorAllocationClient, SparkConf, SparkFunSuite} +import org.apache.spark.{ExecutorAllocationClient, SparkConf} import 
org.apache.spark.internal.config.{DYN_ALLOCATION_ENABLED, DYN_ALLOCATION_TESTING} import org.apache.spark.internal.config.Streaming._ -import org.apache.spark.streaming.{DummyInputDStream, Seconds, StreamingContext} +import org.apache.spark.resource.ResourceProfile +import org.apache.spark.streaming.{DummyInputDStream, Seconds, StreamingContext, TestSuiteBase} import org.apache.spark.util.{ManualClock, Utils} - -class ExecutorAllocationManagerSuite extends SparkFunSuite - with BeforeAndAfter with BeforeAndAfterAll with MockitoSugar with PrivateMethodTester { +class ExecutorAllocationManagerSuite extends TestSuiteBase + with MockitoSugar with PrivateMethodTester { private val batchDurationMillis = 1000L private var allocationClient: ExecutorAllocationClient = null private var clock: StreamManualClock = null - before { + override def beforeEach(): Unit = { allocationClient = mock[ExecutorAllocationClient] clock = new StreamManualClock() } @@ -72,10 +72,15 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite if (expectedRequestedTotalExecs.nonEmpty) { require(expectedRequestedTotalExecs.get > 0) verify(allocationClient, times(1)).requestTotalExecutors( - meq(expectedRequestedTotalExecs.get), meq(0), meq(Map.empty)) + meq(Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> + expectedRequestedTotalExecs.get)), + meq(Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> 0)), + meq(Map.empty)) } else { - verify(allocationClient, never).requestTotalExecutors(0, 0, Map.empty) - } + verify(allocationClient, never).requestTotalExecutors( + Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> 0), + Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> 0), + Map.empty)} } /** Verify that a particular executor was killed */ @@ -140,8 +145,11 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite reset(allocationClient) when(allocationClient.getExecutorIds()).thenReturn((1 to numExecs).map(_.toString)) requestExecutors(allocationManager, numNewExecs) - 
verify(allocationClient, times(1)).requestTotalExecutors( - meq(expectedRequestedTotalExecs), meq(0), meq(Map.empty)) + val defaultProfId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID + verify(allocationClient, times(1)). + requestTotalExecutors( + meq(Map(defaultProfId -> expectedRequestedTotalExecs)), + meq(Map(defaultProfId -> 0)), meq(Map.empty)) } withAllocationManager(numReceivers = 1) { case (_, allocationManager) => @@ -364,11 +372,11 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite } } - private val _addBatchProcTime = PrivateMethod[Unit]('addBatchProcTime) - private val _requestExecutors = PrivateMethod[Unit]('requestExecutors) - private val _killExecutor = PrivateMethod[Unit]('killExecutor) + private val _addBatchProcTime = PrivateMethod[Unit](Symbol("addBatchProcTime")) + private val _requestExecutors = PrivateMethod[Unit](Symbol("requestExecutors")) + private val _killExecutor = PrivateMethod[Unit](Symbol("killExecutor")) private val _executorAllocationManager = - PrivateMethod[Option[ExecutorAllocationManager]]('executorAllocationManager) + PrivateMethod[Option[ExecutorAllocationManager]](Symbol("executorAllocationManager")) private def addBatchProcTime(manager: ExecutorAllocationManager, timeMs: Long): Unit = { manager invokePrivate _addBatchProcTime(timeMs) @@ -392,13 +400,9 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite .setAppName(this.getClass.getSimpleName) .set("spark.streaming.dynamicAllocation.testing", "true") // to test dynamic allocation - var ssc: StreamingContext = null - try { - ssc = new StreamingContext(conf, Seconds(1)) + withStreamingContext(new StreamingContext(conf, Seconds(1))) { ssc => new DummyInputDStream(ssc).foreachRDD(_ => { }) body(ssc) - } finally { - if (ssc != null) ssc.stop() } } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/InputInfoTrackerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/InputInfoTrackerSuite.scala index 
a7e365649d3e8..cc393425ca6f0 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/InputInfoTrackerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/InputInfoTrackerSuite.scala @@ -17,27 +17,15 @@ package org.apache.spark.streaming.scheduler -import org.scalatest.BeforeAndAfter - import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.streaming.{Duration, StreamingContext, Time} - -class InputInfoTrackerSuite extends SparkFunSuite with BeforeAndAfter { +import org.apache.spark.streaming.{Duration, LocalStreamingContext, StreamingContext, Time} - private var ssc: StreamingContext = _ +class InputInfoTrackerSuite extends SparkFunSuite with LocalStreamingContext { - before { + override def beforeEach(): Unit = { + super.beforeEach() val conf = new SparkConf().setMaster("local[2]").setAppName("DirectStreamTacker") - if (ssc == null) { - ssc = new StreamingContext(conf, Duration(1000)) - } - } - - after { - if (ssc != null) { - ssc.stop() - ssc = null - } + ssc = new StreamingContext(conf, Duration(1000)) } test("test report and get InputInfo from InputInfoTracker") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/JobGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/JobGeneratorSuite.scala index f0e502727402e..227a02eece65b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/JobGeneratorSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/JobGeneratorSuite.scala @@ -93,14 +93,14 @@ class JobGeneratorSuite extends TestSuiteBase { } // Wait for new blocks to be received - def waitForNewReceivedBlocks() { + def waitForNewReceivedBlocks(): Unit = { eventually(testTimeout) { assert(receiverTracker.hasUnallocatedBlocks) } } // Wait for received blocks to be allocated to a batch - def waitForBlocksToBeAllocatedToBatch(batchTime: Long) { + def waitForBlocksToBeAllocatedToBatch(batchTime: Long): 
Unit = { eventually(testTimeout) { assert(getBlocksOfBatch(batchTime).nonEmpty) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/RateControllerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/RateControllerSuite.scala index 37ca0ce2f6a30..b5a45fc317d0e 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/RateControllerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/RateControllerSuite.scala @@ -30,8 +30,7 @@ class RateControllerSuite extends TestSuiteBase { override def batchDuration: Duration = Milliseconds(50) test("RateController - rate controller publishes updates after batches complete") { - val ssc = new StreamingContext(conf, batchDuration) - withStreamingContext(ssc) { ssc => + withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => val dstream = new RateTestInputDStream(ssc) dstream.register() ssc.start() @@ -43,8 +42,7 @@ class RateControllerSuite extends TestSuiteBase { } test("ReceiverRateController - published rates reach receivers") { - val ssc = new StreamingContext(conf, batchDuration) - withStreamingContext(ssc) { ssc => + withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => val estimator = new ConstantEstimator(100) val dstream = new RateTestInputDStream(ssc) { override val rateController = diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ReceiverTrackerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ReceiverTrackerSuite.scala index fec20f0429ff0..1a0154600bf3c 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ReceiverTrackerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ReceiverTrackerSuite.scala @@ -205,9 +205,9 @@ class StoppableReceiver extends Receiver[Int](StorageLevel.MEMORY_ONLY) { var receivingThreadOption: Option[Thread] = None - def onStart() { + def onStart(): Unit = { val thread = 
new Thread() { - override def run() { + override def run(): Unit = { while (!StoppableReceiver.shouldStop) { Thread.sleep(10) } @@ -217,7 +217,7 @@ class StoppableReceiver extends Receiver[Int](StorageLevel.MEMORY_ONLY) { thread.start() } - def onStop() { + def onStop(): Unit = { StoppableReceiver.shouldStop = true receivingThreadOption.foreach(_.join()) // Reset it so as to restart it diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala index 56b400850fdd4..10f92f9386173 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala @@ -22,24 +22,18 @@ import java.util.Properties import org.scalatest.Matchers import org.apache.spark.scheduler.SparkListenerJobStart -import org.apache.spark.streaming._ +import org.apache.spark.streaming.{LocalStreamingContext, _} import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.scheduler._ -class StreamingJobProgressListenerSuite extends TestSuiteBase with Matchers { +class StreamingJobProgressListenerSuite + extends TestSuiteBase + with LocalStreamingContext + with Matchers { val input = (1 to 4).map(Seq(_)).toSeq val operation = (d: DStream[Int]) => d.map(x => x) - var ssc: StreamingContext = _ - - override def afterFunction() { - super.afterFunction() - if (ssc != null) { - ssc.stop() - } - } - private def createJobStart( batchTime: Time, outputOpId: Int, jobId: Int): SparkListenerJobStart = { val properties = new Properties() diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala index d3ca2b58f36c2..576083723f8bd 100644 --- 
a/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala @@ -23,6 +23,7 @@ import java.util.concurrent.TimeUnit import org.scalatest.Matchers import org.apache.spark.SparkFunSuite +import org.apache.spark.ui.{UIUtils => SparkUIUtils} class UIUtilsSuite extends SparkFunSuite with Matchers{ @@ -70,10 +71,13 @@ class UIUtilsSuite extends SparkFunSuite with Matchers{ test("formatBatchTime") { val tzForTest = TimeZone.getTimeZone("America/Los_Angeles") val batchTime = 1431637480452L // Thu May 14 14:04:40 PDT 2015 - assert("2015/05/14 14:04:40" === UIUtils.formatBatchTime(batchTime, 1000, timezone = tzForTest)) + assert("2015/05/14 14:04:40" === + SparkUIUtils.formatBatchTime(batchTime, 1000, timezone = tzForTest)) assert("2015/05/14 14:04:40.452" === - UIUtils.formatBatchTime(batchTime, 999, timezone = tzForTest)) - assert("14:04:40" === UIUtils.formatBatchTime(batchTime, 1000, false, timezone = tzForTest)) - assert("14:04:40.452" === UIUtils.formatBatchTime(batchTime, 999, false, timezone = tzForTest)) + SparkUIUtils.formatBatchTime(batchTime, 999, timezone = tzForTest)) + assert("14:04:40" === + SparkUIUtils.formatBatchTime(batchTime, 1000, false, timezone = tzForTest)) + assert("14:04:40.452" === + SparkUIUtils.formatBatchTime(batchTime, 999, false, timezone = tzForTest)) } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/util/RecurringTimerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/util/RecurringTimerSuite.scala index 25b70a3d089ee..a11dac4d41caa 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/util/RecurringTimerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/util/RecurringTimerSuite.scala @@ -69,7 +69,7 @@ class RecurringTimerSuite extends SparkFunSuite with PrivateMethodTester { } } thread.start() - val stopped = PrivateMethod[RecurringTimer]('stopped) + val stopped = 
PrivateMethod[RecurringTimer](Symbol("stopped")) // Make sure the `stopped` field has been changed eventually(timeout(10.seconds), interval(10.millis)) { assert(timer.invokePrivate(stopped()) === true) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala index 8d2fa7d515e2f..bb60d6fa7bf78 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala @@ -34,9 +34,10 @@ import org.mockito.ArgumentCaptor import org.mockito.ArgumentMatchers.{any, anyLong, eq => meq} import org.mockito.Mockito.{times, verify, when} import org.scalatest.{BeforeAndAfter, BeforeAndAfterEach, PrivateMethodTester} +import org.scalatest.Assertions._ import org.scalatest.concurrent.Eventually import org.scalatest.concurrent.Eventually._ -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.streaming.scheduler._ @@ -256,12 +257,12 @@ class FileBasedWriteAheadLogSuite counter.increment() // block so that other threads also launch latch.await(10, TimeUnit.SECONDS) - override def completion() { counter.decrement() } + override def completion(): Unit = { counter.decrement() } } } @volatile var collected: Seq[Int] = Nil val t = new Thread() { - override def run() { + override def run(): Unit = { // run the calculation on a separate thread so that we can release the latch val iterator = FileBasedWriteAheadLog.seqToParIterator[Int, Int](executionContext, testSeq, handle) @@ -434,7 +435,7 @@ class BatchedWriteAheadLogSuite extends CommonWriteAheadLogTests( private var walBatchingExecutionContext: ExecutionContextExecutorService = _ private val sparkConf = new SparkConf() - private val queueLength = PrivateMethod[Int]('getQueueLength) + 
private val queueLength = PrivateMethod[Int](Symbol("getQueueLength")) override def beforeEach(): Unit = { super.beforeEach() diff --git a/tools/pom.xml b/tools/pom.xml index 6286fad403c83..e380e869f55c7 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -44,7 +44,7 @@ org.clapper classutil_${scala.binary.version} - 1.1.2 + 1.5.1 diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index c9058ff409893..f9bc499961ad7 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -115,7 +115,7 @@ object GenerateMIMAIgnore { ).filter(x => isPackagePrivate(x)).map(_.fullName) ++ getInnerFunctions(classSymbol) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { import scala.tools.nsc.io.File val (privateClasses, privateMembers) = privateWithin("org.apache.spark") val previousContents = Try(File(".generated-mima-class-excludes").lines()).